start查
Array ( [0] => XSDocument Object ( [_data:XSDocument:private] => Array ( [id] => python2556_1 [title] => 爬取绝招-n [content] => 爬取的 [255] => ) [_terms:XSDocument:private] => [_texts:XSDocument:private] => [_charset:XSDocument:private] => UTF-8 [_meta:XSDocument:private] => Array ( [docid] => 172 [rank] => 1 [ccount] => 0 [percent] => 100 [weight] => 3.7128221988678 ) ) [1] => XSDocument Object ( [_data:XSDocument:private] => Array ( [id] => python2562 [title] => 爬取网页中动态加载的数据 [lang] => python [database_name] => database_name [doc_type] => 1 [database_type] => 1 [doc_id] => 2562 [255] => ) [_terms:XSDocument:private] => [_texts:XSDocument:private] => [_charset:XSDocument:private] => UTF-8 [_meta:XSDocument:private] => Array ( [docid] => 4 [rank] => 2 [ccount] => 0 [percent] => 78 [weight] => 2.9172174930573 ) ) [2] => XSDocument Object ( [_data:XSDocument:private] => Array ( [id] => python2560 [title] => 实现定时爬取网页内容 [lang] => python [database_name] => database_name [doc_type] => 1 [database_type] => 1 [doc_id] => 2560 [255] => ) [_terms:XSDocument:private] => [_texts:XSDocument:private] => [_charset:XSDocument:private] => UTF-8 [_meta:XSDocument:private] => Array ( [docid] => 6 [rank] => 3 [ccount] => 0 [percent] => 78 [weight] => 2.9172174930573 ) ) [3] => XSDocument Object ( [_data:XSDocument:private] => Array ( [id] => python2558 [title] => 爬取北、上、广租房信息 [lang] => python [database_name] => database_name [doc_type] => 1 [database_type] => 1 [doc_id] => 2558 [255] => ) [_terms:XSDocument:private] => [_texts:XSDocument:private] => [_charset:XSDocument:private] => UTF-8 [_meta:XSDocument:private] => Array ( [docid] => 8 [rank] => 4 [ccount] => 0 [percent] => 78 [weight] => 2.9172174930573 ) ) [4] => XSDocument Object ( [_data:XSDocument:private] => Array ( [id] => python2557 [title] => 使用多进程爬取在线课程MySQL版 [lang] => python [database_name] => database_name [doc_type] => 1 [database_type] => 1 [doc_id] => 2557 [255] => ) [_terms:XSDocument:private] => [_texts:XSDocument:private] 
=> [_charset:XSDocument:private] => UTF-8 [_meta:XSDocument:private] => Array ( [docid] => 9 [rank] => 5 [ccount] => 0 [percent] => 78 [weight] => 2.9172174930573 ) ) [5] => XSDocument Object ( [_data:XSDocument:private] => Array ( [id] => python2556 [title] => 爬取在线课程MySQL版 [lang] => python [database_name] => database_name [doc_type] => 1 [database_type] => 1 [doc_id] => 2556 [255] => ) [_terms:XSDocument:private] => [_texts:XSDocument:private] => [_charset:XSDocument:private] => UTF-8 [_meta:XSDocument:private] => Array ( [docid] => 10 [rank] => 6 [ccount] => 0 [percent] => 78 [weight] => 2.9172174930573 ) ) [6] => XSDocument Object ( [_data:XSDocument:private] => Array ( [id] => python2555 [title] => 爬取在线课程Excel版 [content] => 11122 [lang] => python [database_name] => database_name [doc_type] => 1 [database_type] => 1 [doc_id] => 2555 [255] => ) [_terms:XSDocument:private] => [_texts:XSDocument:private] => [_charset:XSDocument:private] => UTF-8 [_meta:XSDocument:private] => Array ( [docid] => 11 [rank] => 7 [ccount] => 0 [percent] => 78 [weight] => 2.9172174930573 ) ) [7] => XSDocument Object ( [_data:XSDocument:private] => Array ( [id] => python4664 [title] => 批量爬取B站小视频 [lang] => python [database_name] => database_name [doc_type] => 1 [database_type] => 1 [doc_id] => 4664 [255] => ) [_terms:XSDocument:private] => [_texts:XSDocument:private] => [_charset:XSDocument:private] => UTF-8 [_meta:XSDocument:private] => Array ( [docid] => 111 [rank] => 8 [ccount] => 0 [percent] => 78 [weight] => 2.9172174930573 ) ) [8] => XSDocument Object ( [_data:XSDocument:private] => Array ( [id] => python4665 [title] => 爬取B站小视频之随机生成浏览器的头部信息 [lang] => python [database_name] => database_name [doc_type] => 1 [database_type] => 1 [doc_id] => 4665 [255] => ) [_terms:XSDocument:private] => [_texts:XSDocument:private] => [_charset:XSDocument:private] => UTF-8 [_meta:XSDocument:private] => Array ( [docid] => 112 [rank] => 9 [ccount] => 0 [percent] => 78 [weight] => 2.9172174930573 ) ) [9] => 
XSDocument Object ( [_data:XSDocument:private] => Array ( [id] => python4666 [title] => 爬取B站小视频之获取要下载视频的大小 [lang] => python [database_name] => database_name [doc_type] => 1 [database_type] => 1 [doc_id] => 4666 [255] => ) [_terms:XSDocument:private] => [_texts:XSDocument:private] => [_charset:XSDocument:private] => UTF-8 [_meta:XSDocument:private] => Array ( [docid] => 113 [rank] => 10 [ccount] => 0 [percent] => 78 [weight] => 2.9172174930573 ) ) [10] => XSDocument Object ( [_data:XSDocument:private] => Array ( [id] => python4667 [title] => 爬取B站小视频之实时打印文件下载进度 [lang] => python [database_name] => database_name [doc_type] => 1 [database_type] => 1 [doc_id] => 4667 [255] => ) [_terms:XSDocument:private] => [_texts:XSDocument:private] => [_charset:XSDocument:private] => UTF-8 [_meta:XSDocument:private] => Array ( [docid] => 114 [rank] => 11 [ccount] => 0 [percent] => 78 [weight] => 2.9172174930573 ) ) [11] => XSDocument Object ( [_data:XSDocument:private] => Array ( [id] => python2565 [title] => 通过selenium模块实现自动切换浏览器页面 [content] =>[lang] => python [database_name] => database_name [doc_type] => 1 [database_type] => 1 [doc_id] => 2565 [255] => ) [_terms:XSDocument:private] => [_texts:XSDocument:private] => [_charset:XSDocument:private] => UTF-8 [_meta:XSDocument:private] => Array ( [docid] => 1 [rank] => 12 [ccount] => 0 [percent] => 5 [weight] => 0.21397353708744 ) ) )
实例描述
在使用selenium框架爬取网页信息时,偶尔需要爬取多个页面的信息。此时如果没有进行浏览器页面的切换,将无法爬取对应网页的数据内容,如图1所示。那么该如何通过selenium框架实现自动切换浏览器页面呢?本实例将通过selenium模块实现自动切换浏览器页面。
![]()
图1 浏览器页面切换
代码实现
通过selenium框架实现自动切换浏览器页面时,首先需要区分每个页面的window_handle,也就是每个页面的浏览器窗口。确定浏览器页面窗口以后,进行页面窗口的切换并将执行权传递给切换后的页面窗口,此时切换后的窗口才会拥有执行权(当前窗口的控制权)。示例代码如下:
from selenium import webdriver # 导入浏览器驱动模块
import time # 导入时间模块
class Selenium():
    """Small demo wrapper showing how to switch between browser windows.

    The constructor starts Chrome and loads a start page; ``toggle_pages``
    opens a second window, then switches to and closes each window in turn.
    """

    def __init__(self, executable_path='G:/Python/Python37/chromedriver',
                 start_url='https://www.taobao.com/'):
        """Start Chrome, maximize the window and load the start page.

        Args:
            executable_path: Location of the chromedriver binary. The default
                is the path the example originally hard-coded.
            start_url: Page loaded in the first window (Taobao by default).
        """
        # NOTE(review): ``executable_path`` is the Selenium 3 keyword; newer
        # Selenium releases expect a Service object instead -- confirm the
        # installed version before relying on this call.
        self.driver = webdriver.Chrome(executable_path=executable_path)
        self.driver.maximize_window()  # maximize the browser window
        self.driver.get(start_url)  # open the start page

    def toggle_pages(self, url, delay=3):
        """Open ``url`` in a new window, then close both windows in turn.

        The driver is switched to the original window and closes it, then
        switched to the newly opened window and closes that one too. Finally
        the driver session is shut down, so the instance cannot be reused.

        Args:
            url: Address to open in the new window.
            delay: Seconds to pause between steps (3 by default) so the
                switching is visible while the demo runs.

        Raises:
            NoSuchWindowException: If ``window.open`` did not produce a new
                window handle (for example when a popup was blocked).
        """
        driver = self.driver
        time.sleep(delay)  # pause before opening the second window

        original_handle = driver.current_window_handle
        # Snapshot the handles first so the new window can be identified
        # reliably even when other windows are already open.
        known_handles = set(driver.window_handles)

        # Pass the URL as a script argument instead of formatting it into the
        # JavaScript source, so quotes in the URL cannot break the script.
        driver.execute_script('window.open(arguments[0])', url)

        new_handle = next(
            (handle for handle in driver.window_handles
             if handle not in known_handles),
            None)
        if new_handle is None:
            # Fail before anything is closed; imported here because this
            # block's surrounding import section only brings in webdriver.
            from selenium.common.exceptions import NoSuchWindowException
            raise NoSuchWindowException(
                'window.open did not create a new window for %r' % (url,))
        time.sleep(delay)

        # Switch control to the original window and close it.
        driver.switch_to.window(original_handle)
        time.sleep(delay)
        driver.close()

        # Switch control to the new window and close it.
        driver.switch_to.window(new_handle)
        time.sleep(delay)
        driver.close()

        # close() only closes windows; quit() also ends the driver session so
        # the chromedriver process does not linger after the demo.
        driver.quit()
if __name__ == '__main__':
    # Script entry point: start the browser on its start page, then open the
    # Tmall home page in a second window and run the window-switching demo.
    browser = Selenium()
    browser.toggle_pages('https://www.tmall.com/')
运行代码,首先谷歌浏览器将自动打开“淘宝”网页,然后再打开“天猫”网页,接着执行从“天猫”网页自动切换至“淘宝”网页并关闭“淘宝”网页的操作,如图2所示。
![]()
图2 自动关闭“淘宝”网页保留“天猫”网页
说明:“淘宝”网页自动关闭后,程序会切换到“天猫”网页,随后“天猫”网页也将自动关闭。