环境搭建
Chrome浏览器 + chromedriver + selenium
选择浏览器版本
谷歌浏览器地址 https://www.chromedownloads.net/
选择驱动
- 驱动地址
 - 在浏览器地址栏输入 chrome://version 查看当前浏览器的版本号,然后下载与该版本对应的 chromedriver
 
切记不要点击帮助下面的“关于Google Chrome(G)”,否则浏览器会自动升级,导致浏览器版本与已下载的驱动不再匹配。
浏览器不要选择较新的版本,驱动可能还没更新或不稳定。
Selenium基础用法
from selenium import webdriver
from lxml import etree
from time import sleep

# Instantiate a browser object, passing the path to the browser driver binary.
bro = webdriver.Chrome(executable_path='./chromedriver')
try:
    # Have the browser issue a request for the given URL.
    bro.get('http://125.35.6.84:81/xk/')

    # page_source is the source of the page currently loaded in the browser.
    page_text = bro.page_source

    # Parse the company names out of the rendered page.
    tree = etree.HTML(page_text)
    li_list = tree.xpath('//ul[@id="gzlist"]/li')
    for li in li_list:
        titles = li.xpath('./dl/@title')
        if not titles:
            # xpath() returns a list; skip entries without a title rather
            # than aborting the whole run with an IndexError.
            continue
        print(titles[0])

    sleep(5)
finally:
    # Always close the browser, even when a step above raised, so the
    # browser and driver processes are not left running.
    bro.quit()
Selenium自动化
from selenium import webdriver
from time import sleep

bro = webdriver.Chrome(executable_path='./chromedriver')
try:
    bro.get('https://www.taobao.com/')

    # Locate the search box (find_element_by_xpath is the generally
    # recommended locator).
    search_input = bro.find_element_by_id('q')
    # Interact with the element: type the search keyword.
    search_input.send_keys('Iphone')

    # Execute a snippet of JavaScript: scroll to the bottom of the page.
    bro.execute_script('window.scrollTo(0,document.body.scrollHeight)')
    sleep(2)

    # Click the search button.
    btn = bro.find_element_by_css_selector('.btn-search')
    btn.click()

    bro.get('https://www.baidu.com')
    sleep(2)
    # Navigate back in the browser history.
    bro.back()
    sleep(2)
    # Navigate forward again.
    bro.forward()
    sleep(5)
finally:
    # Always close the browser, even when a locator or navigation step
    # raised, so the browser and driver processes are not left running.
    bro.quit()
# Action chains and iframe handling
from selenium import webdriver
from time import sleep
# Import the class that implements action chains.
from selenium.webdriver import ActionChains

bro = webdriver.Chrome(executable_path='./chromedriver')
try:
    bro.get('https://www.runoob.com/try/try.php?filename=jqueryui-api-droppable')

    # An element that lives inside an iframe can only be located after
    # switching the driver's lookup scope into that iframe.
    bro.switch_to.frame('iframeResult')
    div = bro.find_element_by_id('draggable')

    # Build the action chain.
    action = ActionChains(bro)
    # Press and hold the mouse button on the target element.
    action.click_and_hold(div)
    for i in range(5):
        # perform() executes the queued actions immediately.
        # move_by_offset(x, y): x is horizontal, y is vertical.
        action.move_by_offset(17, 0).perform()
        sleep(0.5)

    # Release the mouse button. release() only queues the action, so it
    # must be followed by perform() or the button is never released.
    action.release().perform()
finally:
    # Always close the browser, even when a step above raised, so the
    # browser and driver processes are not left running.
    bro.quit()
规避检测
from selenium import webdriver
from time import sleep
# Options class used to run Chrome without a visible window.
from selenium.webdriver.chrome.options import Options
# Options class used to evade automation detection.
from selenium.webdriver import ChromeOptions

# Every setting goes on ONE options object. Passing two objects (one as
# `chrome_options=`, one as `options=`) does not merge them: Selenium 3's
# Chrome driver replaces `options` with the deprecated `chrome_options`
# whenever both are given, which silently dropped the detection-evasion
# switch.
chrome_options = Options()

# Run headless (no visible browser window).
chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')

# Evade detection: drop the 'enable-automation' switch.
chrome_options.add_experimental_option('excludeSwitches', ['enable-automation'])

bro = webdriver.Chrome(executable_path='./chromedriver', options=chrome_options)
try:
    # Headless browsing (previously done with PhantomJS).
    bro.get('https://www.baidu.com')

    print(bro.page_source)
    sleep(2)
finally:
    # Always close the browser, even when a step above raised, so the
    # headless browser and driver processes are not left running.
    bro.quit()
模拟登录qq空间
from selenium import webdriver
from time import sleep

bro = webdriver.Chrome(executable_path='./chromedriver')
try:
    bro.get('https://qzone.qq.com/')

    # The login form is inside an iframe, so switch the driver's lookup
    # scope into it before locating any of its elements.
    bro.switch_to.frame('login_frame')

    # Click the element with id "switcher_plogin" before filling in the form.
    a_tag = bro.find_element_by_id("switcher_plogin")
    a_tag.click()

    userName_tag = bro.find_element_by_id('u')
    password_tag = bro.find_element_by_id('p')

    # NOTE(review): credentials are hard-coded; presumably placeholders --
    # real ones should be loaded from the environment, not kept in source.
    sleep(1)
    userName_tag.send_keys('598779794')
    sleep(1)
    password_tag.send_keys('123456789')
    sleep(1)

    btn = bro.find_element_by_id('login_button')
    btn.click()

    sleep(3)
finally:
    # Always close the browser, even when a locator or click raised, so
    # the browser and driver processes are not left running.
    bro.quit()
