首先打开搜索界面,然后打开浏览器的“检查”(开发者工具)。
    image.png
    用 res=browser.find_elements_by_class_name('result') 抓取页面上的搜索结果。
    image.png
    用 x=browser.find_element_by_id('page') 定位 id 为 page 的分页栏。
    image.png
    然后抓取 page 里面的 a 标签:links=x.find_elements_by_tag_name('a')
    然后用 for 循环遍历 links,依次赋值给 a:for a in links:
    如果 a 的 href 中包含 page_num,就点击该链接翻页:if page_num in a.get_attribute('href'):
    image.png

    百度采集

    from time import sleep

    import requests
    from selenium import webdriver
    from selenium.webdriver.common.by import By
    from selenium.webdriver.common.keys import Keys
    # HTTP request headers passed to requests.get() in zhuqu(); the User-Agent
    # is a desktop Chrome 90 string.
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, compress',
        'Accept-Language': 'en-us;q=0.5,en;q=0.3',
        'Cache-Control': 'max-age=0',
        'Connection': 'keep-alive',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36',
    }
    def baidu_url(keyword='inurl:asp?id='):
        """Search Baidu for *keyword* and walk through the result pages.

        Opens a Chrome session, submits the query, and for each result page
        passes the elements with class ``result`` to ``zhuqu`` before clicking
        the pager link whose ``href`` contains the next ``&pn=<offset>`` value
        (offsets 10, 20, ... 990). Stops early when no such link exists.

        Args:
            keyword: Text typed into the Baidu search box. Defaults to the
                query used originally.

        Raises:
            selenium.common.exceptions.NoSuchElementException: If the search
                box (id ``kw``) or the pager (id ``page``) is not on the page.
        """
        browser = webdriver.Chrome()
        try:
            browser.get('https://www.baidu.com')
            search_box = browser.find_element(By.ID, 'kw')
            search_box.send_keys(keyword)
            search_box.send_keys(Keys.ENTER)

            for p in range(10, 1000, 10):
                page_num = '&pn=' + str(p)
                # Fixed pause before reading the page; presumably gives the
                # results time to load after the search / page click.
                sleep(5)
                res = browser.find_elements(By.CLASS_NAME, 'result')
                zhuqu(res)

                pager = browser.find_element(By.ID, 'page')
                for a in pager.find_elements(By.TAG_NAME, 'a'):
                    href = a.get_attribute('href')
                    # get_attribute() returns None for anchors without href.
                    if href and page_num in href:
                        a.click()
                        break
                else:
                    # No link to the next offset: nothing more to page through,
                    # so stop instead of re-scraping the same page.
                    break
                print('当前抓取第%s 页内容' % int(p / 10+1))
        finally:
            # Always release the browser and its driver process.
            browser.quit()
    def zhuqu(res):
        """Resolve each search result's link and append the target URL to a file.

        For every element in *res*, takes its first ``<a>`` tag, requests the
        link's ``href`` without following redirects, and reads the ``Location``
        response header. The link text and that URL are printed, and the URL
        is appended as one line to ``d:/baidu_url.txt``.

        Args:
            res: Iterable of Selenium elements, each containing an ``<a>`` tag.

        Raises:
            requests.RequestException: If a request fails or exceeds the
                10-second timeout.
        """
        # Open the output once for the whole batch; the context manager closes
        # it even if a lookup or request raises.
        with open('d:/baidu_url.txt', 'a', encoding='utf-8') as f:
            for item in res:
                link = item.find_element(By.TAG_NAME, 'a')
                t = link.text
                zu = link.get_attribute('href')
                baiurl = requests.get(
                    zu, headers=headers, allow_redirects=False, timeout=10
                )
                true_url = baiurl.headers.get('Location')
                if true_url is None:
                    # Response was not a redirect, so there is no target URL
                    # to record for this result.
                    continue
                print(t)
                print(true_url)
                f.write(true_url + '\n')
    # Run the crawl only when executed as a script, not when imported.
    if __name__ == '__main__':
        baidu_url()

    模拟登录

    from time import sleep

    from selenium import webdriver
    from selenium.webdriver.common.by import By
    from selenium.webdriver.common.keys import Keys

    # Simulated login: open the login page in Chrome, fill in the e-mail and
    # password fields, then click the login button.
    br = webdriver.Chrome()
    br.get('https://localhost:3443/#/login')

    # NOTE(review): credentials are hard-coded in source; move them to
    # environment variables or a secret store before sharing this script.
    # send_keys() and click() return None, so their results are not kept.
    br.find_element(By.ID, 'txtEmailAddress').send_keys('2920015236@qq.com')
    br.find_element(By.ID, 'txtPassword').send_keys('x5201314..')
    br.find_element(By.ID, 'btnLogin').click()