import csv

import requests
from lxml import etree


def read_page(url, headers):
    """Scrape one listing page and return a list of {'name', 'url'} items."""
    data_list = []
    res1 = requests.get(url, headers=headers)
    html1 = etree.HTML(res1.text)
    div_tags = html1.xpath('//div[@id="main"]/div/div')
    for div_tag in div_tags:
        item = {}
        # Follow each listing entry to its detail page.
        url2 = "https:" + div_tag.xpath('./a/@href')[0]
        res2 = requests.get(url2, headers=headers)
        html2 = etree.HTML(res2.text)
        # First download link on the detail page.
        item['url'] = html2.xpath('//ul[@class="clearfix"]/li[1]/a/@href')[0]
        # Template name, with the trailing "免费下载" ("free download") label removed.
        item['name'] = div_tag.xpath('./p/a/text()')[0].replace('免费下载', '')
        data_list.append(item)
    return data_list


def write(data_list):
    """Write the collected items to a CSV file."""
    with open('jianlimuban.csv', 'w', encoding='utf-8-sig', newline='') as f:
        wt = csv.DictWriter(f, fieldnames=['url', 'name'])
        wt.writeheader()
        wt.writerows(data_list)


headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.75 Safari/537.36'
}

# Collect pages 1-5 first, then write once: write() opens the file in 'w'
# mode, so calling it per page would overwrite all but the last page.
all_data = []
for i in range(1, 6):
    url = f'https://aspx.sc.chinaz.com/query.aspx?keyword=%E5%85%8D%E8%B4%B9&issale=&classID=864&page={i}'
    all_data.extend(read_page(url, headers))
write(all_data)
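
# A minimal sanity check, assuming the script above has run and produced
# jianlimuban.csv in the working directory (this follow-up is not part of
# the original script): read the CSV back and print the first few entries.
import csv

with open('jianlimuban.csv', encoding='utf-8-sig', newline='') as f:
    for i, row in enumerate(csv.DictReader(f)):
        if i >= 5:
            break
        print(row['name'], row['url'])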