"""Scrape listing pages 1-5 of cs.lianjia.com/ershoufang and save them to 房子.csv.

For every ``div.info.clear`` card on each page the script records four
fields (title, location, address, priceInfo) and writes all rows to a
UTF-8 CSV file once every page has been processed.
"""
import csv
import sys

import requests
from lxml import etree

headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.60 Safari/537.36'
}

# Seconds to wait for the server. Without a timeout requests.get() may
# block indefinitely on a stalled connection.
REQUEST_TIMEOUT = 10


def _parse_listing(div_tag):
    """Build one CSV row (a dict) from a listing card element.

    Each XPath query is evaluated once and then indexed.

    Returns:
        A dict with the keys ``title``, ``location``, ``address`` and
        ``priceInfo``, or ``None`` when the card lacks any of the text
        nodes those fields are built from, so the caller can skip the
        card instead of aborting the whole run with an IndexError.
    """
    titles = div_tag.xpath('./div[@class="title"]/a/text()')
    flood_texts = div_tag.xpath('./div[@class="flood"]/div//text()')
    addresses = div_tag.xpath('./div[@class="address"]/div/text()')
    price_texts = div_tag.xpath('./div[@class="priceInfo"]/div//text()')

    # location needs text nodes 0..2, priceInfo needs text nodes 1..3.
    if not titles or len(flood_texts) < 3 or not addresses or len(price_texts) < 4:
        return None

    return {
        'title': titles[0],
        'location': ''.join(flood_texts[:3]),
        'address': addresses[0],
        'priceInfo': f'{price_texts[1]}{price_texts[2]} {price_texts[3]}',
    }


def _fetch_listings(page):
    """Download one result page and return the rows parsed from it.

    A page that cannot be fetched (network error, timeout, HTTP error
    status) is reported on stderr and yields an empty list, so rows from
    the other pages are still written.
    """
    url = f'https://cs.lianjia.com/ershoufang/pg{page}/'
    try:
        response = requests.get(url=url, headers=headers, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
    except requests.RequestException as exc:
        print(f'skipping {url}: {exc}', file=sys.stderr)
        return []

    html_element = etree.HTML(response.text)
    div_tags = html_element.xpath('//div[@class="info clear"]')

    rows = []
    skipped = 0
    for div_tag in div_tags:
        row = _parse_listing(div_tag)
        if row is None:
            skipped += 1
        else:
            rows.append(row)
    if skipped:
        print(f'{url}: skipped {skipped} incomplete listing(s)', file=sys.stderr)
    return rows


# Collected rows from every page; written out in one go below.
lis = []
for i in range(1, 6):
    lis.extend(_fetch_listings(i))

# newline='' lets the csv module control line endings itself.
with open('房子.csv', 'w', encoding='utf-8', newline='') as f:
    wt = csv.DictWriter(f, fieldnames=['title', 'location', 'address', 'priceInfo'])
    wt.writeheader()
    wt.writerows(lis)