import csv

import requests
from lxml import etree
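
# Scrapes one page of Lianjia ershoufang (second-hand housing) listings
# and saves the results to a CSV file.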


def read_page(url, headers):
    """Fetch a listing page and parse each house entry into a dict."""
    res = requests.get(url, headers=headers, timeout=10)
    html = res.text
    html_element = etree.HTML(html)
    # Each listing lives in a <div class="info clear"> block.
    infos = html_element.xpath('//div[@class="info clear"]')
    info = []
    for shuju in infos:
        fangziDict = {}
        # The first <a> text is the title; the next two form the address.
        name = shuju.xpath('.//a/text()')[0]
        dizhi1 = shuju.xpath('.//a/text()')[1]
        dizhi2 = shuju.xpath('.//a/text()')[2]
        dizhi = dizhi1 + '-' + dizhi2
        # <div> texts 2 and 3 hold the layout/size description.
        sheji1 = shuju.xpath('.//div/text()')[2]
        sheji2 = shuju.xpath('.//div/text()')[3]
        sheji = sheji1 + sheji2
        # The last two <span> texts are the total price (万元) and unit price.
        jiage1 = shuju.xpath('.//span/text()')[-2]
        jiage2 = shuju.xpath('.//span/text()')[-1]
        jiage = jiage1 + '万' + ' ' + jiage2
        fangziDict['房名'] = name
        fangziDict['地址'] = dizhi
        fangziDict['设计样式'] = sheji
        fangziDict['价格'] = jiage
        info.append(fangziDict)
    # print(info)
    return info


def bao_cun(info):
    """Write the scraped listings to ershoufang.csv."""
    # utf-8-sig writes a BOM so spreadsheet apps detect the Chinese headers.
    with open('ershoufang.csv', 'w', encoding='utf-8-sig', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=['房名', '地址', '设计样式', '价格'])
        writer.writeheader()
        writer.writerows(info)


if __name__ == '__main__':
    url = 'https://cs.lianjia.com/ershoufang/'
    # A browser-like User-Agent so the request is not rejected outright.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.75 Safari/537.36'
    }
    info = read_page(url, headers)
    bao_cun(info)