"""
Target site:
    https://bj.58.com/ershoufang/
Goal:
    Scrape the listing titles on the current page using XPath.
Modules:
    requests, lxml
"""
    9. import requests
    10. from lxml import etree
    11. url = 'https://bj.58.com/ershoufang/'
    12. headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.63 Safari/537.36'}
    13. response = requests.get(url, headers=headers).content.decode()
    14. title_list = etree.HTML(response).xpath('//div[@class="property-content-title"]/h3/text()')
    15. f = open('58北京2手房标题.txt', 'w', encoding='utf-8')
    16. for title in title_list:
    17. f.write(f'{title}\n')
    18. print(title, '下载完成')
    19. print(f'一共获取到{len(title_list)}个标题')
    20. f.close()