"""
目标网站:
https://bj.58.com/ershoufang/
需求:
xpath抓取当前页面标题
模块:
requests, lxml
"""
import requests
from lxml import etree
# Listing page whose entry titles are scraped.
url = 'https://bj.58.com/ershoufang/'
# Chrome-style User-Agent header sent with the request.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.63 Safari/537.36'}

# Fetch the page. The timeout keeps the script from hanging forever on a
# stalled connection, and raise_for_status() stops early on an HTTP error
# instead of parsing an error page and writing an empty result file.
page = requests.get(url, headers=headers, timeout=10)
page.raise_for_status()
# `response` holds the decoded HTML text (bytes decoded with the default UTF-8).
response = page.content.decode()

# Text of every <h3> directly under a div whose class attribute is exactly
# "property-content-title".
title_list = etree.HTML(response).xpath('//div[@class="property-content-title"]/h3/text()')

# NOTE(review): the original indentation was lost; the per-title write and
# progress print are assumed to be the loop body — confirm.
# The context manager closes the file even if a write or print raises.
with open('58北京2手房标题.txt', 'w', encoding='utf-8') as f:
    for title in title_list:
        # One title per line in the output file, with a progress message.
        f.write(f'{title}\n')
        print(title, '下载完成')

# Summary of how many titles were collected.
print(f'一共获取到{len(title_list)}个标题')