import requests
import csv
from lxml import etree
def read_page(url, headers):
    """Fetch one Lianjia listing page and extract per-house info.

    Parameters:
        url: listing page URL (e.g. 'https://cs.lianjia.com/ershoufang/').
        headers: dict of HTTP headers; must carry a browser User-Agent or
            the site may refuse the request.

    Returns:
        list of dicts with keys '房名' (name), '地址' (address),
        '设计样式' (layout/size), '价格' (price).

    Raises:
        requests.HTTPError: if the server responds with a 4xx/5xx status.
        requests.Timeout: if the server does not answer within 10s.
    """
    res = requests.get(url, headers=headers, timeout=10)
    # Fail fast on an error status instead of silently parsing an error page
    # and returning an empty list.
    res.raise_for_status()
    html_element = etree.HTML(res.text)
    infos = html_element.xpath('//div[@class="info clear"]')
    info = []
    for item in infos:
        # Hoist each xpath query once per card: the original ran
        # './/a/text()' three separate times per listing.
        links = item.xpath('.//a/text()')
        divs = item.xpath('.//div/text()')
        spans = item.xpath('.//span/text()')
        # Skip malformed/ad cards instead of crashing with IndexError.
        if len(links) < 3 or len(divs) < 4 or len(spans) < 2:
            continue
        info.append({
            '房名': links[0],
            '地址': links[1] + '-' + links[2],
            '设计样式': divs[2] + divs[3],
            # Last two spans are total price (in 万) and unit price.
            '价格': spans[-2] + '万' + ' ' + spans[-1],
        })
    return info
def bao_cun(info, filename='ershoufang.csv'):
    """Save scraped listing dicts to a CSV file.

    Parameters:
        info: iterable of dicts keyed by '房名', '地址', '设计样式', '价格'.
        filename: output path; defaults to 'ershoufang.csv' so existing
            callers keep their behavior.
    """
    # utf-8-sig writes a BOM so Excel detects the encoding and renders the
    # Chinese headers correctly; newline='' stops the csv module from
    # emitting doubled line endings on Windows.
    with open(filename, 'w', encoding='utf-8-sig', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=['房名', '地址', '设计样式', '价格'])
        writer.writeheader()
        writer.writerows(info)
# Script entry point: scrape the first page of Changsha second-hand
# listings and save them to ershoufang.csv. Guarded so that merely
# importing this module does not fire a network request or write a file.
if __name__ == '__main__':
    url = 'https://cs.lianjia.com/ershoufang/'
    headers = {
        # Browser User-Agent so the site does not reject the request.
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.75 Safari/537.36'
    }
    info = read_page(url, headers)
    bao_cun(info)