import csv

import requests
from lxml import etree


def read_page(url, headers):
    """Fetch one Lianjia second-hand-housing listing page and parse it.

    Args:
        url: Listing-page URL to download.
        headers: HTTP header dict; must carry a browser ``User-Agent``
            or the site is likely to reject the request.

    Returns:
        A list of dicts, one per listing, with the keys
        '房名' (name), '地址' (address), '设计样式' (layout/size)
        and '价格' (price).

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.Timeout: if the request takes longer than 10 seconds.
    """
    # Time out instead of hanging forever, and fail fast on HTTP errors
    # rather than parsing an error page as if it were listings.
    res = requests.get(url, headers=headers, timeout=10)
    res.raise_for_status()
    html_element = etree.HTML(res.text)

    listings = []
    for item in html_element.xpath('//div[@class="info clear"]'):
        # Evaluate each relative XPath once and index the cached result
        # lists, instead of re-running the same query for every field.
        links = item.xpath('.//a/text()')
        divs = item.xpath('.//div/text()')
        spans = item.xpath('.//span/text()')
        if len(links) < 3 or len(divs) < 4 or len(spans) < 2:
            # Skip ad blocks / malformed cards instead of dying on IndexError.
            continue
        listings.append({
            '房名': links[0],
            '地址': links[1] + '-' + links[2],
            '设计样式': divs[2] + divs[3],
            # Total price ('万' = 10,000 CNY) followed by the per-m² price.
            '价格': spans[-2] + '万' + ' ' + spans[-1],
        })
    return listings


def bao_cun(info):
    """Save parsed listings to 'ershoufang.csv', overwriting any existing file.

    Args:
        info: List of listing dicts as returned by ``read_page``.
    """
    # utf-8-sig writes a BOM so Excel auto-detects the encoding of the
    # Chinese column headers; newline='' is required by the csv module.
    with open('ershoufang.csv', 'w', encoding='utf-8-sig', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=['房名', '地址', '设计样式', '价格'])
        writer.writeheader()
        writer.writerows(info)


if __name__ == '__main__':
    # Guarded so that importing this module does not fire a network request.
    url = 'https://cs.lianjia.com/ershoufang/'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.75 Safari/537.36'
    }
    bao_cun(read_page(url, headers))