# Homework -- Teacher Qian'an (浅安老师) - Assignment 3 - "Web Crawler Knowledge Base" (《爬虫知识库》)

import requests
import csv
from lxml import etree
# HTTP request headers for the scraper: only a User-Agent is set, using the
# identification string of desktop Chrome 100 on 64-bit Windows 10.
headers = {
    'user-agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/100.0.4896.60 Safari/537.36'
    ),
}
# Collected listings: one dict per listing with the keys
# 'title', 'location', 'address' and 'priceInfo'.
lis = []
for i in range(1, 6):  # result pages pg1 .. pg5
    url = f'https://cs.lianjia.com/ershoufang/pg{i}/'
    # The timeout keeps a stalled connection from hanging the script forever;
    # raise_for_status() aborts on an HTTP error status instead of silently
    # parsing an error page that would contribute no rows.
    response = requests.get(url=url, headers=headers, timeout=10)
    response.raise_for_status()
    html_element = etree.HTML(response.text)
    for div_tag in html_element.xpath('//div[@class="info clear"]'):
        # Evaluate each XPath once and reuse the resulting list of text nodes.
        title_texts = div_tag.xpath('./div[@class="title"]/a/text()')
        location_texts = div_tag.xpath('./div[@class="flood"]/div//text()')
        address_texts = div_tag.xpath('./div[@class="address"]/div/text()')
        price_texts = div_tag.xpath('./div[@class="priceInfo"]/div//text()')
        # Slices and emptiness checks are used instead of bare indexing so a
        # listing that lacks some text node yields a shorter/empty field
        # rather than an IndexError that would abort the whole scrape. When
        # all nodes are present the values match plain indexing exactly.
        lis.append({
            'title': title_texts[0] if title_texts else '',
            # First three text nodes of the location block, concatenated.
            'location': ''.join(location_texts[:3]),
            'address': address_texts[0] if address_texts else '',
            # Text node 0 is skipped; nodes 1 and 2 are joined directly and
            # node 3 follows after a single space (presumably amount + unit,
            # then the unit price -- confirm against the page markup).
            'priceInfo': ''.join(price_texts[1:3]) + ' ' + ''.join(price_texts[3:4]),
        })
# Write every collected listing to '房子.csv' as UTF-8: a header row first,
# then one row per dict in `lis`. newline='' lets the csv module control the
# line endings itself.
columns = ['title', 'location', 'address', 'priceInfo']
with open('房子.csv', mode='w', newline='', encoding='utf-8') as csv_file:
    writer = csv.DictWriter(csv_file, fieldnames=columns)
    writer.writeheader()
    for row in lis:
        writer.writerow(row)