- 目标网站:https://aspx.sc.chinaz.com/query.aspx?keyword=%E5%85%8D%E8%B4%B9&issale=&classID=864&page=1
- 爬取要求:
- 1、翻页获取到页面的源码
- 2、用xpath解析数据,获取到页面所有模板名字和下载链接
- 3、把数据保存到csv
import requests
import csv
from lxml import etree
# Scrape one result page of the chinaz template search and store every
# template's name and download link in a CSV file.

# `age` holds the page number typed by the user; it is interpolated into the
# query string as raw text, exactly as entered.
age = input('请输入页码:')
url = f'https://aspx.sc.chinaz.com/query.aspx?keyword=%E5%85%8D%E8%B4%B9&issale=&classID=864&page={age}'
print(url)

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/95.0.4638.69 Safari/537.36 '
}

# `headers` has to be passed by keyword: the second positional parameter of
# requests.get() is `params`, so passing the dict positionally would append it
# to the query string instead of sending it as HTTP headers.
# The timeout keeps the script from hanging forever on a stalled connection.
response = requests.get(url, headers=headers, timeout=10)
# Fail loudly on 4xx/5xx rather than silently writing an empty CSV.
response.raise_for_status()

# Parse into a separate name so the imported `etree` module is not shadowed.
html_tree = etree.HTML(response.text)

# Select each title anchor once and read both fields from the same element.
# Zipping two independent XPath result lists can misalign names and links when
# an anchor has no direct text node or several of them.
# etree.HTML() may return None for an empty document, hence the guard.
anchors = (
    html_tree.xpath('//a[@class="title_wl"][@href]')
    if html_tree is not None
    else []
)
# string(.) yields the full text content of the anchor, nested tags included.
title_list = [anchor.xpath('string(.)').strip() for anchor in anchors]
href_list = [anchor.get('href') for anchor in anchors]

# NOTE(review): prefixing 'https:' assumes the page emits protocol-relative
# hrefs ("//host/path") — confirm against the live markup.
data = [
    {'名称': title, '链接': 'https:' + href}
    for title, href in zip(title_list, href_list)
]

# newline='' is required by the csv module to avoid blank rows on Windows.
# utf-8-sig writes a BOM so spreadsheet tools detect the encoding and show the
# Chinese text correctly instead of relying on the platform default codec.
with open('站长.csv', 'w', newline='', encoding='utf-8-sig') as f:
    writer = csv.DictWriter(f, fieldnames=['名称', '链接'])
    writer.writeheader()
    writer.writerows(data)