# 7.1 - 《第十三期爬虫》

import requests,time,csv
from lxml import etree

# Scrape the first result page of the chinaz material search for the keyword
# "免费", visit every detail page linked from it, and append each item's title
# and first download link to a CSV file.

# Listing page to crawl (keyword is the URL-encoded form of "免费").
START_URL = 'https://aspx.sc.chinaz.com/query.aspx?keyword=%E5%85%8D%E8%B4%B9&issale=&classID=864&page=1'

# A browser-like User-Agent. The original script sent the page URL here,
# which is not a valid User-Agent value.
HEADERS = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
    ),
}

REQUEST_TIMEOUT = 10   # seconds; prevents a stalled connection from hanging the run
REQUEST_DELAY = 0.5    # seconds to wait before each detail-page request

OUTPUT_PATH = 'nn.csv'
FIELDNAMES = ('名字', '下载地址')

# XPath of the element on the listing page that holds the result boxes.
CONTAINER_XPATH = '/html/body/div[@class="bggray clearfix"]/div[@class="sc_warp mt20"]/div[@id="main"]/div[@id="container"]'
# Relative (".//") so the search stays inside the container element; a leading
# "//" would be evaluated against the whole document instead.
LINK_XPATH = './/div[@class="box col3 ws_block"]/p/a/@href'
# XPaths evaluated on a detail page.
DOWNLOAD_XPATH = '/html/body/div[@class="bggray clearfix"]/div[@class="sc_warp clearfix"]/div[@class="ppt_left fl"]/div[@class="bgwhite"]/div[@class="down_wrap"]/div[@class="clearfix mt20 downlist"]/ul[@class="clearfix"]/li[1]/a/@href'
NAME_XPATH = '/html/body/div[@class="bggray clearfix"]/div[@class="sc_warp clearfix"]/div[@class="ppt_left fl"]/div[@class="bgwhite"]/div[@class="ppt_tit clearfix"]/h1//text()'


def fetch_tree(page_url):
    """Download *page_url* and return it parsed as an lxml HTML tree.

    Raises:
        requests.RequestException: on connection errors, timeouts, or a
            non-2xx HTTP status.
    """
    response = requests.get(url=page_url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return etree.HTML(response.content.decode('utf-8'))


def build_detail_url(href):
    """Return the absolute detail-page URL, ending in ``#down``, for *href*.

    Protocol-relative hrefs (``//host/path``) get an ``https:`` scheme, as in
    the original script; hrefs that already carry a scheme are kept as-is.
    """
    if href.startswith(('http://', 'https://')):
        return f'{href}#down'
    return f'https:{href}#down'


def clean_title(text_nodes):
    """Join the title's text nodes and drop the "免费下载" suffix.

    Joining avoids writing the ``repr`` of a Python list into the CSV, which
    is what ``str(list)`` produced before.
    """
    return ''.join(text_nodes).replace('免费下载', '').strip()


def first_or_empty(values):
    """Return the first item of *values*, or ``''`` when it is empty."""
    return values[0] if values else ''


def collect_detail_hrefs(tree):
    """Return the detail-page hrefs found in the listing page *tree*.

    Returns an empty list when the tree is ``None`` or the result container
    is missing, instead of raising ``IndexError``.
    """
    if tree is None:
        return []
    containers = tree.xpath(CONTAINER_XPATH)
    if not containers:
        return []
    return containers[0].xpath(LINK_XPATH)


def parse_detail(tree):
    """Extract one CSV row (title and first download link) from a detail page."""
    return {
        '名字': clean_title(tree.xpath(NAME_XPATH)),
        '下载地址': first_or_empty(tree.xpath(DOWNLOAD_XPATH)),
    }


def save_rows(rows, path=OUTPUT_PATH):
    """Append *rows* to the CSV at *path*, writing the header only once.

    The file is opened in append mode so earlier runs are preserved; the
    header row is emitted only when the file is still empty.
    """
    with open(path, 'a', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, FIELDNAMES)
        # In append mode the stream starts at end-of-file, so position 0
        # means the file is new or empty.
        if f.tell() == 0:
            writer.writeheader()
        writer.writerows(rows)


def main():
    """Crawl the listing page, scrape every detail page, and save the rows."""
    hrefs = collect_detail_hrefs(fetch_tree(START_URL))

    rows = []
    for href in hrefs:
        time.sleep(REQUEST_DELAY)  # be polite to the server
        try:
            detail_tree = fetch_tree(build_detail_url(href))
        except requests.RequestException as exc:
            # One failing page should not discard the rows already collected.
            print(f'跳过 {href}: {exc}')
            continue
        if detail_tree is None:
            continue
        rows.append(parse_detail(detail_tree))

    if not rows:
        print('未获取到任何数据')
        return

    save_rows(rows)
    print('保存成功')


if __name__ == '__main__':
    main()