# Target site:
# https://aspx.sc.chinaz.com/query.aspx?keyword=%E5%85%8D%E8%B4%B9&issale=&classID=864&page=1
# Task:
#   1. Page through the listing and fetch each page's HTML source.
#   2. Use XPath to extract every template's name and download link.
#   3. Save the collected data to a CSV file.
import csv

import requests
from lxml import etree

HEADERS = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36'
    )
}


def scrape(pages=range(1, 7)):
    """Crawl the chinaz template listing and return [{'url', 'name'}, ...].

    pages: iterable of 1-based listing page numbers to fetch (default 1-6,
    matching the original script's range(1, 7)).
    """
    data_list = []
    for n in pages:
        url = (
            'https://aspx.sc.chinaz.com/query.aspx'
            f'?keyword=%E5%85%8D%E8%B4%B9&issale=&classID=864&page={n}'
        )
        # timeout keeps a stalled connection from hanging the whole crawl
        response = requests.get(url, headers=HEADERS, timeout=10)
        html = etree.HTML(response.content)
        # Each template card is a <div> under the main result container.
        for div_tag in html.xpath('//div[@id="main"]/div[1]/div'):
            detail_hrefs = div_tag.xpath('./a[1]/@href')
            if not detail_hrefs:
                continue  # card without a detail link (e.g. ad/filler div)
            detail_url = 'https:' + detail_hrefs[0]
            res = requests.get(detail_url, headers=HEADERS, timeout=10)
            detail_html = etree.HTML(res.content)
            # The detail page lists several mirrors; take the
            # "福建电信下载" (Fujian Telecom) mirror link.
            download_links = detail_html.xpath('//a[text()="福建电信下载"]/@href')
            title_texts = div_tag.xpath('./p[1]/a[1]/text()')
            if not download_links or not title_texts:
                continue  # mirror or title missing — skip instead of crashing
            item = {
                'url': download_links[0],
                # Strip the boilerplate "免费" / "下载" from the title.
                'name': title_texts[0].replace('免费', '').replace('下载', ''),
            }
            print(item)
            data_list.append(item)
    return data_list


def save_csv(rows, path='mob.csv'):
    """Write scraped rows to *path* as CSV (BOM'd UTF-8 so Excel opens it)."""
    with open(path, 'w', encoding='utf-8-sig', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=['url', 'name'])
        writer.writeheader()
        writer.writerows(rows)


if __name__ == '__main__':
    save_csv(scrape())