https://www.cnblogs.com/
https://www.cnblogs.com/#p2
https://www.cnblogs.com/#p3
'''
# 但发现data里有换页的数据则使用表单数据换页
# {
# "CategoryType": "SiteHome",
# "ParentCategoryId": 0,
# "CategoryId": 808,
# "PageIndex": 3,
# "TotalPostCount": 4000,
# "ItemListActionName": "AggSitePostList"
# }

分析url
'''
https://www.cnblogs.com/
https://www.cnblogs.com/#p2
https://www.cnblogs.com/#p3
'''
# 但发现data里有换页的数据则使用表单数据换页
# {
# "CategoryType": "SiteHome",
# "ParentCategoryId": 0,
# "CategoryId": 808,
# "PageIndex": 3,
# "TotalPostCount": 4000,
# "ItemListActionName": "AggSitePostList"
# }

第一题：
import requests
import json

# Exercise 1: look up a word through the Sogou translation suggestion endpoint
# and print the first suggestion.

# Browser-like User-Agent sent with the request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/96.0.4664.110 Safari/537.36 '
}

# Word to translate, read interactively from the user.
key = input("你要翻译的单词：")

# Form fields posted with the request; only 'text' depends on user input.
data = {
    'from': 'auto',
    'to': 'zh-CHS',
    'client': 'web',
    'text': key,
    'uuid': 'e63fd041-d63f-4154-acff-28b2cae23bfc',
    'pid': 'sogou-dict-vr',
    'addSugg': 'on',
}

url = 'https://fanyi.sogou.com/reventondc/suggV3'

# requests has no default timeout, so set one to avoid hanging forever on a
# stalled connection, and fail loudly on an HTTP error status instead of
# trying to parse an error page as JSON.
res = requests.post(url, headers=headers, data=data, timeout=10)
res.raise_for_status()

html = res.content.decode('utf-8')

di = json.loads(html)

# NOTE(review): assumes the response carries a non-empty 'sugg' list whose
# items have a 'v' field -- confirm against the live API.
print(di['sugg'][0]['v'])

第二题：
import requests

'''
https://www.cnblogs.com/
https://www.cnblogs.com/#p2
https://www.cnblogs.com/#p3
'''
# 但发现data里有换页的数据则使用表单数据换页
# {
# "CategoryType": "SiteHome",
# "ParentCategoryId": 0,
# "CategoryId": 808,
# "PageIndex": 3,
# "TotalPostCount": 4000,
# "ItemListActionName": "AggSitePostList"
# }

class Spider:
    """Fetch a range of pages by POST and save each response as an HTML file.

    The page number is sent in the ``PageIndex`` form field, and every
    response body is written to ``第<n>页.html`` in the current working
    directory.
    """

    def __init__(self, url, start, end):
        """Remember the target URL and the page range to fetch.

        Args:
            url: Address every page request is posted to.
            start: First page index to fetch.
            end: Last page index to fetch (inclusive).
        """
        self.url = url
        self.start = start
        self.end = end

    def read_url(self, url, headers, data):
        """POST ``data`` to ``url`` and return the body decoded as UTF-8.

        Args:
            url: Address to post to.
            headers: HTTP headers sent with the request.
            data: Form fields sent as the request body.

        Returns:
            The response content decoded with UTF-8.
        """
        # requests has no default timeout; set one so a stalled connection
        # cannot block the crawl forever.
        res = requests.post(url, headers=headers, data=data, timeout=10)
        return res.content.decode('utf-8')

    def write_html(self, html, n):
        """Write ``html`` to ``第<n>页.html`` (UTF-8) and report success.

        Args:
            html: Text to store.
            n: Page number used in the file name and the status message.
        """
        filename = f'第{n}页.html'
        with open(filename, 'w', encoding='utf-8') as f:
            f.write(html)
        print(f'第{n}页写入成功！')

    def main(self):
        """Fetch and save every page from ``self.start`` to ``self.end`` inclusive."""
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                          'Chrome/96.0.4664.110 Safari/537.36 '
        }
        for i in range(self.start, self.end + 1):
            # Only PageIndex changes between requests; the other fields are
            # the fixed values from the captured form data.
            data = {
                "CategoryType": "SiteHome",
                "ParentCategoryId": 0,
                "CategoryId": 808,
                "PageIndex": i,
                "TotalPostCount": 4000,
                "ItemListActionName": "AggSitePostList",
            }
            html = self.read_url(self.url, headers, data)
            self.write_html(html, i)

# Run the crawl only when this file is executed as a script, not on import.
if __name__ == '__main__':
    # Fetch pages 1 through 3 from https://www.cnblogs.com/.
    sp = Spider('https://www.cnblogs.com/', 1, 3)
    sp.main()

爬虫作业

王家驹-第二次作业-第十二期爬虫