作业1:
- 目标网站:https://fanyi.sogou.com/text
- 爬取要求:
- 1、输入要翻译的内容
- 2、通过post请求拿到json数据(可以用urllib模块,也可以用requests模块)
- 3、把翻译后的数据提取出来,直接打印到控制台就行
代码:
import requests
import json
from requests.exceptions import RequestException


class Transpc:
    """Translate text to English via the Sogou web translation API."""

    def __init__(self, search):
        # search: the text to translate (source language is auto-detected).
        self.search = search

    def trans(self):
        """POST the text to the Sogou API and return the translated string.

        Returns '请求失败' when the API reports a non-success error code,
        or the raised exception when the HTTP request itself fails.
        """
        url = 'https://fanyi.sogou.com/api/transpc/text/result'
        data = {
            "from": "auto",
            "to": "en",
            "text": self.search,
            "client": "pc",
            "fr": "browser_pc",
            "needQc": 1,
            # NOTE(review): "s" looks like a request signature and "uuid" a
            # session id captured from a browser — presumably they expire;
            # refresh them if the API starts rejecting requests.
            "s": "7d472ca6863377bfec86011a9486bdd2",
            "uuid": "dd0ea5cc-64ab-40be-a1b4-e577bfbc965a",
            "exchange": "false",
        }
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36",
            "Host": "fanyi.sogou.com",
            "Cookie": "ABTEST=7|1641552779|v17;SNUID=39ED2A00B7BD6C4C72F82FAFB889C6C5;IPLOC=CN3301;SUID=81559DB73481A20A0000000061D81B8B;wuid=1641552779646;FUV=31e661252502f474d57365768288e3d1;translate.sess=e33281fc-b170-49e6-992b-47f9c73aa08e;SUV=1641522943266;SGINPUT_UPSCREEN=1641522943282",
        }
        try:
            # timeout added so a hung connection cannot block the script forever
            response = requests.post(url, data=data, headers=headers, timeout=10)
            result = response.json()
            errorCode = result['data']['translate']['errorCode']
            # errorCode 10 marks success in this API's responses; anything
            # else is treated as a failed request.
            if int(errorCode) != 10:
                return '请求失败'
            return result['data']['sentencesData']['trans_result'][0]['trans_text']
        except RequestException as error:
            # RequestException is the base of ReadTimeout and HTTPError, so a
            # single clause covers the three identical handlers of the original.
            return error


if __name__ == '__main__':
    search = input('请输入您要翻译的内容')
    transpc = Transpc(search)
    result = transpc.trans()
    print(f'翻译的结果为:{result}')
作业2:
- 目标网站:https://www.cnblogs.com/
- 爬取要求:
- 1、输入要搜索的内容
- 2、翻页爬取相关页面html代码
3、保存到本地 ``` import requests from requests.exceptions import HTTPError,ReadTimeout,RequestException class Cnblog: def init(self,search,page): self.search = search self.page = page
def getPageData(self):
if self.page == 1:url = 'https://zzk.cnblogs.com/s/blogpost?w=' + self.search
else:
url = f'https://zzk.cnblogs.com/s/blogpost?Keywords={self.search}&pageindex={self.page}'
response = requests.get(url) if response.status_code == 200:
with open(f'爬取第{self.page}页数据.html','w+',encoding='utf-8') as f:f.write(response.text)
else:
return response.reason
if name == ‘main‘: search = input(‘请输入您要查询的内容’) while True: page = input(‘请输入要搜索的页数,输入0退出爬虫’) if int(page) == 0: break else: print(f’开始爬取第{page}页数据’) obj = Cnblog(search, page) obj.getPageData() ```
