爬取博客园翻页页面

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# —————————————-
'''
@author:Jiang_bird
@file:爬取博客园翻页页面.py
@date:2022-02-14
@time:22:09
'''

from urllib.parse import quote

import requests
from fake_useragent import FakeUserAgent

def gethtml(url, timeout=10):
    """Fetch *url* with an HTTP GET and return the response body as text.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely on a stalled connection.

    Returns:
        The decoded response body (``Response.text``).
    """
    headers = {
        # A random User-Agent is generated for every call.
        'user-agent': FakeUserAgent().random,
        # NOTE(review): hard-coded browser session cookie. It will expire and
        # it is a credential; consider loading it from configuration instead.
        'cookie': '_ga=GA1.2.1486139476.1637849118; gads=ID=9efbcc1e267281ba:T=1637849118:S=ALNI_MYz4h0xM544H9-fO0uIv961A3AFSg; .AspNetCore.Session=CfDJ8GsLOKiGtk1Au0UP1SouGdXb3HKnT1BDHwDIEgLqUsvJ74F96rg5Ar8AwPJy7bXztbNXBHgbFrr56LYBD4v52hrFnBSu2cNzi5vnk627xI528NA8EMK%2BbqzkhgxR2hrWBtHcsmJSzhfOjqeX3gNZ%2FMTmUTz9WBAsdkSIC062xt5q; utmc=59123430; Hm_lvt_866c9be12d4a814454792b1fd0fed295=1643113228,1643286992,1643329069,1644847728; Hm_lpvt_866c9be12d4a814454792b1fd0fed295=1644847728; _gid=GA1.2.237520141.1644847729; utma=59123430.1486139476.1637849118.1643106166.1644847993.2; utmz=59123430.1644847993.2.2.utmcsr=cnblogs.com|utmccn=(referral)|utmcmd=referral|utmcct=/; utmt=1; NotRobot=CfDJ8GsLOKiGtk1Au0UP1SouGdVuOo11iNFMV3io8HnsrEhSVc5Y-JkLmEm8SsnRY3SQPb7ku5wh6N_tUZKXPpln_ZTQ-Lx_NqDpgsdSNQZf1adcs2Avar7hDl8Ald3CP5uZiQ; utmb=59123430.2.10.1644847993',
    }
    res = requests.get(url=url, headers=headers, timeout=timeout)
    return res.text

def downhtml(html, page):
    """Save *html* to ``博客园第{page}页.html`` in the current directory.

    Args:
        html: Page source to write; stored as UTF-8 text.
        page: Page number, used to build the output file name.
    """
    filename = f'博客园第{page}页.html'
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(html)
    # Report success only after the file has been written and closed.
    print(f'博客园第{page}页.html,已下载')

# Script entry point: ask for a search keyword and a page count, then download
# every result page from 1 up to and including that count.
if __name__ == '__main__':
    key = input('input key:')
    page = int(input('input page:'))
    # Percent-encode the keyword so characters such as '&', '#' or spaces
    # cannot break the query string.
    keyword = quote(key)
    # Use a separate loop variable instead of rebinding the page count.
    for index in range(1, page + 1):
        url = f'https://zzk.cnblogs.com/s/blogpost?Keywords={keyword}&pageindex={index}'
        html = gethtml(url)
        downhtml(html, index)

爬取一张图片

import requests
from fake_useragent import FakeUserAgent

# Download a single image and store it as tupian.jpg in the current directory.
url = 'https://images.dmzj.com/img/chapterpic/23831/105750/15851968871613.jpg'

headers = {
    'user-agent': FakeUserAgent().random,
    # NOTE(review): presumably the image host checks the Referer before
    # serving the file; confirm before removing this header.
    'Referer': 'https://www.dmzj.com/info/danyuxuanyancai.html',
}
# A timeout keeps the script from hanging forever on a stalled connection.
res = requests.get(url=url, headers=headers, timeout=10)
# Fail loudly on 4xx/5xx instead of saving an error page as a .jpg file.
res.raise_for_status()
with open('tupian.jpg', 'wb') as f:
    f.write(res.content)
print('下载成功')