"""
需求:正则抓取糗事百科13页的所有图片
"""
import os
import re
from urllib.parse import urljoin

import requests
if __name__ == '__main__':
    # Download every thumbnail image from pages 1-13 of the image ranking
    # into the local '糗图' directory, reporting progress on stdout.

    # exist_ok=True replaces the exists()/mkdir() pair, which could race if
    # the directory appears between the check and the creation.
    os.makedirs('糗图', exist_ok=True)

    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.63 Safari/537.36'}

    # Captures the src attribute of the <img> inside each "thumb" div.
    # re.S lets '.' span newlines because the markup is multi-line.
    # Compiled once here instead of being rebuilt for every page.
    img_src_pattern = re.compile(r'<div class="thumb">.*?src="(.*?)" alt.*?</div>', re.S)

    # Seconds to wait for a response; without a timeout a stalled
    # connection would block the crawl indefinitely.
    timeout = 10

    # One Session reuses connections across all page and image requests.
    with requests.Session() as session:
        session.headers.update(headers)

        for page in range(1, 14):
            url = f'https://www.qiushibaike.com/imgrank/page/{page}/'
            try:
                page_response = session.get(url, timeout=timeout)
                # Reject HTTP error pages rather than parsing them as listings.
                page_response.raise_for_status()
            except requests.RequestException as exc:
                # A single bad page must not abort the remaining pages.
                print(f'第{page}页请求失败: {exc}')
                continue

            for pic_src in img_src_pattern.findall(page_response.text):
                # The site serves protocol-relative URLs ('//host/path');
                # urljoin resolves those as well as absolute or relative src
                # values, where a blind 'https:' prefix would corrupt them.
                pic_url = urljoin(url, pic_src)
                filename = pic_url.split('/')[-1]
                try:
                    img_response = session.get(pic_url, timeout=timeout)
                    # Avoid saving an HTML error body under an image name.
                    img_response.raise_for_status()
                except requests.RequestException as exc:
                    # Skip this image and keep going with the rest.
                    print(filename, '下载失败:', exc)
                    continue

                with open(os.path.join('糗图', filename), 'wb') as f:
                    f.write(img_response.content)
                print(filename, '下载完成')

            print(f'第{page}页已经下载完成!!!')

    print('所有图片已经下载完毕!!')
"""
总结:
01. 判断是否存在这个路径
os.path.exists('路径')
02. 创建文件夹
os.mkdir('文件夹名')
"""