"""
Target site:
    https://pic.netbian.com/4kmeishi/
Goal:
    Download the food images listed on that page.
Modules:
    requests, lxml, os
"""
import os
from urllib.parse import urljoin

import requests
from lxml import etree
# Script body: fetch the listing page, locate every thumbnail anchor, and save
# each thumbnail into a local folder named after the image title.
url = 'https://pic.netbian.com/4kmeishi/'
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.63 Safari/537.36'}
save_dir = '美食图片'
# Seconds to wait on each HTTP request. requests applies no timeout by
# default, so without this a stalled connection would hang the script.
request_timeout = 10
# Characters that cannot appear in a file name (Windows set, which also covers
# the POSIX path separator); scraped titles are mapped through this table.
invalid_name_chars = str.maketrans({ch: '_' for ch in '\\/:*?"<>|'})

# exist_ok=True replaces the exists()/mkdir() pair and its check-then-create race.
os.makedirs(save_dir, exist_ok=True)

# If the page encoding is unknown, run `document.charset` in the browser
# console to find it. The page is decoded as GBK here; undecodable bytes are
# replaced rather than aborting the whole run.
page = requests.get(url, headers=headers, timeout=request_timeout)
page.raise_for_status()  # do not parse an error page as if it were the listing
response = page.content.decode('GBK', errors='replace')
el_list = etree.HTML(response).xpath('//ul[@class="clearfix"]/li/a')

saved_count = 0
for el in el_list:
    src_list = el.xpath('./img/@src')
    name_list = el.xpath('./b/text()')
    # Skip anchors that lack an image or a title instead of raising IndexError.
    if not src_list or not name_list:
        continue

    img_name = name_list[0].strip()
    if not img_name:
        continue

    # urljoin resolves the src correctly whether it is root-relative,
    # page-relative or already absolute (plain concatenation does not).
    pic_url = urljoin(url, src_list[0])
    img_path = os.path.join(save_dir, f"{img_name.translate(invalid_name_chars)}.jpg")

    try:
        pic_response = requests.get(pic_url, headers=headers, timeout=request_timeout)
        pic_response.raise_for_status()  # never save an HTML error body as .jpg
    except requests.RequestException as exc:
        # One broken image should not stop the remaining downloads.
        print(f'{img_name}下载失败: {exc}')
        continue

    with open(img_path, 'wb') as f:
        f.write(pic_response.content)
    saved_count += 1
    print(f'{img_name}下载成功')

# Report the number of images actually written, not the number of anchors found.
print(f'一共获取到{saved_count}个图片')
"""
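Summary:
01. Running `document.charset` in the browser developer-tools console shows
    the encoding of the page. `response.text` decodes the body with the
    encoding that requests infers from the HTTP response headers, which may
    not match the page's real encoding. When the two differ (as with this
    GBK page), decode the raw bytes explicitly, e.g.
    `response.content.decode('GBK')`, or set `response.encoding = 'GBK'`
    before reading `response.text`. Note that `response.content.decode()`
    with no argument assumes UTF-8.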
"""