# Code - 08-彼岸图网 - 《Python》

"""
目标网站：
    https://pic.netbian.com/4kmeishi/
需求：
    抓取当前页面美食图片
模块：
    requests, lxml, os
"""
import os
from urllib.parse import urljoin

import requests
from lxml import etree
url = 'https://pic.netbian.com/4kmeishi/'
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.63 Safari/537.36'}

# Site root against which the <img src> values found on the page are resolved.
BASE_URL = 'https://pic.netbian.com/'
# Folder (relative to the working directory) that receives the images.
SAVE_DIR = '美食图片'
# Seconds to wait for the server before giving up on a request.
REQUEST_TIMEOUT = 10
# Characters that are path separators or are rejected in Windows file names;
# each one is replaced by "_" before the scraped title is used as a file name.
_UNSAFE_FILENAME_CHARS = str.maketrans({ch: '_' for ch in '\\/:*?"<>|'})


def build_image_url(src):
    """Return the absolute URL for an ``<img src>`` value from the page.

    Root-relative (``/uploads/a.jpg``) and relative (``uploads/a.jpg``)
    values are resolved against ``BASE_URL``; an already absolute URL is
    returned unchanged.
    """
    return urljoin(BASE_URL, src)


def safe_filename(name):
    """Return *name* made safe for use as a single file-name component.

    Path separators and characters Windows rejects are replaced by ``_``
    and surrounding whitespace is stripped. If nothing is left, the
    placeholder ``'unnamed'`` is returned.
    """
    cleaned = name.translate(_UNSAFE_FILENAME_CHARS).strip()
    return cleaned or 'unnamed'


def main():
    """Download every image linked from the listing page into ``SAVE_DIR``.

    Raises:
        requests.RequestException: if the listing page itself cannot be
            fetched. Failures on individual images are reported and skipped.
    """
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(SAVE_DIR, exist_ok=True)

    page = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    page.raise_for_status()
    # If you are unsure of a page's encoding, run `document.charset` in the
    # browser console. errors='replace' keeps one bad byte from aborting the run.
    html = page.content.decode('GBK', errors='replace')

    el_list = etree.HTML(html).xpath('//ul[@class="clearfix"]/li/a')
    saved = 0
    for el in el_list:
        srcs = el.xpath('./img/@src')
        titles = el.xpath('./b/text()')
        if not srcs or not titles:
            # Not every <a> in the list is guaranteed to be an image card.
            continue

        img_name = safe_filename(titles[0])
        img_path = os.path.join(SAVE_DIR, f'{img_name}.jpg')
        try:
            pic = requests.get(build_image_url(srcs[0]), headers=headers,
                               timeout=REQUEST_TIMEOUT)
            # Without this an HTTP error page would be saved as a .jpg.
            pic.raise_for_status()
        except requests.RequestException as exc:
            print(f'{img_name}下载失败: {exc}')
            continue

        with open(img_path, 'wb') as f:
            f.write(pic.content)
        print(f'{img_name}下载成功')
        saved += 1

    # Report what was actually written, not how many anchors were found.
    print(f'一共获取到{saved}个图片')


if __name__ == '__main__':
    main()
"""
总结：
    01.在开发者工具的 console 中输入 document.charset 可以查看网页的编码；如果结果为 "UTF-8"，通常可以直接用 response.text 获取响应内容，否则用 response.content.decode('编码名')（例如本站为 'GBK'）显式解码。
"""