"""目标网站: https://pic.netbian.com/4kmeishi/需求: 抓取当前页面美食图片模块: requests, lxml, os"""import requests, osfrom lxml import etreeurl = 'https://pic.netbian.com/4kmeishi/'headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.63 Safari/537.36'}if not os.path.exists('美食图片'): os.mkdir('美食图片')# 如果不清楚网页的编码可以在console中输入 document.charset查看response = requests.get(url, headers=headers).content.decode('GBK')el_list = etree.HTML(response).xpath('//ul[@class="clearfix"]/li/a')for el in el_list: pic_url = 'https://pic.netbian.com/' + el.xpath('./img/@src')[0] img_name = el.xpath('./b/text()')[0] img_path = f"美食图片/{img_name}.jpg" pic = requests.get(pic_url, headers=headers).content with open(img_path, 'wb')as f: f.write(pic) print(f'{img_name}下载成功')print(f'一共获取到{len(el_list)}个图片')"""总结: 01.在开发者工具中的console中输入 document.charset可以查看网页的编码,如果结果为"UTF-8"那么获取响应直接用response.text否则用response.content.decode() """