1. """
    2. 目标网站:
    3. https://pic.netbian.com/4kmeishi/
    4. 需求:
    5. 抓取当前页面美食图片
    6. 模块:
    7. requests, lxml, os
    8. """
import os
from urllib.parse import urljoin

import requests
from lxml import etree
    11. url = 'https://pic.netbian.com/4kmeishi/'
    12. headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.63 Safari/537.36'}
    13. if not os.path.exists('美食图片'):
    14. os.mkdir('美食图片')
    15. # 如果不清楚网页的编码可以在console中输入 document.charset查看
    16. response = requests.get(url, headers=headers).content.decode('GBK')
    17. el_list = etree.HTML(response).xpath('//ul[@class="clearfix"]/li/a')
    18. for el in el_list:
    19. pic_url = 'https://pic.netbian.com/' + el.xpath('./img/@src')[0]
    20. img_name = el.xpath('./b/text()')[0]
    21. img_path = f"美食图片/{img_name}.jpg"
    22. pic = requests.get(pic_url, headers=headers).content
    23. with open(img_path, 'wb')as f:
    24. f.write(pic)
    25. print(f'{img_name}下载成功')
    26. print(f'一共获取到{len(el_list)}个图片')
    27. """
    28. 总结:
01. 在开发者工具的 console 中输入 document.charset 可以查看网页的编码；如果结果为 "UTF-8"，通常可以直接用 response.text 获取响应文本，否则应使用 response.content.decode('编码名') 按该编码解码（例如本页使用 'GBK'）。
    30. """