爬取图片作业
代码
import re
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

# Pagination scheme of the listing pages:
#   page 1 -> https://sc.chinaz.com/tupian/   (index_1.html does not work)
#   page 2 -> https://sc.chinaz.com/tupian/index_2.html
#   page 3 -> https://sc.chinaz.com/tupian/index_3.html


class TuPian:
    """Download the thumbnail images listed on sc.chinaz.com/tupian pages."""

    # The first listing page has no "index_N" suffix; later pages do.
    FIRST_PAGE_URL = 'https://sc.chinaz.com/tupian/'
    PAGE_URL_TEMPLATE = 'https://sc.chinaz.com/tupian/index_{}.html'

    # Browser-like user agent sent with every request.
    HEADERS = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                      'AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/97.0.4692.71 Safari/537.36'
    }

    # Seconds to wait for a server before giving up; without a timeout a
    # stalled connection would block the script forever.
    TIMEOUT = 10

    # Characters that are not allowed in file names on common platforms.
    _UNSAFE_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\r\n\t]')

    def res_url(self, url):
        """Fetch *url* and return the response body decoded as UTF-8.

        Raises:
            requests.RequestException: on network failure, timeout, or an
                HTTP error status.
        """
        res = requests.get(url, headers=self.HEADERS, timeout=self.TIMEOUT)
        # Fail loudly instead of parsing an error page as a listing.
        res.raise_for_status()
        return res.content.decode('utf-8')

    def parse_html(self, html, li):
        """Extract image entries from one listing page and append them to *li*.

        Each appended item is a dict with the keys ``'name'`` (the image's
        ``alt`` text) and ``'url'`` (absolute image URL built from the
        ``src2`` attribute).

        Args:
            html: HTML text of a listing page.
            li: list that receives the extracted items (mutated in place).
        """
        soup = BeautifulSoup(html, 'lxml')
        container = soup.find('div', id="container")
        if container is None:
            # Page without the expected container contributes no items.
            return

        # Only every third anchor is inspected, as in the original script.
        # NOTE(review): presumably each gallery entry owns three <a> tags and
        # the first one wraps the thumbnail - verify against the page markup.
        for anchor in container.find_all('a')[::3]:
            img = anchor.find('img')
            if img is None:
                continue
            name = img.get('alt')
            src = img.get('src2')
            if not name or not src:
                continue
            # urljoin resolves protocol-relative ("//host/x.jpg"), absolute and
            # relative URLs alike; for protocol-relative input the result is
            # the same as prefixing "https:".
            li.append({'name': name, 'url': urljoin(self.FIRST_PAGE_URL, src)})

    def write_data(self, li):
        """Download every item in *li* and save it as ``<name>.png``.

        A failed download is reported and skipped so that one bad image does
        not abort the remaining ones.

        Args:
            li: list of dicts with ``'name'`` and ``'url'`` keys, as produced
                by :meth:`parse_html`.
        """
        for index, item in enumerate(li, start=1):
            # The alt text comes from the web page, so strip characters that
            # would be invalid in a file name or act as path separators.
            name = self._UNSAFE_FILENAME_CHARS.sub('_', item['name']).strip()
            if not name:
                name = f'image_{index}'

            try:
                res = requests.get(
                    item['url'], headers=self.HEADERS, timeout=self.TIMEOUT
                )
                res.raise_for_status()
            except requests.RequestException as exc:
                print(f"第{index}张爬取失败:{exc}")
                continue

            # NOTE(review): the extension is hard-coded to .png regardless of
            # the actual image format - confirm this is intended.
            with open(name + '.png', 'wb') as f:
                f.write(res.content)
            print(f"第{index}张爬取完成!")

    def main(self):
        """Ask for a page count, collect the image list, then download it."""
        li = []
        num = int(input("请输入你要爬取的页数:"))
        for page in range(1, num + 1):
            if page == 1:
                url = self.FIRST_PAGE_URL
            else:
                url = self.PAGE_URL_TEMPLATE.format(page)
            self.parse_html(self.res_url(url), li)
        self.write_data(li)


if __name__ == '__main__':
    t = TuPian()
    t.main()
爬取的内容

图片实例


