
import os
import re

from bs4 import BeautifulSoup as bs
import requests
from fake_useragent import UserAgent


class Tupian:
    """Scrape one listing page of sc.chinaz.com/tupian and save its images.

    Construct with a page number, then call :meth:`get_data` to fetch the
    listing and download every image found on it into ``./images``.
    """

    # Seconds to wait on any single HTTP request; without a timeout
    # ``requests`` blocks forever when the server stops responding.
    timeout = 10

    # Characters that are path separators or otherwise not allowed in file
    # names on common file systems (plus ASCII control characters).
    _unsafe_chars = re.compile(r'[\\/:*?"<>|\x00-\x1f]')

    def __init__(self, page):
        """Build the listing URL for ``page`` and ensure the output dir exists.

        Args:
            page: Page number as a decimal string (or int). Page 1 maps to
                the bare ``tupian/`` listing; any other value maps to
                ``tupian/index_<page>.html``.

        Raises:
            ValueError: If ``page`` cannot be converted with ``int()``.
        """
        self.page = page
        self.base_url = 'https://sc.chinaz.com/'
        if int(page) == 1:
            self.url = self.base_url + 'tupian/'
        else:
            self.url = f'{self.base_url}tupian/index_{page}.html'
        self.dir = './images'
        # exist_ok avoids the check-then-create race of exists() + mkdir().
        os.makedirs(self.dir, exist_ok=True)

    def get_data(self):
        """Fetch the listing page and download every image it references.

        Progress and errors are printed; nothing is returned. Any exception
        raised while fetching or parsing the page is printed and swallowed so
        the interactive loop can continue with another page.
        """
        headers = {
            'User-Agent': UserAgent().random,
            'Referer': self.base_url,
        }
        try:
            print(f'获取第{self.page}页图片')
            response = requests.get(
                self.url, headers=headers, timeout=self.timeout
            )
            if response.status_code != 200:
                print(response.reason)
                return

            response.encoding = 'utf-8'
            soup = bs(response.text, 'lxml')
            entries = soup.find_all(name='div', class_='box picblock col3')
            for entry in entries:
                img = entry.find('img')
                raw_src = img.get('src2') if img is not None else None
                if not raw_src:
                    # Skip just this entry instead of aborting the whole
                    # page on a missing tag or attribute.
                    print('跳过:未找到图片标签或 src2 属性')
                    continue
                img_name = img.get('alt', '')
                # Keep only what follows the last '//' and force https, so
                # scheme-relative and http URLs are both normalised.
                img_src = 'https://' + raw_src.split('//')[-1]
                print(f'图片名称:{img_name},路径为:{img_src}')
                self.downImg(img_name, img_src)
            print(f'第{self.page}页数据爬取完成')
        except Exception as error:
            # Best-effort scraper: report the failure and carry on.
            print(error)

    def _safe_filename(self, img_name):
        """Return ``img_name`` with characters unsafe in file names replaced."""
        cleaned = self._unsafe_chars.sub('_', img_name).strip(' .')
        return cleaned or 'unnamed'

    # Download image
    def downImg(self, img_name, img_src):
        """Download ``img_src`` and store it as ``<img_name>.jpg``.

        Args:
            img_name: Display name of the image; sanitised before being used
                as the file name so it cannot contain path separators.
            img_src: Absolute URL of the image.

        Errors are printed and swallowed so one failed download does not stop
        the rest of the page. Non-200 responses are ignored.
        """
        try:
            response = requests.get(img_src, timeout=self.timeout)
            print(f'下载图片【{img_name}】...')
            if response.status_code == 200:
                file_name = f'{self._safe_filename(img_name)}.jpg'
                target = os.path.join(self.dir, file_name)
                with open(target, 'wb') as file:
                    file.write(response.content)
        except Exception as error:
            print('Error:', error)


if __name__ == '__main__':
    while True:
        # Paging: keep asking for page numbers until non-numeric input.
        page = input('请输入页数,输入非数字退出')
        # isdecimal() only accepts characters int() can parse; isdigit()
        # also accepts e.g. superscript digits, which make int() raise.
        if page.isdecimal():
            tupian = Tupian(page)
            tupian.get_data()
        else:
            break