from bs4 import BeautifulSoup as bs
import requests
from fake_useragent import UserAgent
import os


class Tupian:
    def __init__(self, cate, page):
        self.page = page
        self.cate = cate
        self.base_url = 'https://www.umeitu.com/'
        # Page 1 of a category has no suffix; later pages use index_<n>.htm.
        if int(page) == 1:
            self.url = self.base_url + self.cate
        else:
            self.url = f'{self.base_url}{self.cate}index_{page}.htm'
        # Create the output directory for this category if needed.
        os.makedirs('./' + self.cate, exist_ok=True)

    def get_data(self):
        headers = {
            'User-Agent': UserAgent().random,
            'Referer': self.base_url,
        }
        try:
            print(f'Fetching images on page {self.page}')
            response = requests.get(self.url, headers=headers, timeout=10)
            if response.status_code == 200:
                response.encoding = 'utf-8'
                soup = bs(response.text, 'lxml')
                # Each thumbnail sits inside an <a class="TypeBigPics">.
                for a in soup.find_all('a', class_='TypeBigPics'):
                    img = a.find('img')
                    span = a.find('span')
                    if img is None or span is None:
                        continue  # skip anchors missing a thumbnail or title
                    self.downImg(span.string, img['src'])
                print(f'Page {self.page} scraped')
            else:
                print(response.reason)
        except Exception as error:
            print(error)

    # Download a single image to ./<cate><name>.jpg.
    def downImg(self, img_name, img_src):
        print(f'Downloading image [{img_name}] ...')
        # Send the Referer with the image request too; many image hosts
        # reject bare requests.
        headers = {
            'User-Agent': UserAgent().random,
            'Referer': self.base_url,
        }
        response = requests.get(img_src, headers=headers, timeout=10)
        if response.status_code == 200:
            with open(f'./{self.cate}{img_name}.jpg', 'wb') as file:
                file.write(response.content)


cates = [  # categories
    'meinvtupian/siwameinv/',
    'meinvtupian/meinvxiezhen/',
    'meinvtupian/nayimeinv/',
    'meinvtupian/jiepaimeinv/',
    'meinvtupian/rentiyishu/',
]

# Fetch the first page of every category.
for cate in cates:
    Tupian(cate, 1).get_data()
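

# Optional multi-page sketch: `crawl_all` and its `max_page` parameter are
# assumptions for illustration, not taken from the site; the real pagination
# depth of each category is unknown, so adjust max_page before using this.
def crawl_all(cates, max_page=3):
    """Fetch pages 1..max_page of every category."""
    for cate in cates:
        for page in range(1, max_page + 1):
            Tupian(cate, page).get_data()

# Usage: crawl_all(cates, max_page=3)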