import requests
from bs4 import BeautifulSoup
import os
from hashlib import md5


class meizi(object):
    def __init__(self):
        self.baseURL = 'https://www.mzitu.com/tag/youhuo/page/{}/'
        self.headers = {
            'If-None-Match': 'W/"5cc2cd8f-2c58"',
            'Referer': 'http://www.mzitu.com/all/',
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 '
                          '(KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36'
        }
        self.page = '2'

    def getPage(self):
        # Fetch a listing page and read the last page number from the pagination links
        url = self.baseURL.format(self.page)
        data = self.getRequest(url)
        soup = BeautifulSoup(data, 'lxml')
        self.page = soup.select('.page-numbers')[-2].get_text()
        print(self.page)

    def getRequest(self, url):
        # Request a page and return its body as decoded text
        data = requests.get(url, headers=self.headers).content.decode()
        return data

    def getURL(self):
        # Build the list of listing-page URLs from page 1 up to self.page
        urlList = []
        for i in range(1, int(self.page) + 1):
            urlList.append(self.baseURL.format(str(i)))
        return urlList

    def xpath(self, data):
        # Parse a listing page and download every image referenced under #pins
        soup = BeautifulSoup(data, 'lxml')
        items = soup.select('#pins li')
        for item in items:
            dataoriginal = item.select_one('img').get('data-original')
            self.saveImage(dataoriginal)

    def saveImage(self, imageURL, filePath='img'):
        response = requests.get(imageURL, headers=self.headers)
        if response.status_code == 200:
            data = response.content
            try:
                if not os.path.exists(filePath):
                    print('Directory', filePath, 'does not exist, creating it')
                    os.makedirs(filePath)
                # Get the image file extension
                file_suffix = os.path.splitext(imageURL)[1]
                # Build the file name (including the path); the MD5 of the content avoids duplicate files
                filename = '{}/{}{}'.format(filePath, md5(data).hexdigest(), file_suffix)
                with open(filename, 'wb') as f:
                    f.write(data)
            except IOError as e:
                print('File operation failed: {}'.format(e))
            except Exception as e:
                print('Error: {}'.format(e))

    def startRun(self):
        # self.getPage()
        urlList = self.getURL()
        for url in urlList:
            print(url)
            data = self.getRequest(url)
            self.xpath(data)


if __name__ == '__main__':
    meizi().startRun()