import os
from hashlib import md5

import requests
from bs4 import BeautifulSoup
    5. class meizi(object):
    6. def init(self):
    7. self.baseURL = 'https://www.mzitu.com/tag/youhuo/page/{}/'
    8. self.headers = {'If-None-Match': 'W/"5cc2cd8f-2c58"',
    9. "Referer": "http://www.mzitu.com/all/",
    10. 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 UBrowser/6.1.2107.204 SafarMozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36'
    11. }
    12. self.page = '2'
    13. def getPage(self):
    14. url = self.baseURL.format(self.page)
    15. data = self.getRequest(url)
    16. soup = BeautifulSoup(data, 'lxml')
    17. self.page = soup.select('.page-numbers')[-2].get_text()
    18. print(self.page)
    19. def getRequest(self, url):
    20. data = requests.get(url, headers=self.headers).content.decode()
    21. return data
    22. def getURL(self):
    23. urlList = []
    24. for i in range(1, (int(self.page) + 1)):
    25. urlList.append(self.baseURL.format(str(i)))
    26. return urlList
    27. def xpath(self, data):
    28. soup = BeautifulSoup(data, 'lxml')
    29. list = soup.select('#pins li')
    30. for item in list:
    31. dataoriginal = item.select_one('img').get('data-original')
    32. self.saveImage(dataoriginal)
    33. def saveImage(self, imageURL, filePath='img'):
    34. respose = requests.get(imageURL, headers=self.headers)
    35. if respose.status_code == 200:
    36. data = respose.content
    37. try:
    38. if not os.path.exists(filePath):
    39. print('文件夹', filePath, '不存在,重新建立')
    40. os.makedirs(filePath)
    41. # 获得图片后缀
    42. file_suffix = os.path.splitext(imageURL)[1]
    43. # 拼接图片名(包含路径)
    44. filename = '{}/{}{}'.format(filePath, md5(data).hexdigest(),
    45. file_suffix)
    46. with open(filename, 'wb')as f:
    47. f.write(data)
    48. except IOError as e:
    49. print('文件操作失败:' + e)
    50. except Exception as e:
    51. print('错误:' + e)
    52. def startRun(self):
    53. # self.getPage()
    54. urlList = self.getURL()
    55. for url in urlList:
    56. print(url)
    57. data = self.getRequest(url)
    58. self.xpath(data)
    59. if name == 'main':
    60. meizi().startRun()