from bs4 import BeautifulSoup as bs
import requests
from fake_useragent import UserAgent
import os

class Tupian:
    def __init__(self, cate, page):
        self.page = page
        self.cate = cate
        self.base_url = 'https://www.umeitu.com/'
        # Page 1 is the plain category index; later pages use index_<n>.htm
        if int(page) == 1:
            self.url = self.base_url + self.cate
        else:
            self.url = f'{self.base_url}{self.cate}index_{page}.htm'
        # Make sure the output directory for this category exists
        if not os.path.exists('./' + self.cate):
            os.makedirs('./' + self.cate)
    def get_data(self):
        headers = {
            'User-Agent': UserAgent().random,
            'Referer': self.base_url
        }
        try:
            print(f'Fetching images from page {self.page}')
            # timeout keeps a stalled request from hanging the whole crawl
            response = requests.get(self.url, headers=headers, timeout=10)
            if response.status_code == 200:
                response.encoding = 'utf-8'
                html = response.text
                soup = bs(html, 'lxml')
                # Each gallery is an <a class="TypeBigPics"> holding a cover
                # <img> and a <span> with the gallery title
                a_list = soup.find_all('a', class_='TypeBigPics')
                for a in a_list:
                    img_src = a.find('img')['src']
                    img_name = a.find('span').string
                    self.downImg(img_name, img_src)
                print(f'Page {self.page} scraped')
            else:
                print(response.reason)
        except Exception as error:
            print(error)
    # Download a single image
    def downImg(self, img_name, img_src):
        # Send the same headers here; some image hosts reject
        # requests that arrive without a Referer
        headers = {
            'User-Agent': UserAgent().random,
            'Referer': self.base_url
        }
        print(f'Downloading image [{img_name}] ...')
        response = requests.get(img_src, headers=headers, timeout=10)
        if response.status_code == 200:
            with open(f'./{self.cate}{img_name}.jpg', 'wb') as file:
                file.write(response.content)
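
# A minimal sketch, not part of the original script: gallery titles scraped
# from the page can contain characters that are illegal in file names
# (e.g. '/', '?', '*'). This hypothetical helper strips them before the
# title is used as a path in downImg.
import re

def safe_filename(name, default='untitled'):
    # Replace path separators and other reserved characters with '_'
    cleaned = re.sub(r'[\\/:*?"<>|]', '_', name or '').strip()
    return cleaned or default

# Possible use inside downImg:
#     open(f'./{self.cate}{safe_filename(img_name)}.jpg', 'wb')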

cates = [  # categories to crawl
    'meinvtupian/siwameinv/',
    'meinvtupian/meinvxiezhen/',
    'meinvtupian/nayimeinv/',
    'meinvtupian/jiepaimeinv/',
    'meinvtupian/rentiyishu/'
]

# Note: page increments together with the category, so this fetches page 1
# of the first category, page 2 of the second, and so on; each category
# is crawled exactly once, at a different page number
page = 0
for cate in cates:
    page += 1
    tupian = Tupian(cate, page)
    tupian.get_data()
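
# A sketch of an alternative driver, assuming the intent was to crawl
# several pages of every category rather than one page each; pages_per_cate
# is an assumed knob, not something from the original code.
def crawl_all(cates, pages_per_cate=3):
    for cate in cates:
        for page in range(1, pages_per_cate + 1):
            Tupian(cate, page).get_data()

# crawl_all(cates)  # run this instead of the loop above if that was the goal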