# screenshot from original post: 86b5b63b0d8f6b9101fdd79ea9acc9c.png

    1. from bs4 import BeautifulSoup as bs
    2. import requests
    3. from fake_useragent import UserAgent
    4. import os
    5. class Tupian:
    6. def __init__(self,page):
    7. self.page = page
    8. self.base_url = 'https://sc.chinaz.com/'
    9. if int(page) == 1:
    10. self.url = self.base_url + 'tupian/'
    11. else:
    12. self.url = f'{self.base_url}tupian/index_{page}.html'
    13. self.dir = './images'
    14. if not os.path.exists(self.dir):
    15. os.mkdir(self.dir)
    16. def get_data(self):
    17. headers = {
    18. 'User-Agent':UserAgent().random,
    19. 'Referer':self.base_url
    20. }
    21. try:
    22. print(f'获取第{self.page}页图片')
    23. response = requests.get(self.url, headers=headers)
    24. if response.status_code == 200:
    25. response.encoding = 'utf-8'
    26. html = response.text
    27. soup = bs(html,'lxml')
    28. div_list = soup.find_all(name='div',class_='box picblock col3')
    29. for divs in div_list:
    30. img = divs.find('img')
    31. img_name = img['alt']
    32. img_src = 'https://' + img['src2'].split('//')[-1]
    33. print(f'图片名称:{img_name},路径为:{img_src}')
    34. self.downImg(img_name,img_src)
    35. print(f'第{self.page}页数据爬取完成')
    36. else:
    37. print(response.reason)
    38. except Exception as error:
    39. print(error)
    40. # 下载图片
    41. def downImg(self,img_name,img_src):
    42. try :
    43. response = requests.get(img_src)
    44. print(f'下载图片【{img_name}】...')
    45. if response.status_code == 200:
    46. with open(f'{self.dir}/{img_name}.jpg', 'wb') as file:
    47. file.write(response.content)
    48. except Exception as error:
    49. print('Error:',error)
    50. while True: # 翻页
    51. page = input('请输入页数,输入非数字退出')
    52. if page.isdigit():
    53. tupian = Tupian(page)
    54. tupian.get_data()
    55. else:
    56. break