![86b5b63b0d8f6b9101fdd79ea9acc9c.png](/uploads/projects/xiaomage-1qtgi@ybkgp5/3c2087c7af8d9df15fe46736446e7a13.png)
from bs4 import BeautifulSoup as bs
import requests
from fake_useragent import UserAgent
import os
class Tupian:
    """Download the preview images listed on one page of sc.chinaz.com/tupian.

    Constructing an instance computes the listing URL for ``page`` and makes
    sure the output directory ``./images`` exists. ``get_data`` then fetches
    the listing and saves every image it finds through ``downImg``.

    Attributes:
        page: The page value exactly as passed in (used in progress output).
        base_url: Site root, also sent as the ``Referer`` header.
        url: Listing URL derived from ``page``.
        dir: Directory the images are written to.
    """

    # Seconds to wait on any single HTTP request before giving up; without a
    # timeout ``requests`` can block forever on a stalled connection.
    timeout = 10

    # Characters that are illegal or unsafe in file names on common platforms,
    # mapped to '_' for use with ``str.translate``.
    _UNSAFE_CHARS = dict.fromkeys(map(ord, '\\/:*?"<>|\r\n\t\x00'), '_')

    def __init__(self, page):
        """Prepare the listing URL and output directory.

        Args:
            page: Page number as an ``int`` or a numeric string.

        Raises:
            ValueError: If ``page`` cannot be converted with ``int()``.
        """
        self.page = page
        self.base_url = 'https://sc.chinaz.com/'
        # Build the URL from the parsed number so inputs like '02' or ' 2'
        # still resolve to 'index_2.html'.
        page_number = int(page)
        if page_number == 1:
            self.url = self.base_url + 'tupian/'
        else:
            self.url = f'{self.base_url}tupian/index_{page_number}.html'
        self.dir = './images'
        # exist_ok avoids the check-then-create race of exists() + mkdir().
        os.makedirs(self.dir, exist_ok=True)

    @staticmethod
    def _normalize_src(src):
        """Return ``src`` as an ``https://`` URL.

        Everything up to and including the FIRST ``//`` is replaced, so
        protocol-relative (``//host/x``) and absolute (``http://host/x``)
        values both work and a ``//`` later in the path is preserved.
        """
        _, separator, remainder = src.partition('//')
        return 'https://' + (remainder if separator else src)

    @classmethod
    def _safe_filename(cls, name):
        """Return ``name`` with path-hostile characters replaced by ``_``.

        Falls back to ``'unnamed'`` when nothing usable is left.
        """
        cleaned = (name or '').translate(cls._UNSAFE_CHARS).strip(' .')
        return cleaned or 'unnamed'

    def get_data(self):
        """Fetch the listing page and download every image found on it.

        Progress and errors are reported with ``print``; failures inside the
        request/parse step are caught and printed rather than raised.
        """
        headers = {
            'User-Agent': UserAgent().random,
            'Referer': self.base_url,
        }
        try:
            print(f'获取第{self.page}页图片')
            response = requests.get(
                self.url, headers=headers, timeout=self.timeout
            )
            if response.status_code != 200:
                print(response.reason)
                return
            response.encoding = 'utf-8'
            soup = bs(response.text, 'lxml')
            for block in soup.find_all(name='div', class_='box picblock col3'):
                img = block.find('img')
                raw_src = img.get('src2') if img is not None else None
                if not raw_src:
                    # A single malformed entry must not abort the whole page.
                    print('跳过:未找到图片地址')
                    continue
                img_src = self._normalize_src(raw_src)
                # Without alt text, name the file after the URL's last segment.
                img_name = (
                    img.get('alt')
                    or os.path.splitext(os.path.basename(img_src))[0]
                )
                print(f'图片名称:{img_name},路径为:{img_src}')
                self.downImg(img_name, img_src)
            print(f'第{self.page}页数据爬取完成')
        except Exception as error:
            # Page-level boundary: report the problem and return so an
            # interactive caller can move on to another page.
            print(error)

    # Download a single image
    def downImg(self, img_name, img_src):
        """Download ``img_src`` and save it as ``<dir>/<img_name>.jpg``.

        The file name is sanitized first; the ``.jpg`` suffix is always used.
        Non-200 responses print the HTTP reason; any other failure prints
        ``Error: ...`` and the method returns normally.

        Args:
            img_name: Desired file name without extension.
            img_src: Absolute URL of the image.
        """
        # Announce before the request so the message precedes the actual work.
        print(f'下载图片【{img_name}】...')
        target = os.path.join(self.dir, f'{self._safe_filename(img_name)}.jpg')
        try:
            response = requests.get(img_src, timeout=self.timeout)
            if response.status_code != 200:
                print(response.reason)
                return
            with open(target, 'wb') as file:
                file.write(response.content)
        except Exception as error:
            # Per-image boundary: one failed download must not stop the rest.
            print('Error:', error)
def main():
    """Prompt for page numbers and scrape each one until non-numeric input.

    The loop ends on the first input that is not made up solely of decimal
    digits, or when standard input is closed.
    """
    while True:  # paging loop: one listing page per iteration
        try:
            page = input('请输入页数,输入非数字退出')
        except EOFError:
            # stdin closed (piped input, Ctrl-D): treat it as a request to quit.
            break
        # isdecimal() accepts exactly the digit characters int() can parse;
        # isdigit() also accepts e.g. superscripts, which make int() raise.
        if not page.isdecimal():
            break
        Tupian(page).get_data()


if __name__ == '__main__':
    main()