from bs4 import BeautifulSoup as bs
import requests
from fake_useragent import UserAgent
import os
class Tupian:
    """Scrape one listing page of a umeitu.com image category and download its images."""

    def __init__(self, cate, page):
        """Build the listing URL and ensure the local output directory exists.

        cate: category path fragment ending in '/', e.g. 'meinvtupian/siwameinv/'.
        page: page number (int or numeric str); page 1 is the bare category URL,
              later pages are served as index_<page>.htm.
        """
        self.page = page
        self.cate = cate
        self.base_url = 'https://www.umeitu.com/'
        if int(page) == 1:
            self.url = self.base_url + self.cate
        else:
            self.url = f'{self.base_url}{self.cate}index_{page}.htm'
        # Images are saved under ./<cate>/; create the nested dirs once.
        if not os.path.exists('./' + self.cate):
            os.makedirs('./' + self.cate)

    def get_data(self):
        """Fetch the listing page, extract each thumbnail link, and download the images."""
        headers = {
            'User-Agent': UserAgent().random,
            'Referer': self.base_url,
        }
        try:
            print(f'获取第{self.page}页图片')
            # timeout prevents the scraper from hanging forever on a stalled socket
            response = requests.get(self.url, headers=headers, timeout=10)
            if response.status_code == 200:
                response.encoding = 'utf-8'
                soup = bs(response.text, 'lxml')
                for a in soup.find_all('a', class_='TypeBigPics'):
                    img = a.find('img')
                    span = a.find('span')
                    # Skip malformed anchors instead of raising on None and
                    # aborting the whole page.
                    if img is None or img.get('src') is None \
                            or span is None or span.string is None:
                        continue
                    self.downImg(span.string, img['src'])
                print(f'第{self.page}页数据爬取完成')
            else:
                print(response.reason)
        except Exception as error:
            print(error)

    # Download a single image
    def downImg(self, img_name, img_src):
        """Download *img_src* and save it as ./<cate><img_name>.jpg.

        A failure here is reported and swallowed so the remaining images
        of the page are still downloaded.
        """
        headers = {
            'User-Agent': UserAgent().random,
            # Many image hosts reject requests without a Referer (hotlink protection).
            'Referer': self.base_url,
        }
        try:
            response = requests.get(img_src, headers=headers, timeout=10)
        except requests.RequestException as error:
            print(error)
            return
        print(f'下载图片【{img_name}】...')
        if response.status_code == 200:
            with open(f'./{self.cate}{img_name}.jpg', 'wb') as file:
                file.write(response.content)
# Category paths to scrape (relative to the site root).
cates = [
    'meinvtupian/siwameinv/',
    'meinvtupian/meinvxiezhen/',
    'meinvtupian/nayimeinv/',
    'meinvtupian/jiepaimeinv/',
    'meinvtupian/rentiyishu/'
]
# enumerate replaces the manual page counter: category i is fetched as page i,
# matching the original incrementing behavior exactly.
for page, cate in enumerate(cates, start=1):
    tupian = Tupian(cate, page)
    tupian.get_data()