# screenshot from original post: 86b5b63b0d8f6b9101fdd79ea9acc9c.png

    1. from bs4 import BeautifulSoup as bs
    2. import requests
    3. from fake_useragent import UserAgent
    4. import os
    5. class Tupian:
    6. def __init__(self,page):
    7. self.page = page
    8. self.base_url = 'https://sc.chinaz.com/'
    9. if int(page) == 1:
    10. self.url = self.base_url + 'tupian/'
    11. else:
    12. self.url = f'{self.base_url}tupian/index_{page}.html'
    13. self.dir = './images'
    14. if not os.path.exists(self.dir):
    15. os.mkdir(self.dir)
    16. def get_data(self):
    17. headers = {
    18. 'User-Agent':UserAgent().random,
    19. 'Referer':self.base_url
    20. }
    21. try:
    22. print(f'获取第{self.page}页图片')
    23. response = requests.get(self.url, headers=headers)
    24. if response.status_code == 200:
    25. response.encoding = 'utf-8'
    26. html = response.text
    27. soup = bs(html,'lxml')
    28. div_list = soup.find_all(name='div',class_='box picblock col3')
    29. for divs in div_list:
    30. img = divs.find('img')
    31. img_name = img['alt']
    32. img_src = 'https://' + img['src2'].split('//')[-1]
    33. print(f'图片名称:{img_name},路径为:{img_src}')
    34. self.downImg(img_name,img_src)
    35. print(f'第{self.page}页数据爬取完成')
    36. else:
    37. print(response.reason)
    38. except Exception as error:
    39. print(error)
    40. # 下载图片
    41. def downImg(self,img_name,img_src):
    42. try :
    43. response = requests.get(img_src)
    44. print(f'下载图片【{img_name}】...')
    45. if response.status_code == 200:
    46. with open(f'{self.dir}/{img_name}.jpg', 'wb') as file:
    47. file.write(response.content)
    48. except Exception as error:
    49. print('Error:',error)
    50. while True: # 翻页
    51. page = input('请输入页数,输入非数字退出')
    52. if page.isdigit():
    53. tupian = Tupian(page)
    54. tupian.get_data()
    55. else:
    56. break