import json

import requests
from bs4 import BeautifulSoup


class QiuVideoSpider(object):
    """Scrape video posts from qiushibaike.com and save them to a JSON file.

    Workflow (see start()): discover the total page count, build one URL per
    listing page, parse every post on every page, then dump all collected
    post dicts to ``qiushibaikeVideo.json``.
    """

    def __init__(self):
        # NOTE: this must be the dunder ``__init__`` — with a plain ``init``
        # the constructor never runs and every method below fails with
        # AttributeError on the missing instance attributes.
        self.baseURL = 'https://www.qiushibaike.com/video/page/{}/'
        self.URL = 'https://www.qiushibaike.com'
        self.list = []    # accumulated post dicts, one per parsed article
        self.page = '1'   # total page count (string); updated by get_Page()

    def get_URL(self):
        """Return the listing-page URLs for pages 1 .. int(self.page)."""
        url_List = []
        for i in range(1, (int(self.page) + 1)):
            url_List.append(self.baseURL.format(str(i)))
        return url_List

    def get_Request(self, url):
        """Fetch *url* and return the response body decoded as text.

        A timeout is set so a stalled server cannot hang the crawl forever.
        """
        data = requests.get(url, timeout=10).content.decode()
        return data

    def xpath(self, data):
        """Parse one listing page (*data* is its HTML) into self.list.

        Each ``.article`` element yields a dict with avatar URL, nickname,
        title, age/gender text and the video source URL.
        """
        soup = BeautifulSoup(data, 'lxml')
        html_xpath = soup.select('.article')
        for xpath in html_xpath:
            xpathDic = {}
            # 1. Avatar image (site serves protocol-relative //... URLs).
            xpathDic['headerURL'] = 'https:' + xpath.select_one(
                '.author img').get('src')
            # 2. Nickname.
            xpathDic['nick'] = xpath.select_one(
                '.author h2').get_text().replace('\n', '')
            # 3. Post title/content.
            xpathDic['title'] = xpath.select_one('.content').get_text().replace(
                '\n', '')
            # 4. Age/gender badge text.
            xpathDic['age'] = xpath.select_one('.articleGender').get_text()
            # 5. Video source URL (also protocol-relative).
            xpathDic['video'] = 'https:' + xpath.select_one('source').get('src')
            self.list.append(xpathDic)

    def get_Page(self):
        """Fetch page 1 and read the last pagination number into self.page."""
        url = self.baseURL.format(self.page)
        data = self.get_Request(url)
        soup = BeautifulSoup(data, 'lxml')
        self.page = soup.select('.page-numbers')[-1].get_text().replace('\n', '')

    def save_data(self, data):
        """Write *data* to qiushibaikeVideo.json as UTF-8.

        Uses a context manager so the file handle is always closed, and
        ``ensure_ascii=False`` so Chinese text is stored readably instead of
        as \\uXXXX escapes.
        """
        with open('qiushibaikeVideo.json', 'w', encoding='utf-8') as f:
            json.dump(data, f, ensure_ascii=False)

    def start(self):
        """Run the full crawl: page count -> URLs -> parse -> save."""
        self.get_Page()
        url_list = self.get_URL()
        for url in url_list:
            print(url)
            data = self.get_Request(url)
            self.xpath(data)
        self.save_data(self.list)


if __name__ == '__main__':
    QiuVideoSpider().start()