
"""Download every video listed on a pearvideo.com category page.

The category id is the part of the category URL that follows ``category_``.
"""

import json
import os
import re

import requests
from fake_useragent import UserAgent


class LiVideo:
    """Scraper for a single pearvideo.com category.

    The category page is cached on disk as ``category_<id>page.html`` and
    each video is written to ``./video-<id>/<title>.mp4``.
    """

    def __init__(self, cate_id):
        """Derive the URLs and local paths for *cate_id* and create the video directory.

        Args:
            cate_id: Category id as a string; it is concatenated into URLs
                and file-system paths.
        """
        self.base_url = 'https://www.pearvideo.com/'  # site root
        self.url = self.base_url + 'category_' + cate_id  # category page URL
        self.file_path = 'category_' + cate_id + 'page.html'  # cached copy of the page
        self.videoDir = './video-' + cate_id  # directory the videos are saved to
        self.pre_video = self.base_url + 'videoStatus.jsp?contId='  # per-video status endpoint
        self.checkDir(self.videoDir)

    def checkDir(self, dirPath):
        """Create the directory *dirPath* if it does not exist yet."""
        os.makedirs(dirPath, exist_ok=True)

    def getVideo(self):
        """Download the category's videos, fetching the page first when it is not cached."""
        if os.path.exists(self.file_path):
            self.getVideoInfo()
        else:
            self.savePageHtml()

    def savePageHtml(self):
        """Fetch the category page, cache it in ``self.file_path`` and start downloading.

        When the server does not answer with HTTP 200 the status code is
        printed and nothing is written or parsed.
        """
        response = requests.get(self.url)
        if response.status_code != 200:
            print(response.status_code)
            return
        with open(self.file_path, 'w', encoding='utf-8') as file:
            file.write(response.text)
        # Only parse once the page is really on disk.
        self.getVideoInfo()

    @staticmethod
    def _parseVideos(html):
        """Return one dict (title, author, image, videoId) per video found in *html*."""
        images = re.findall(r'<div class="verimg-view">.*? url\((.*?)\);">', html)
        titles = re.findall(r'<div class="vervideo-title">(.*?)</div>', html)
        videoIds = re.findall(r'<a href="video_(.*?)" class="vervideo-lilink actplay">', html)
        authors = re.findall(r'<a href=".*?" class="column">(.*?)</a>', html)
        # zip stops at the shortest list, so a page where one pattern matched
        # fewer times cannot raise IndexError.
        return [
            {'title': title, 'author': author, 'image': image, 'videoId': video_id}
            for title, author, image, video_id in zip(titles, authors, images, videoIds)
        ]

    @staticmethod
    def _safeFileName(title):
        """Return *title* with path separators and Windows-reserved characters replaced by ``_``."""
        return re.sub(r'[\\/:*?"<>|\r\n\t]', '_', title).strip()

    def _downloadVideo(self, video, user_agent):
        """Resolve the playable URL of *video* and save the file into ``self.videoDir``.

        Args:
            video: Dict with at least the keys ``videoId`` and ``title``.
            user_agent: ``UserAgent`` instance supplying a random User-Agent header.

        Failures (non-200 responses, unexpected JSON, network or file errors)
        are printed and skipped so the remaining videos are still processed.
        """
        video_id = video['videoId']
        headers = {
            'User-Agent': user_agent.random,
            'Referer': self.base_url + 'video_' + video_id,
        }
        response = requests.get(self.pre_video + video_id, headers=headers)
        if response.status_code != 200:
            print(response.status_code)
            return

        try:
            src_url = json.loads(response.text)['videoInfo']['videos']['srcUrl']
        except (ValueError, KeyError, TypeError) as error:
            print(error)
            return

        # The API returns a URL such as
        #   https://video.pearvideo.com/mp4/third/20211229/1641655664960-10054243-161801-hd.mp4
        # while the playable one looks like
        #   https://video.pearvideo.com/mp4/third/20220113/cont-1749919-10054243-151326-hd.mp4
        # i.e. the leading number of the file name is replaced by "cont-<videoId>".
        # A callable replacement keeps the id from being read as a regex template.
        play_url = re.sub(r'/(\d+)-', lambda _match: f'/cont-{video_id}-', src_url)

        try:
            video_response = requests.get(play_url)
            # Check the download response itself, not the status-API response.
            if video_response.status_code != 200:
                print(video_response.status_code)
                return
            print(f'下载视频:{video["title"]}中')
            file_name = self._safeFileName(video['title']) or video_id
            with open(f'{self.videoDir}/{file_name}.mp4', 'wb') as file:
                file.write(video_response.content)
        except (requests.RequestException, OSError) as error:
            print(error)

    def getVideoInfo(self):
        """Parse the cached category page and download every video it lists."""
        with open(self.file_path, 'r', encoding='utf-8') as file:
            html = file.read()

        videoes = self._parseVideos(html)
        if not videoes:
            print('该页面不存在')
            return

        print('开始下载视频...')
        user_agent = UserAgent()  # built once and reused for every request
        for video in videoes:
            self._downloadVideo(video, user_agent)
        print('下载完毕!!!')


if __name__ == '__main__':
    cate_id = input('请输入分类id')
    video = LiVideo(cate_id)
    video.getVideo()
# Usage: open a category page such as https://www.pearvideo.com/category_6 and enter the value after "category_" (6 in this example) at the prompt.
