import requests
import re
import json
import os
from fake_useragent import UserAgent
class LiVideo:
    """Download every video listed on one pearvideo.com category page.

    The category listing page is fetched once and cached on disk
    (``category_<id>page.html``); later runs parse the cached copy.
    Each video found on the page is saved as ``<title>.mp4`` inside
    ``./video-<id>``.
    """

    # Seconds to wait on any single HTTP request before giving up, so a
    # stalled connection cannot hang the whole run.
    REQUEST_TIMEOUT = 30

    def __init__(self, cate_id):
        """Prepare URLs and paths for a category and create its video folder.

        Args:
            cate_id: Category id as a string (the part after ``category_``
                in the site URL); it is concatenated into URLs and paths.
        """
        self.base_url = 'https://www.pearvideo.com/'  # site root
        self.url = self.base_url + 'category_' + cate_id  # category listing page
        self.file_path = 'category_' + cate_id + 'page.html'  # cached listing page
        self.videoDir = './video-' + cate_id  # download directory for this category
        self.pre_video = self.base_url + 'videoStatus.jsp?contId='  # per-video status endpoint
        self.checkDir(self.videoDir)

    def checkDir(self, dirPath):
        """Create ``dirPath`` (including missing parents) if it does not exist."""
        # exist_ok avoids the check-then-create race of exists() + mkdir().
        os.makedirs(dirPath, exist_ok=True)

    def getVideo(self):
        """Entry point: use the cached listing page if present, else fetch it."""
        if os.path.exists(self.file_path):
            self.getVideoInfo()
        else:
            self.savePageHtml()

    def savePageHtml(self):
        """Fetch the category page, cache it to ``file_path``, then download.

        On a non-200 response the status code is printed and nothing is
        cached or downloaded.
        """
        response = requests.get(self.url, timeout=self.REQUEST_TIMEOUT)
        if response.status_code != 200:
            print(response.status_code)
            return
        with open(self.file_path, 'w', encoding='utf-8') as file:
            file.write(response.text)
        # Parse the page that was just cached and download its videos.
        self.getVideoInfo()

    def getVideoInfo(self):
        """Parse the cached listing page and download every video on it."""
        with open(self.file_path, 'r', encoding='utf-8') as file:
            html = file.read()
        videoes = self._parseVideos(html)
        if not videoes:
            print('该页面不存在')
            return
        print('开始下载视频...')
        for video in videoes:
            self._downloadVideo(video)
        print('下载完毕!!!')

    @staticmethod
    def _parseVideos(html):
        """Extract the video entries from a category page.

        Returns:
            A list of dicts with keys ``title``, ``author``, ``image`` and
            ``videoId``, one per video id found in ``html``.
        """
        images = re.findall(r'<div class="verimg-view">.*? url\((.*?)\);">', html)
        titles = re.findall(r'<div class="vervideo-title">(.*?)</div>', html)
        videoIds = re.findall(r'<a href="video_(.*?)" class="vervideo-lilink actplay">', html)
        authos = re.findall(r'<a href=".*?" class="column">(.*?)</a>', html)
        videoes = []
        for i, videoId in enumerate(videoIds):
            # The four lists are matched independently and may differ in
            # length; fall back to defaults instead of raising IndexError.
            videoes.append({
                'title': titles[i] if i < len(titles) else videoId,
                'author': authos[i] if i < len(authos) else '',
                'image': images[i] if i < len(images) else '',
                'videoId': videoId,
            })
        return videoes

    @staticmethod
    def _buildPlayUrl(srcUrl, videoId):
        """Turn the URL reported by the status endpoint into the playable one.

        Reported:  .../20211229/1641655664960-10054243-161801-hd.mp4
        Playable:  .../20211229/cont-<videoId>-10054243-161801-hd.mp4
        """
        return re.sub(r'/(\d+)-', f'/cont-{videoId}-', srcUrl)

    @staticmethod
    def _safeFileName(title, fallback):
        """Return ``title`` with characters that are invalid in file names
        replaced by ``_``; use ``fallback`` when nothing usable remains."""
        cleaned = re.sub(r'[\\/:*?"<>|\x00-\x1f]', '_', title).strip(' .')
        return cleaned or fallback

    def _downloadVideo(self, video):
        """Resolve and download a single video; report failures and move on."""
        videoId = video['videoId']
        headers = {
            'User-Agent': UserAgent().random,
            'Referer': self.base_url + 'video_' + videoId,
        }
        try:
            response = requests.get(self.pre_video + videoId, headers=headers,
                                    timeout=self.REQUEST_TIMEOUT)
            if response.status_code != 200:
                print(response.status_code)
                return
            try:
                srcUrl = json.loads(response.text)['videoInfo']['videos']['srcUrl']
            except (ValueError, KeyError, TypeError):
                # Payload is not the expected JSON shape; show what the
                # server sent and skip this video.
                print(response.text)
                return
            videoContent = requests.get(self._buildPlayUrl(srcUrl, videoId),
                                        timeout=self.REQUEST_TIMEOUT)
            # Check the download response itself, not the status request.
            if videoContent.status_code != 200:
                print(videoContent.status_code)
                return
            print(f'下载视频:{video["title"]}中')
            fileName = self._safeFileName(video['title'], videoId) + '.mp4'
            with open(os.path.join(self.videoDir, fileName), 'wb') as file:
                file.write(videoContent.content)
        except (requests.RequestException, OSError) as error:
            # One failed video must not abort the rest of the batch.
            print(error)
if __name__ == '__main__':
    # Run only when executed as a script, so importing this module does not
    # block on input() or start network work.
    # Strip stray whitespace: the id is spliced directly into URLs and paths.
    cate_id = input('请输入分类id').strip()
    if not cate_id:
        raise SystemExit('分类id不能为空')
    video = LiVideo(cate_id)
    video.getVideo()
# Usage: for a page such as https://www.pearvideo.com/category_6, enter the value after "category_" (here: 6) as the category id.