image.png

    1. import requests
    2. import re
    3. import json
    4. import os
    5. from fake_useragent import UserAgent
    class LiVideo:
        """Scrape one pearvideo.com category page and download the videos it lists.

        The category page is cached on disk (``self.file_path``); when the cache
        already exists it is parsed directly instead of being fetched again.
        Videos are written to ``self.videoDir`` as ``<title>.mp4``.
        """

        # Seconds to wait on a stalled connection before giving up on a request.
        REQUEST_TIMEOUT = 30

        def __init__(self, cate_id):
            """Build the URLs/paths for a category and create its download folder.

            Args:
                cate_id: The part of the category URL after ``category_``.
                    Converted to ``str`` and stripped, because it is concatenated
                    verbatim into the URL and into local paths.
            """
            cate_id = str(cate_id).strip()
            self.base_url = 'https://www.pearvideo.com/'  # site root
            self.url = self.base_url + 'category_' + cate_id  # category listing page
            self.file_path = 'category_' + cate_id + 'page.html'  # local cache of the listing page
            self.videoDir = './video-' + cate_id  # download folder for this category
            self.pre_video = self.base_url + 'videoStatus.jsp?contId='  # per-video status endpoint
            self.checkDir(self.videoDir)

        def checkDir(self, dirPath):
            """Create ``dirPath`` (and missing parents) if it does not exist yet."""
            # exist_ok avoids the check-then-create race of exists() + mkdir().
            os.makedirs(dirPath, exist_ok=True)

        def getVideo(self):
            """Download the category's videos, reusing the cached page if present."""
            if os.path.exists(self.file_path):
                self.getVideoInfo()
            else:
                self.savePageHtml()

        def savePageHtml(self):
            """Fetch the category page, cache it to disk, then download its videos.

            On a non-200 response the status code is printed and nothing is saved.
            """
            response = requests.get(self.url, timeout=self.REQUEST_TIMEOUT)
            if response.status_code != 200:
                print(response.status_code)
                return
            with open(self.file_path, 'w', encoding='utf-8') as file:
                file.write(response.text)
            self.getVideoInfo()

        @staticmethod
        def _parseVideos(html):
            """Extract ``title``/``author``/``image``/``videoId`` dicts from page HTML.

            The four fields are matched independently and paired by position;
            ``zip`` stops at the shortest list so a missing field cannot raise
            ``IndexError``.
            """
            images = re.findall(r'<div class="verimg-view">.*? url\((.*?)\);">', html)
            titles = re.findall(r'<div class="vervideo-title">(.*?)</div>', html)
            videoIds = re.findall(r'<a href="video_(.*?)" class="vervideo-lilink actplay">', html)
            authors = re.findall(r'<a href=".*?" class="column">(.*?)</a>', html)
            return [
                {'title': title, 'author': author, 'image': image, 'videoId': videoId}
                for title, author, image, videoId in zip(titles, authors, images, videoIds)
            ]

        @staticmethod
        def _safeFileName(title, fallback):
            """Return ``title`` with path-hostile characters replaced, or ``fallback`` if empty."""
            cleaned = re.sub(r'[\\/:*?"<>|\r\n\t]+', '_', title).strip(' .')
            return cleaned or fallback

        def _downloadVideo(self, video, userAgent):
            """Resolve the real file URL of one video and save it; errors are printed."""
            videoId = video['videoId']
            headers = {
                'User-Agent': userAgent.random,
                'Referer': self.base_url + 'video_' + videoId,
            }
            try:
                response = requests.get(
                    self.pre_video + videoId,
                    headers=headers,
                    timeout=self.REQUEST_TIMEOUT,
                )
                if response.status_code != 200:
                    print(response.status_code)
                    return
                srcUrl = json.loads(response.text)['videoInfo']['videos']['srcUrl']
                # The status endpoint returns e.g. .../20211229/1641655664960-10054243-161801-hd.mp4
                # while the playable file is       .../20220113/cont-1749919-10054243-151326-hd.mp4,
                # i.e. the leading numeric segment must become "cont-<videoId>".
                newVideoUrl = re.sub(r'/(\d+)-', f'/cont-{videoId}-', srcUrl)
                videoContent = requests.get(newVideoUrl, timeout=self.REQUEST_TIMEOUT)
                # Check the download itself, not the earlier status-API response.
                if videoContent.status_code != 200:
                    print(videoContent.status_code)
                    return
                print(f'下载视频:{video["title"]}中')
                fileName = self._safeFileName(video['title'], videoId)
                with open(f'{self.videoDir}/{fileName}.mp4', 'wb') as file:
                    file.write(videoContent.content)
            except (requests.RequestException, KeyError, TypeError, ValueError, OSError) as error:
                # Best effort: one broken video must not abort the remaining downloads.
                print(error)

        def getVideoInfo(self):
            """Parse the cached category page and download every video found in it."""
            with open(self.file_path, 'r', encoding='utf-8') as file:
                html = file.read()
            videoes = self._parseVideos(html)
            if not videoes:
                print('该页面不存在')
                return
            print('开始下载视频...')
            userAgent = UserAgent()  # built once; a fresh random UA string is drawn per video
            for video in videoes:
                self._downloadVideo(video, userAgent)
            print('下载完毕!!!')
    # Script entry: ask for the category id (the part of the category URL after
    # "category_") and download that category's videos. The answer is stripped
    # because it is concatenated verbatim into the request URL and local paths,
    # where stray whitespace would yield a wrong URL and directory name.
    cate_id = input('请输入分类id').strip()
    video = LiVideo(cate_id)
    video.getVideo()

    示例地址:https://www.pearvideo.com/category_6 —— 运行时输入 URL 中 category_ 后面的值(此例中为 6)。
    image.png