import re
from multiprocessing.dummy import Pool
import requests
from lxml import etree
# Request headers sent with every HTTP request in this script; the
# User-Agent mimics a desktop Chrome browser.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36 QIHU 360S'
}

# Listing page whose <li> entries are scraped below.
url = 'https://www.pearvideo.com/category_5'
page_text = requests.get(url=url, headers=headers).text
tree = etree.HTML(page_text)
li_list = tree.xpath('//ul[@id="listvideoListUl"]/li')

# Pattern that extracts the direct video address from a detail page's source.
# Compiled once here instead of being re-resolved on every loop iteration.
ex = 'srcUrl="(.*?)",vdoUrl'
video_url_pattern = re.compile(ex)

# One {'name': <output file name>, 'url': <video address>} dict per video.
urls = []
for li in li_list:
    hrefs = li.xpath('./div/a/@href')
    titles = li.xpath('./div/a/div[2]/text()')
    if not hrefs or not titles:
        # The entry lacks the expected link/title markup; skip it rather
        # than aborting the whole crawl with an IndexError.
        continue

    detail_url = 'https://www.pearvideo.com/' + hrefs[0]
    name = titles[0] + '.mp4'
    detail_page_text = requests.get(url=detail_url, headers=headers).text
    # NOTE(review): this dumps the entire detail page to stdout and looks
    # like leftover debugging output -- consider removing it.
    print(detail_page_text)

    matches = video_url_pattern.findall(detail_page_text)
    if not matches:
        # No srcUrl found in the detail page; nothing to download for it.
        continue
    video_url = matches[0]

    dic = {
        'name': name,
        'url': video_url,
    }
    urls.append(dic)
    print(dic)
def get_viedo_data(url):
    """Download one video and write its bytes to the file named by the entry.

    Args:
        url: A dict with the keys 'name' (path of the output file) and
            'url' (address the video bytes are fetched from). The parameter
            keeps the name ``url`` for backward compatibility even though
            it carries the whole entry.
    """
    # Read from the argument, not the module-level ``dic``: the global only
    # ever refers to the last scraped entry, so every worker would download
    # the same video.
    name = url['name']
    video_url = url['url']

    print(name, '正在下载.......')
    data = requests.get(url=video_url, headers=headers).content
    with open(name, 'wb') as fp:
        # Persist the downloaded bytes; file.write() takes exactly one
        # argument, so the success message is printed separately below.
        fp.write(data)
    print(name, '下载成功!!!')
# Fan the collected entries out over four worker threads; map() blocks
# until every entry in ``urls`` has been handled by get_viedo_data.
pool = Pool(processes=4)
pool.map(func=get_viedo_data, iterable=urls)

# Stop accepting new work, then wait for the workers to exit.
pool.close()
pool.join()