1. import re
    2. from multiprocessing.dummy import Pool
    3. import requests
    4. from lxml import etree
    5. headers={
    6. 'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36 QIHU 360S'
    7. }
    8. url='https://www.pearvideo.com/category_5'
    9. page_text=requests.get(url=url,headers=headers).text
    10. tree=etree.HTML(page_text)
    11. li_list=tree.xpath('//ul[@id="listvideoListUl"]/li')
    12. urls=[]
    13. for li in li_list:
    14. detail_url='https://www.pearvideo.com/'+li.xpath('./div/a/@href')[0]
    15. name=li.xpath('./div/a/div[2]/text()')[0]+'.mp4'
    16. detail_page_text=requests.get(url=detail_url,headers=headers).text
    17. print(detail_page_text)
    18. ex='srcUrl="(.*?)",vdoUrl'
    19. video_url=re.findall(ex,detail_page_text)[0]
    20. dic={
    21. 'name':name,
    22. 'url':video_url
    23. }
    24. urls.append(dic)
    25. print(dic)
    26. def get_viedo_data(url):
    27. url=dic['url']
    28. print(dic['name'],'正在下载.......')
    29. data=requests.get(url=url,headers=headers).content
    30. with open(dic['name'],'wb') as fp:
    31. fp.write(dic['name'],'下载成功!!!')
    32. pool=Pool(4)
    33. pool.map(get_viedo_data,urls)
    34. pool.close()
    35. pool.join()