1. import requests
    2. import time
    3. import os
    4. from lxml import etree
    5. for page in range(1, 3):
    6. print(f'正在抓取第{page}页=================')
    7. page_url= f'https://www.ximalaya.com/youshengshu/22963309/p{page}/'
    8. headers = {
    9. 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36'
    10. }
    11. page_text = requests.get(url=page_url, headers=headers).text
    12. page_tree = etree.HTML(page_text)
    13. media_dir = page_tree.xpath('////h1[@class="title k_Z"]/text()')[0].split(' | ')[0]
    14. div_list = page_tree.xpath('//div[@class="text Mi_"]')
    15. if not os.path.exists(media_dir):
    16. os.mkdir(media_dir)
    17. # 这里使用了枚举法--enumerate(squ)已达到给每个小标题加上序列的目的
    18. for num, div in enumerate(div_list):
    19. media_title = div.xpath('./a/@title')[0] # media_title:重生八零:媳妇有点辣 0001 我是破鞋?
    20. media_index = (page-1) * 30 + num + 1
    21. # 当序号小于10时,在序号前面加上一个0
    22. if media_index < 10:
    23. media_index = '0' + str(media_index)
    24. media_path = f"{media_index} {media_title.split(' ')[-1]}.mp3"
    25. media_href = div.xpath('./a/@href')[0] # media_href:/youshengshu/22963309/456562317
    26. media_id = media_href.split('/')[-1] # media_id:456562317
    27. ajax_url = f'https://www.ximalaya.com/revision/play/v1/audio?id={media_id}&ptype=1'
    28. time.sleep(0.5)
    29. ajax_data = requests.get(url=ajax_url, headers=headers).json()
    30. media_url = ajax_data['data']['src']
    31. print(media_path, '开始下载...')
    32. time.sleep(0.5)
    33. media_data = requests.get(url=media_url, headers=headers).content
    34. with open(f'{media_dir}/{media_path}', 'wb') as fp:
    35. fp.write(media_data)
    36. print(media_path, '下载完成!')
    37. print(media_dir,f'所有章节已下载完毕 共{page}页 {media_index}章节!!!')
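"""
Notes:
01. if not os.path.exists(path) --- checks whether something (e.g. a directory) already exists at path
02. os.mkdir(path) --- creates the directory
03. for i, item in enumerate(seq) --- enumerate() yields (index, item) pairs while iterating over seq
"""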