"""Download the chapters of one Ximalaya audiobook album as MP3 files.

For every album list page in ``FIRST_PAGE..LAST_PAGE`` the script:

1. fetches the page and reads the album title from the ``<h1>`` element
   (the part before ``' | '`` becomes the output folder name);
2. walks the chapter ``<div>`` elements, taking the chapter title and the
   track id (last path segment of the link) from each ``<a>``;
3. asks the audio endpoint for the real media URL and saves the bytes to
   ``<album folder>/<index> <chapter name>.mp3``.

Notes:
    * ``os.makedirs(path, exist_ok=True)`` creates the folder only when it
      does not exist yet.
    * ``enumerate(seq)`` yields ``(position, item)`` pairs; the position is
      used to give every chapter a running sequence number.
"""
import os
import time

import requests
from lxml import etree

PAGE_URL = 'https://www.ximalaya.com/youshengshu/22963309/p{page}/'
AUDIO_API_URL = 'https://www.ximalaya.com/revision/play/v1/audio?id={media_id}&ptype=1'
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36'
}

FIRST_PAGE = 1
LAST_PAGE = 2
# The running chapter number is derived from the page number, which assumes
# every list page holds this many chapters.
TRACKS_PER_PAGE = 30
# Pause (seconds) before each API call and each media download.
REQUEST_DELAY = 0.5
# Seconds to wait for a server response before giving up.
REQUEST_TIMEOUT = 30

# Characters that cannot appear in a file name on Windows; '/' is also the
# path separator elsewhere, so it must never leak into a name.
_UNSAFE_NAME_CHARS = str.maketrans({char: '_' for char in '\\/:*?"<>|'})


def _safe_name(name):
    """Return *name* with file-system-reserved characters replaced by '_'."""
    return name.translate(_UNSAFE_NAME_CHARS).strip()


def _fetch(url):
    """GET *url* with the shared headers; raise on timeout or HTTP error."""
    response = requests.get(url=url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return response


def main():
    """Download all chapters of the configured pages and print progress."""
    media_dir = None
    last_index = 0
    pages_done = 0

    for page in range(FIRST_PAGE, LAST_PAGE + 1):
        print(f'正在抓取第{page}页=================')
        page_url = PAGE_URL.format(page=page)
        page_tree = etree.HTML(_fetch(page_url).text)

        headings = page_tree.xpath('//h1[@class="title k_Z"]/text()')
        if not headings:
            # Fail with a readable message instead of a bare IndexError.
            raise RuntimeError(f'album title not found on {page_url}')
        media_dir = _safe_name(headings[0].split(' | ')[0])
        os.makedirs(media_dir, exist_ok=True)

        div_list = page_tree.xpath('//div[@class="text Mi_"]')
        for num, div in enumerate(div_list):
            # The title looks like "<album name> <number> <chapter name>";
            # only the last space-separated part is used in the file name.
            media_title = div.xpath('./a/@title')[0]
            media_index = (page - 1) * TRACKS_PER_PAGE + num + 1
            last_index = media_index
            # Zero-pad to two digits; the index itself stays an int.
            chapter_name = _safe_name(media_title.split(' ')[-1])
            media_path = f'{media_index:02d} {chapter_name}.mp3'

            # href looks like /youshengshu/22963309/456562317; the last
            # segment is the track id expected by the audio endpoint.
            media_href = div.xpath('./a/@href')[0]
            media_id = media_href.split('/')[-1]
            ajax_url = AUDIO_API_URL.format(media_id=media_id)

            time.sleep(REQUEST_DELAY)
            ajax_data = _fetch(ajax_url).json()
            # NOTE(review): assumes the endpoint may answer without a usable
            # 'src' (for example for restricted tracks) - confirm against the
            # API. Skipping avoids requesting a None URL.
            media_url = (ajax_data.get('data') or {}).get('src')
            if not media_url:
                print(media_path, '没有可用的音频地址,已跳过')
                continue

            print(media_path, '开始下载...')
            time.sleep(REQUEST_DELAY)
            media_data = _fetch(media_url).content
            with open(os.path.join(media_dir, media_path), 'wb') as fp:
                fp.write(media_data)
            print(media_path, '下载完成!')

        pages_done += 1

    print(media_dir, f'所有章节已下载完毕 共{pages_done}页 {last_index}章节!!!')


if __name__ == '__main__':
    main()