import json
import os
import re

import requests
import xlsxwriter
from fake_useragent import UserAgent


class KuwoMusic:
    """Scrape one Kuwo playlist page, download its songs and export the metadata.

    The playlist page is cached as ``<cate_id>-music.html``, each song detail
    page as ``detail/<title>-歌词.html``, audio goes to ``music/<title>.mp3``
    and the summary sheet to ``<cate_id>-歌词.xlsx``. All paths are relative to
    the current working directory.
    """

    # requests applies no timeout by default, so a stalled connection would
    # otherwise block the whole run forever.
    REQUEST_TIMEOUT = 30

    # Markup patterns for the playlist page and the song detail page.
    _SONG_ITEM = re.compile('<li class="song_item flex_c" data-v-1344465b>.*?</li>')
    _SONG_NAME = re.compile('<div class="song_name flex_c" .*?<a title="(.*?)" href="(.*?)".*?data-v-1344465b>(.*?)</a>.*?</div>')
    _SONG_ARTIST = re.compile('<div class="song_artist" .*?<span title="(.*?)" data-v-1344465b>(.*?)</span></div>')
    _SONG_TIME = re.compile('<div class="song_time" data-v-1344465b.*?<span data-v-1344465b>(.*?)</span></div>')
    _LYRIC_BLOCK = re.compile('<div id="lyric" class="lyric" .*? data-v-34783d0c>.*?<div data-v-34783d0c>(.*?)</div></div>')
    _LYRIC_LINE = re.compile('<p data-v-34783d0c.*?>(.*?)</p>')

    # Characters that are path separators or reserved in common file systems.
    _UNSAFE_FILE_CHARS = re.compile(r'[\\/:*?"<>|]')

    def __init__(self, cate_id):
        """Store the playlist id and build the page URL and request headers.

        Args:
            cate_id: Playlist id as a string; it is concatenated into the URL.
        """
        self.cate_id = cate_id
        self.base_url = 'https://www.kuwo.cn'
        self.url = self.base_url + '/playlist_detail/' + cate_id
        self.headers = {
            'User-Agent': UserAgent().random,
            'Cookie': 'Hm_lvt_cdb524f42f0ce19b169a8071123a4797=1642227195; _ga=GA1.2.1122063446.1642227195; _gid=GA1.2.280619156.1642227195; gtoken=j8VE8ybEY8ot; gid=085db92c-1032-4576-a03c-a8fca605c0c0; Hm_lpvt_cdb524f42f0ce19b169a8071123a4797=1642239672; kw_token=6O9JR1N139B'
        }

    @classmethod
    def _safeFileName(cls, name):
        """Return *name* with path separators and reserved characters replaced by '_'."""
        return cls._UNSAFE_FILE_CHARS.sub('_', name)

    # Create a directory
    def checkDir(self, path):
        """Create *path* (including missing parents) if it does not exist yet."""
        # exist_ok avoids the check-then-create race of exists() + mkdir().
        os.makedirs(path, exist_ok=True)

    # Save the Kuwo playlist page
    def saveMusicHtml(self):
        """Fetch the playlist page unless it is already cached, then process it.

        On a non-200 response the reason is printed and processing stops,
        because there is no page file for getMusicInfo() to read.
        """
        self.headers['Referer'] = self.url
        pagePath = f'{self.cate_id}-music.html'
        if not os.path.exists(pagePath):
            response = requests.get(self.url, headers=self.headers, timeout=self.REQUEST_TIMEOUT)
            if response.status_code != 200:
                print(response.reason)
                return
            with open(pagePath, 'w', encoding='utf-8') as file:
                file.write(response.content.decode('utf-8'))
        self.getMusicInfo()

    # Collect song information
    def getMusicInfo(self):
        """Parse the cached playlist page, download every song and export a sheet.

        For each list item the lyrics are fetched first, then the audio is
        downloaded; only songs whose download succeeded are exported. Items
        that do not match the expected markup are skipped.
        """
        with open(f'{self.cate_id}-music.html', 'r', encoding='utf-8') as file:
            html = file.read()

        rows = []  # one (title, artist, duration, link, lyrics) tuple per downloaded song
        for li in self._SONG_ITEM.findall(html):
            nameMatch = self._SONG_NAME.findall(li)
            artistMatch = self._SONG_ARTIST.findall(li)
            timeMatch = self._SONG_TIME.findall(li)
            if not (nameMatch and artistMatch and timeMatch):
                # Unexpected markup for this item: skip it instead of
                # aborting the whole run with an IndexError.
                continue

            title, href = nameMatch[0][0], nameMatch[0][1]
            link = self.base_url + href
            songLyric = self.getMusicDetail(title, link)
            print(f'下载歌曲{title}中...')
            # Download the audio; the song id is the last segment of the detail link.
            songId = href.split('/')[-1]
            if self.downMusic(title, songId):
                rows.append((title, artistMatch[0][0], timeMatch[0], link, songLyric))

        if rows:
            self._exportToExcel(rows)
        print('程序执行完毕')

    def _exportToExcel(self, rows):
        """Write the collected song rows to ``<cate_id>-歌词.xlsx``."""
        print('导出数据中...')
        # Use the instance's playlist id; the file name must not depend on a
        # module-level variable happening to exist.
        fileName = f'{self.cate_id}-歌词.xlsx'
        # The context manager closes (and thereby writes) the workbook.
        with xlsxwriter.Workbook(fileName) as workbook:
            worksheet = workbook.add_worksheet()
            bold = workbook.add_format({'bold': True})

            headings = ['序号', '歌名', '歌手', '时长', '链接', '歌词']
            # Header row
            worksheet.write_row('A1', headings, bold)
            # Data columns: a zero-based running index followed by the song fields.
            columns = [list(range(len(rows)))] + [list(column) for column in zip(*rows)]
            for letter, values in zip('ABCDEF', columns):
                worksheet.write_column(f'{letter}2', values)

    # Download a song
    def downMusic(self, songName, songId):
        """Download one song to ``music/<songName>.mp3``.

        Args:
            songName: Song title, used (sanitised) as the file name.
            songId: Kuwo song id passed to the play-URL API as ``mid``.

        Returns:
            True if the audio file was written, False otherwise.
        """
        url = 'https://www.kuwo.cn/api/v1/www/music/playUrl'
        params = {
            'mid': songId,
            'type': 'music',
            'httpsStatus': 1,
            'reqId': 'b53f83a1-75e2-11ec-9e65-9b1b46a0f773'
        }
        response = requests.get(url, params=params, headers=self.headers, timeout=self.REQUEST_TIMEOUT)
        if response.status_code != 200:
            return False
        result = json.loads(response.content.decode('utf-8'))
        if result.get('code') != 200:
            return False

        audio = requests.get(result['data']['url'], timeout=self.REQUEST_TIMEOUT)
        if audio.status_code != 200:
            # Do not save an error page under an .mp3 name.
            return False
        self.checkDir('music')
        with open(f'music/{self._safeFileName(songName)}.mp3', 'wb') as file:
            file.write(audio.content)
        return True

    # Fetch a song's lyrics
    def getMusicDetail(self, songName, link):
        """Return the lyrics of one song as a comma-joined string.

        The detail page is read from ``detail/<songName>-歌词.html`` when that
        file exists and is fetched (and cached) otherwise.

        Args:
            songName: Song title, used (sanitised) in the cache file name.
            link: Absolute URL of the song detail page.

        Returns:
            The lyric lines joined with ',', or None when the page could not
            be fetched or contains no lyric block.
        """
        # Set even on a cache hit: the play-URL request issued afterwards
        # reuses self.headers.
        self.headers['Referer'] = link
        self.checkDir('detail')
        cachePath = f'detail/{self._safeFileName(songName)}-歌词.html'
        if os.path.exists(cachePath):
            with open(cachePath, 'r', encoding='utf-8') as file:
                html = file.read()
        else:
            response = requests.get(link, headers=self.headers, timeout=self.REQUEST_TIMEOUT)
            if response.status_code != 200:
                print(response.reason)
                return None
            html = response.content.decode('utf-8')
            with open(cachePath, 'w', encoding='utf-8') as file:
                file.write(html)

        lyricBlocks = self._LYRIC_BLOCK.findall(html)
        if not lyricBlocks:
            return None
        # Only the first lyric block is used.
        return ','.join(self._LYRIC_LINE.findall(lyricBlocks[0]))


# To prompt for the playlist id instead: cate_id = input('请输入歌曲分类id')
cate_id = '3301373706'
music = KuwoMusic(cate_id)
music.saveMusicHtml()