import json
import os
import re

import requests
import xlsxwriter
from fake_useragent import UserAgent
    7. class KuwoMusic:
    8. def __init__(self,cate_id):
    9. self.cate_id = cate_id
    10. self.base_url = 'https://www.kuwo.cn'
    11. self.url = self.base_url + '/playlist_detail/' + cate_id
    12. self.headers = {
    13. 'User-Agent':UserAgent().random,
    14. 'Cookie':'Hm_lvt_cdb524f42f0ce19b169a8071123a4797=1642227195; _ga=GA1.2.1122063446.1642227195; _gid=GA1.2.280619156.1642227195; gtoken=j8VE8ybEY8ot; gid=085db92c-1032-4576-a03c-a8fca605c0c0; Hm_lpvt_cdb524f42f0ce19b169a8071123a4797=1642239672; kw_token=6O9JR1N139B'
    15. }
    16. # 创建文件夹
    17. def checkDir(self,path):
    18. if not os.path.exists(path):
    19. os.mkdir(path)
    20. # 保存酷我音乐页面
    21. def saveMusicHtml(self):
    22. self.headers['Referer'] = self.url
    23. if not os.path.exists(f'{self.cate_id}-music.html'):
    24. response = requests.get(self.url, headers=self.headers)
    25. if response.status_code == 200:
    26. with open(f'{self.cate_id}-music.html', 'w', encoding='utf-8') as file:
    27. file.write(response.content.decode('utf-8'))
    28. else:
    29. print(response.reason)
    30. self.getMusicInfo()
    31. else:
    32. self.getMusicInfo()
    33. # 获取音乐信息
    34. def getMusicInfo(self):
    35. with open(f'{self.cate_id}-music.html','r',encoding='utf-8') as file:
    36. html = file.read()
    37. allLi = re.findall('<li class="song_item flex_c" data-v-1344465b>.*?</li>',html)
    38. songTitles = []
    39. songArtistes = []
    40. times = []
    41. links = []
    42. songLyrices = []
    43. for li in allLi:
    44. songName = re.findall('<div class="song_name flex_c" .*?<a title="(.*?)" href="(.*?)".*?data-v-1344465b>(.*?)</a>.*?</div>',li)[0]
    45. songArtist = re.findall('<div class="song_artist" .*?<span title="(.*?)" data-v-1344465b>(.*?)</span></div>',li)[0]
    46. time = re.findall('<div class="song_time" data-v-1344465b.*?<span data-v-1344465b>(.*?)</span></div>',li)[0]
    47. #print(f'音乐名称为:【{songName[0]}】,歌手:{songArtist[0]},时长为:{time},链接为:{self.base_url + songName[1]}')
    48. songLyric = self.getMusicDetail(songName[0],self.base_url + songName[1])
    49. print(f'下载歌曲{songName[0]}中...')
    50. # 下载音乐
    51. songId = songName[1].split('/')[-1] # 歌曲ID值
    52. isDownMusic = self.downMusic(songName[0],songId)
    53. if isDownMusic == True:
    54. songTitles.append(songName[0])
    55. songArtistes.append(songArtist[0])
    56. times.append(time)
    57. links.append(self.base_url + songName[1])
    58. songLyrices.append(songLyric)
    59. if songTitles:
    60. print('导出数据中...')
    61. fileName = f'{cate_id}-歌词.xlsx'
    62. workbook = xlsxwriter.Workbook(fileName)
    63. # 创建一个sheet
    64. worksheet = workbook.add_worksheet()
    65. bold = workbook.add_format({'bold': 1})
    66. # # --------1、准备数据并写入excel---------------
    67. # # 向excel中写入数据,建立图标时要用到
    68. headings = ['序号', '歌名', '歌手', '时长', '链接', '歌词']
    69. data = [
    70. [i for i in range(0, len(songTitles))],
    71. songTitles,
    72. songArtistes,
    73. times,
    74. links,
    75. songLyrices,
    76. ]
    77. # 写入表头
    78. worksheet.write_row('A1', headings, bold)
    79. # 写入数据
    80. worksheet.write_column('A2', data[0])
    81. worksheet.write_column('B2', data[1])
    82. worksheet.write_column('C2', data[2])
    83. worksheet.write_column('D2', data[3])
    84. worksheet.write_column('E2', data[4])
    85. worksheet.write_column('F2', data[5])
    86. workbook.close()
    87. print('程序执行完毕')
    88. # 下载音乐
    89. def downMusic(self,songName,songId):
    90. url = f'https://www.kuwo.cn/api/v1/www/music/playUrl'
    91. params = {
    92. 'mid':songId,
    93. 'type':'music',
    94. 'httpsStatus':1,
    95. 'reqId':'b53f83a1-75e2-11ec-9e65-9b1b46a0f773'
    96. }
    97. response = requests.get(url,params=params,headers=self.headers)
    98. if response.status_code == 200:
    99. result = json.loads(response.content.decode('utf-8'))
    100. if result['code'] == 200:
    101. response = requests.get(result['data']['url'])
    102. self.checkDir('music')
    103. with open(f'music/{songName}.mp3','wb') as file:
    104. file.write(response.content)
    105. return True
    106. # 获取歌曲-歌词
    107. def getMusicDetail(self,songName,link):
    108. self.headers['Referer'] = link
    109. response = requests.get(link,headers=self.headers)
    110. self.checkDir('detail')
    111. if response.status_code == 200:
    112. if os.path.exists(f'detail/{songName}-歌词.html'):
    113. with open(f'detail/{songName}-歌词.html', 'r', encoding='utf-8') as file:
    114. html = file.read()
    115. else:
    116. html = response.content.decode('utf-8')
    117. with open(f'detail/{songName}-歌词.html', 'w', encoding='utf-8') as file:
    118. file.write(html)
    119. allP = re.findall('<div id="lyric" class="lyric" .*? data-v-34783d0c>.*?<div data-v-34783d0c>(.*?)</div></div>',html)
    120. for p in allP:
    121. songLyrics = re.findall('<p data-v-34783d0c.*?>(.*?)</p>',p)
    122. songLyrics = ','.join(songLyrics)
    123. return songLyrics
    124. else:
    125. print(response.reason)
    126. #cate_id = input('请输入歌曲分类id')
    127. cate_id = '3301373706'
    128. music = KuwoMusic(cate_id)
    129. music.saveMusicHtml()