1. # -*- coding: UTF-8 -*-
    2. import requests, json, re
    3. from selenium import webdriver
    4. from selenium.webdriver.common.by import By
    5. from selenium.webdriver.support.ui import WebDriverWait
    6. from selenium.webdriver.support import expected_conditions as EC
    7. from random import randint
# Path prefix under which downloaded audio files are written.
MUSIC_PATH='music/'
# Path to the chromedriver executable handed to Selenium.
DRIVERS_PATH='drivers/chromedriver-mac'
    12. class ParseQQmusic:
    13. def __init__(self,ranking):
    14. self.url =ranking
    15. self.music_name =None
    16. self.headers = {
    17. 'user-agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1',
    18. 'Referer': 'https://u.y.qq.com'
    19. }
    20. self.session = requests.session()
    21. def download_music(self,songmid):
    22. data = {
    23. "req": {
    24. "module": "CDN.SrfCdnDispatchServer",
    25. "method": "GetCdnDispatch",
    26. "param": {
    27. "guid": '',
    28. "calltype": 0,
    29. "userip": ""
    30. }
    31. },
    32. "req_0": {
    33. "module": "vkey.GetVkeyServer",
    34. "method": "CgiGetVkey",
    35. "param": {
    36. "guid": str(random_with_N_digits(9)),
    37. "songmid": [songmid],
    38. "songtype": [0],
    39. "uin": "0",
    40. "loginflag": 1,
    41. "platform": "20"
    42. }
    43. },
    44. "comm": {
    45. "uin": 0,
    46. "format": "json",
    47. "ct": 24,
    48. "cv": 0
    49. }
    50. }
    51. api = 'http://u.y.qq.com/cgi-bin/musicu.fcg'
    52. jsondata = json.dumps(data)
    53. response = self.session.post(api, data=jsondata)
    54. resJson = json.loads(response.content, encoding='utf-8')
    55. sip = resJson.get('req_0').get('data').get('sip')[0]
    56. purl = resJson.get('req_0').get('data').get('midurlinfo')[0].get('purl')
    57. if purl != None and purl.strip():
    58. print(self.music_name)
    59. download_url= sip + purl
    60. # 下载到本地
    61. music = self.session.get(download_url,headers=self.headers)
    62. # 文件名去除特殊符号
    63. with open(MUSIC_PATH + "{}.m4a".format(re.sub(r'[\s+|@<>:\\"/]', '', self.music_name)), "wb") as m:
    64. m.write(music.content)
    65. def view_html(self):
    66. # qq音乐页面是js加载的,这里用chrome headless模式访问
    67. option = webdriver.ChromeOptions()
    68. option.add_argument('headless')
    69. driver = webdriver.Chrome(DRIVERS_PATH, chrome_options=option)
    70. # 排行榜页面
    71. driver.get(self.url)
    72. print(driver.title)
    73. try:
    74. # 等待播放列表加载完毕
    75. WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CLASS_NAME, "songlist__songname_txt")))
    76. lis = driver.find_elements_by_class_name('songlist__songname_txt')
    77. pattern = re.compile(r'https://y.qq.com/n/yqq/song/(\S+).html')
    78. for i in range(lis.__len__()):
    79. li = lis.__getitem__(i)
    80. a = li.find_element_by_class_name('js_song')
    81. # 获得songid
    82. href = a.get_attribute('href')
    83. music_name = a.get_attribute('title')
    84. self.music_name=music_name
    85. m = pattern.match(href)
    86. self.download_music(m.group(1))
    87. finally:
    88. driver.quit()
    89. def start(self):
    90. self.view_html()
    91. def random_with_N_digits(n):
    92. range_start = 10**(n-1)
    93. range_end = (10**n)-1
    94. return randint(range_start, range_end)
    95. if __name__ == '__main__':
    96. # 目标 抖音排行榜
    97. qm = ParseQQmusic('https://y.qq.com/n/yqq/toplist/60.html#stat=y_new.toplist.menu.60')
    98. qm.start()