# -*- coding: UTF-8 -*-
"""Download the songs listed on a QQ Music ranking page.

The ranking page is rendered by JavaScript, so it is opened in headless
Chrome through Selenium. For every song link found on the page, the media
URL is resolved through the ``musicu.fcg`` endpoint and the audio is saved
as an ``.m4a`` file under ``MUSIC_PATH``.
"""
import json
import os
import re
from random import randint

import requests
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Directory the downloaded audio files are written to.
MUSIC_PATH = 'music/'
# Path to the chromedriver executable.
DRIVERS_PATH = 'drivers/chromedriver-mac'

# Song detail link; the captured group is the songmid passed to the API.
_SONG_URL_PATTERN = re.compile(r'https://y\.qq\.com/n/yqq/song/(\S+)\.html')
# Characters stripped from a song title before it is used as a file name.
_UNSAFE_FILENAME_CHARS = re.compile(r'[\s+|@<>:\\"/]')


class ParseQQmusic:
    """Scrape one QQ Music ranking page and download every song on it."""

    def __init__(self, ranking):
        """Remember the ranking URL and set up the shared HTTP session.

        Args:
            ranking: URL of the ranking page to scrape.
        """
        self.url = ranking
        # Title of the song currently being processed; set by view_html().
        self.music_name = None
        self.headers = {
            'user-agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1',
            'Referer': 'https://u.y.qq.com'
        }
        self.session = requests.session()

    def download_music(self, songmid):
        """Resolve the media URL for ``songmid`` and save the audio locally.

        The file is named after ``self.music_name`` (falling back to the
        songmid when no title has been set) with unsafe characters removed.
        The song is skipped, without raising, when the API reply carries no
        usable download URL or the download itself returns an HTTP error.

        Args:
            songmid: Song identifier taken from the song's detail-page URL.
        """
        data = {
            "req": {
                "module": "CDN.SrfCdnDispatchServer",
                "method": "GetCdnDispatch",
                "param": {
                    "guid": '',
                    "calltype": 0,
                    "userip": ""
                }
            },
            "req_0": {
                "module": "vkey.GetVkeyServer",
                "method": "CgiGetVkey",
                "param": {
                    "guid": str(random_with_N_digits(9)),
                    "songmid": [songmid],
                    "songtype": [0],
                    "uin": "0",
                    "loginflag": 1,
                    "platform": "20"
                }
            },
            "comm": {
                "uin": 0,
                "format": "json",
                "ct": 24,
                "cv": 0
            }
        }
        api = 'http://u.y.qq.com/cgi-bin/musicu.fcg'
        response = self.session.post(api, data=json.dumps(data))
        # json.loads() no longer accepts an ``encoding`` keyword on current
        # Python versions, so the body is decoded explicitly instead.
        res_json = json.loads(response.content.decode('utf-8'))

        # Any level of the reply may be missing; treat that as "no URL".
        payload = (res_json.get('req_0') or {}).get('data') or {}
        sip_list = payload.get('sip') or []
        url_infos = payload.get('midurlinfo') or []
        if not sip_list or not url_infos:
            return
        purl = url_infos[0].get('purl')
        if not (purl and purl.strip()):
            return

        name = self.music_name or str(songmid)
        print(name)
        download_url = sip_list[0] + purl
        # Download to the local disk.
        music = self.session.get(download_url, headers=self.headers)
        if not music.ok:
            # Do not store an HTTP error page as if it were audio data.
            print('download failed ({}): {}'.format(music.status_code, name))
            return

        if MUSIC_PATH:
            os.makedirs(MUSIC_PATH, exist_ok=True)
        # Strip special characters from the file name.
        filename = "{}.m4a".format(_UNSAFE_FILENAME_CHARS.sub('', name))
        with open(os.path.join(MUSIC_PATH, filename), "wb") as m:
            m.write(music.content)

    def view_html(self):
        """Open the ranking page in headless Chrome and download each song.

        Waits up to 10 seconds for the song list to appear, then walks every
        song entry, reads its link and title, and hands the songmid to
        download_music(). Entries whose link does not look like a song page
        are skipped. The browser is always closed on exit.
        """
        # The QQ Music page is rendered by JavaScript, so it is visited with
        # Chrome in headless mode.
        option = webdriver.ChromeOptions()
        option.add_argument('headless')
        # NOTE(review): positional driver path plus ``chrome_options=`` is the
        # older Selenium calling convention - confirm it matches the Selenium
        # version installed for this project.
        driver = webdriver.Chrome(DRIVERS_PATH, chrome_options=option)
        try:
            # Ranking page.
            driver.get(self.url)
            print(driver.title)
            # Wait until the play list has finished loading.
            WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.CLASS_NAME, "songlist__songname_txt")))
            entries = driver.find_elements(By.CLASS_NAME, 'songlist__songname_txt')
            for entry in entries:
                link = entry.find_element(By.CLASS_NAME, 'js_song')
                href = link.get_attribute('href')
                self.music_name = link.get_attribute('title')
                # Extract the songmid from the link target.
                matched = _SONG_URL_PATTERN.match(href or '')
                if matched is None:
                    continue
                self.download_music(matched.group(1))
        finally:
            driver.quit()

    def start(self):
        """Run the scrape-and-download workflow."""
        self.view_html()


def random_with_N_digits(n):
    """Return a random integer that has exactly ``n`` decimal digits."""
    range_start = 10 ** (n - 1)
    range_end = (10 ** n) - 1
    return randint(range_start, range_end)


if __name__ == '__main__':
    # Target: the Douyin chart.
    qm = ParseQQmusic('https://y.qq.com/n/yqq/toplist/60.html#stat=y_new.toplist.menu.60')
    qm.start()