# -*- coding: UTF-8 -*-
import requests, json, re
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from random import randint
# Directory prefix under which downloaded songs are written.
MUSIC_PATH = 'music/'

# Location of the chromedriver executable handed to Selenium.
DRIVERS_PATH = 'drivers/chromedriver-mac'
class ParseQQmusic:
    """Download every song listed on a QQ Music ranking page.

    The ranking page is opened in headless Chrome (through Selenium) to read
    each song's title and ``songmid``. Each ``songmid`` is then resolved to a
    CDN URL through the ``musicu.fcg`` API and the audio is written below
    ``MUSIC_PATH`` as an ``.m4a`` file.
    """

    # Song links on the ranking page look like
    # https://y.qq.com/n/yqq/song/<songmid>.html
    _SONG_URL_PATTERN = re.compile(r'https://y.qq.com/n/yqq/song/(\S+).html')
    # Whitespace and special characters removed from titles before they are
    # used as file names.
    _UNSAFE_FILENAME_CHARS = re.compile(r'[\s+|@<>:\\"/]')

    def __init__(self, ranking):
        """Store the ranking page URL and prepare a shared HTTP session.

        Args:
            ranking: URL of the ranking page to scrape.
        """
        self.url = ranking
        # Title of the song currently being processed; set by view_html()
        # and read by download_music().
        self.music_name = None
        self.headers = {
            'user-agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1',
            'Referer': 'https://u.y.qq.com'
        }
        self.session = requests.session()

    @staticmethod
    def _extract_songmid(href):
        """Return the songmid embedded in a song link, or None if absent."""
        if not href:
            return None
        found = ParseQQmusic._SONG_URL_PATTERN.match(href)
        return found.group(1) if found else None

    @staticmethod
    def _build_download_url(payload):
        """Return ``sip + purl`` from a vkey API payload, or None if unusable.

        None is returned when the ``req_0``/``data`` section is missing, when
        ``sip`` or ``midurlinfo`` is empty, or when ``purl`` is blank.
        """
        data = ((payload or {}).get('req_0') or {}).get('data') or {}
        hosts = data.get('sip') or []
        url_infos = data.get('midurlinfo') or []
        if not hosts or not url_infos:
            return None
        purl = url_infos[0].get('purl')
        if not purl or not purl.strip():
            return None
        return hosts[0] + purl

    @staticmethod
    def _safe_filename(title, fallback):
        """Strip unsafe characters from ``title``; use ``fallback`` if empty."""
        strip_unsafe = ParseQQmusic._UNSAFE_FILENAME_CHARS.sub
        cleaned = strip_unsafe('', title or '')
        if not cleaned:
            cleaned = strip_unsafe('', str(fallback))
        return cleaned

    def download_music(self, songmid):
        """Resolve ``songmid`` to a CDN URL and save the audio as ``.m4a``.

        The song is skipped silently when the API returns no usable download
        URL or when the CDN request does not succeed. The file is named after
        ``self.music_name`` (falling back to ``songmid`` when no usable title
        is set).

        Args:
            songmid: Song identifier taken from the song link on the page.
        """
        data = {
            "req": {
                "module": "CDN.SrfCdnDispatchServer",
                "method": "GetCdnDispatch",
                "param": {
                    "guid": '',
                    "calltype": 0,
                    "userip": ""
                }
            },
            "req_0": {
                "module": "vkey.GetVkeyServer",
                "method": "CgiGetVkey",
                "param": {
                    "guid": str(random_with_N_digits(9)),
                    "songmid": [songmid],
                    "songtype": [0],
                    "uin": "0",
                    "loginflag": 1,
                    "platform": "20"
                }
            },
            "comm": {
                "uin": 0,
                "format": "json",
                "ct": 24,
                "cv": 0
            }
        }
        api = 'http://u.y.qq.com/cgi-bin/musicu.fcg'
        response = self.session.post(api, data=json.dumps(data))
        # response.json() replaces json.loads(..., encoding=...): the
        # ``encoding`` keyword was removed from json.loads in Python 3.9.
        download_url = self._build_download_url(response.json())
        if download_url is None:
            return

        print(self.music_name)
        # Download to the local disk.
        music = self.session.get(download_url, headers=self.headers)
        if not music.ok:
            # Do not store an HTTP error body as if it were audio data.
            return
        # Remove special characters from the file name.
        file_name = self._safe_filename(self.music_name, songmid)
        with open(MUSIC_PATH + "{}.m4a".format(file_name), "wb") as m:
            m.write(music.content)

    def view_html(self):
        """Render the ranking page and download every song listed on it.

        The driver is always quit, even if loading the page, waiting for the
        song list, or a download raises.
        """
        # The QQ Music page is populated by JavaScript, so it is loaded with
        # Chrome in headless mode.
        option = webdriver.ChromeOptions()
        option.add_argument('headless')
        # NOTE(review): the positional driver path and ``chrome_options`` are
        # legacy constructor arguments, kept as-is here -- confirm they are
        # accepted by the installed Selenium version.
        driver = webdriver.Chrome(DRIVERS_PATH, chrome_options=option)
        try:
            # Ranking page.
            driver.get(self.url)
            print(driver.title)
            # Wait until the song list has been rendered.
            WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.CLASS_NAME, "songlist__songname_txt")))
            for item in driver.find_elements(By.CLASS_NAME, 'songlist__songname_txt'):
                link = item.find_element(By.CLASS_NAME, 'js_song')
                # The songmid is the last path component of the song link.
                songmid = self._extract_songmid(link.get_attribute('href'))
                if songmid is None:
                    # Not a recognisable song link; nothing to download.
                    continue
                self.music_name = link.get_attribute('title')
                self.download_music(songmid)
        finally:
            driver.quit()

    def start(self):
        """Entry point: scrape the ranking page and download its songs."""
        self.view_html()
def random_with_N_digits(n):
    """Return a random integer with exactly ``n`` decimal digits.

    The value is drawn from the inclusive range ``10**(n-1)`` to
    ``10**n - 1``.
    """
    return randint(10 ** (n - 1), (10 ** n) - 1)
if __name__ == '__main__':
    # Target: the Douyin ranking chart.
    target_url = 'https://y.qq.com/n/yqq/toplist/60.html#stat=y_new.toplist.menu.60'
    qm = ParseQQmusic(target_url)
    qm.start()