# Scrape the Kuwo Music hot-song chart
import requests
import re
import csv
# Analysis of the chart page URLs
'''
Page 1: http://www.kuwo.cn/api/www/bang/bang/musicList?bangId=93&pn=1&rn=30&httpsStatus=1&reqId=82dbed60-74fa-11ec-b1c4-eb380bf2a161
Page 2: http://www.kuwo.cn/api/www/bang/bang/musicList?bangId=93&pn=2&rn=30&httpsStatus=1&reqId=82dbed60-74fa-11ec-b1c4-eb380bf2a161
Only the pn (page number) query parameter differs between the two pages.
Anti-scraping notes: referer header check
csrf header and cookie
'''
class KuWOSpider:
    """Scrape the Kuwo Music hot-song chart and save the rows to a CSV file.

    The spider downloads ten chart pages, extracts one row per song with
    regular expressions, and writes all rows to ``酷我音乐top300.csv``.
    """

    # Songs requested per page (the ``rn`` query parameter) and the upper
    # bound on rows taken from a single page.
    PAGE_SIZE = 30
    # Seconds to wait for the server before a request is abandoned.
    TIMEOUT = 10

    def __init__(self):
        """Prepare the request headers sent with every chart request."""
        # The csrf header carries the same value as the kw_token cookie.
        # NOTE(review): these values were captured from a browser session
        # and presumably expire -- refresh them if requests start failing.
        self.headers = {
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.71 Safari/537.36',
            'referer': 'http://www.kuwo.cn/rankList',
            'csrf': 'FB96B28A1J',
            'cookie': 'Hm_lvt_cdb524f42f0ce19b169a8071123a4797=1642136341; _ga=GA1.2.905152829.1642136341; _gid=GA1.2.1336775727.1642136341; Hm_lpvt_cdb524f42f0ce19b169a8071123a4797=1642137803; _gat=1; kw_token=FB96B28A1J',
        }

    def read_url(self, url):
        """Download *url* and return the response body decoded as UTF-8.

        Raises:
            requests.RequestException: on timeout, connection failure, or
                an HTTP error status.
        """
        # A timeout keeps the scraper from hanging on a stalled connection.
        res = requests.get(url, headers=self.headers, timeout=self.TIMEOUT)
        # Fail loudly on 4xx/5xx instead of parsing an error page.
        res.raise_for_status()
        return res.content.decode('utf-8')

    def pare_url(self, html, li):
        """Extract song rows from *html* and append them to *li*.

        Each appended row is a dict with the keys ``singers``, ``name`` and
        ``link``. At most ``PAGE_SIZE`` rows are taken from one page; a page
        with fewer matches simply contributes fewer rows.
        """
        # Scan the page once per field rather than once per row.
        artists = re.findall(r'"artist":"(.*?)"', html)
        # NOTE(review): the 'name' column is filled from the "album" field;
        # confirm whether the song title field was intended instead.
        albums = re.findall(r'"album":"(.*?)"', html)
        # A song's page is 'http://www.kuwo.cn/play_detail/<rid>', so the
        # link is built from the captured id.
        rids = re.findall(r'"rid":(.*?),', html)

        # zip() stops at the shortest list, so a short or empty page cannot
        # raise IndexError.
        songs = list(zip(artists, albums, rids))[:self.PAGE_SIZE]
        for artist, album, rid in songs:
            li.append({
                'singers': artist,
                'name': album,
                'link': 'http://www.kuwo.cn/play_detail/' + rid,
            })

    def write_html(self, header, li):
        """Write the rows in *li* to ``酷我音乐top300.csv`` with *header*.

        *header* is the list of column names and *li* is a list of dicts
        keyed by those names.
        """
        # newline='' is required by the csv module; without it every row is
        # followed by a blank line on Windows.
        with open('酷我音乐top300.csv', 'w', encoding='utf-8', newline='') as f:
            w = csv.DictWriter(f, header)
            w.writeheader()
            w.writerows(li)
        print("写入成功!")

    def main(self):
        """Fetch chart pages 1 to 10, parse them, and write the CSV file."""
        header = ['singers', 'name', 'link']
        li = []
        for page in range(1, 11):
            html = self.read_url(
                'http://www.kuwo.cn/api/www/bang/bang/musicList'
                f'?bangId=93&pn={page}&rn={self.PAGE_SIZE}&httpsStatus=1'
                '&reqId=82dbed60-74fa-11ec-b1c4-eb380bf2a161')
            self.pare_url(html, li)
        self.write_html(header, li)
if __name__ == '__main__':
    # Run the scraper only when this file is executed as a script.
    KuWOSpider().main()
# Result display
