爬取酷我音乐热歌榜
import requests
import re
import csv

# Page URL analysis:
#   Page 1: http://www.kuwo.cn/api/www/bang/bang/musicList?bangId=93&pn=1&rn=30&httpsStatus=1&reqId=82dbed60-74fa-11ec-b1c4-eb380bf2a161
#   Page 2: http://www.kuwo.cn/api/www/bang/bang/musicList?bangId=93&pn=2&rn=30&httpsStatus=1&reqId=82dbed60-74fa-11ec-b1c4-eb380bf2a161
# Only the `pn` query parameter changes between pages.
# Anti-scraping checks to satisfy: referer, csrf and cookie headers.


class KuWOSpider:
    """Scrape the Kuwo Music hot-song chart and save it to a CSV file."""

    # Chart API endpoint; `page` is the 1-based page number, `size` the page size.
    API_URL = (
        'http://www.kuwo.cn/api/www/bang/bang/musicList'
        '?bangId=93&pn={page}&rn={size}&httpsStatus=1'
        '&reqId=82dbed60-74fa-11ec-b1c4-eb380bf2a161'
    )
    # A song's page is this prefix followed by its id,
    # e.g. 'http://www.kuwo.cn/play_detail/206079303'.
    DETAIL_URL = 'http://www.kuwo.cn/play_detail/'
    PAGE_SIZE = 30       # records requested per page (the `rn` parameter)
    PAGE_COUNT = 10      # pages fetched by main()
    TIMEOUT = 10         # seconds before an HTTP request is abandoned

    # Compiled once instead of re-scanning the response for every record.
    _ARTIST_RE = re.compile(r'"artist":"(.*?)"')
    _ALBUM_RE = re.compile(r'"album":"(.*?)"')
    _RID_RE = re.compile(r'"rid":(.*?),')

    def __init__(self):
        # The csrf value must equal the kw_token inside the cookie.
        # NOTE(review): these are values captured from one browser session and
        # will presumably expire -- refresh them if the API rejects requests.
        self.headers = {
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.71 Safari/537.36',
            'referer': 'http://www.kuwo.cn/rankList',
            'csrf': 'FB96B28A1J',
            'cookie': 'Hm_lvt_cdb524f42f0ce19b169a8071123a4797=1642136341; _ga=GA1.2.905152829.1642136341; _gid=GA1.2.1336775727.1642136341; Hm_lpvt_cdb524f42f0ce19b169a8071123a4797=1642137803; _gat=1; kw_token=FB96B28A1J'
        }

    def read_url(self, url: str) -> str:
        """Fetch *url* with the spider's headers and return the body as text.

        The body is decoded as UTF-8. A timeout is applied so that a stalled
        connection raises instead of blocking the script forever.
        """
        res = requests.get(url, headers=self.headers, timeout=self.TIMEOUT)
        return res.content.decode('utf-8')

    def pare_url(self, html: str, li: list) -> None:
        """Extract song records from one API response and append them to *li*.

        Each record is a dict with the keys 'singers', 'name' and 'link'.
        At most PAGE_SIZE records are taken; a response with fewer matches
        yields fewer records instead of raising IndexError.
        """
        # Scan the response once per field rather than once per record.
        artists = self._ARTIST_RE.findall(html)
        # NOTE(review): 'name' is filled from the "album" field, as in the
        # original code; confirm whether the song title field was intended.
        albums = self._ALBUM_RE.findall(html)
        rids = self._RID_RE.findall(html)

        # zip() stops at the shortest list, so a short page cannot overrun.
        for artist, album, rid in zip(artists[:self.PAGE_SIZE], albums, rids):
            li.append({
                'singers': artist,
                'name': album,
                'link': self.DETAIL_URL + rid,
            })

    def write_html(self, header, li):
        """Write the records in *li* to the CSV file with *header* as columns."""
        # newline='' lets the csv module control line endings; without it
        # every row is followed by a blank line on Windows.
        with open('酷我音乐top300.csv', 'w', encoding='utf-8', newline='') as f:
            w = csv.DictWriter(f, header)
            w.writeheader()
            w.writerows(li)
        print("写入成功!")

    def main(self):
        """Fetch every chart page, collect the records and write the CSV."""
        header = ['singers', 'name', 'link']
        li = []
        for page in range(1, self.PAGE_COUNT + 1):
            url = self.API_URL.format(page=page, size=self.PAGE_SIZE)
            self.pare_url(self.read_url(url), li)
        self.write_html(header, li)


if __name__ == '__main__':
    k = KuWOSpider()
    k.main()
结果展示
