爬取酷我音乐热歌榜

  1. import requests
  2. import re
  3. import csv
  4. # 分析页面url
  5. '''
  6. 第一页:http://www.kuwo.cn/api/www/bang/bang/musicList?bangId=93&pn=1&rn=30&httpsStatus=1&reqId=82dbed60-74fa-11ec-b1c4-eb380bf2a161
  7. 第二页:http://www.kuwo.cn/api/www/bang/bang/musicList?bangId=93&pn=2&rn=30&httpsStatus=1&reqId=82dbed60-74fa-11ec-b1c4-eb380bf2a161
  8. 反爬措施:请求头需要携带 referer
  9. 以及 csrf 和 cookie(csrf 的值与 cookie 中的 kw_token 一致)
  10. '''
  11. class KuWOSpider:
  12. def __init__(self):
  13. self.headers = {
  14. 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.71 Safari/537.36',
  15. 'referer': 'http://www.kuwo.cn/rankList',
  16. 'csrf': 'FB96B28A1J',
  17. 'cookie': 'Hm_lvt_cdb524f42f0ce19b169a8071123a4797=1642136341; _ga=GA1.2.905152829.1642136341; _gid=GA1.2.1336775727.1642136341; Hm_lpvt_cdb524f42f0ce19b169a8071123a4797=1642137803; _gat=1; kw_token=FB96B28A1J'
  18. }
  19. def read_url(self, url):
  20. res = requests.get(url, headers=self.headers)
  21. html = res.content.decode('utf-8')
  22. return html
  23. def pare_url(self, html, li):
  24. for i in range(30):
  25. item = {}
  26. item['singers'] = re.findall(r'"artist":"(.*?)"', html)[i]
  27. item['name'] = re.findall(r'"album":"(.*?)"', html)[i]
  28. # 歌曲链接为:'http://www.kuwo.cn/play_detail/206079303',可以取出id与 前面的url链接
  29. item['link'] = 'http://www.kuwo.cn/play_detail/' + re.findall(r'"rid":(.*?),', html)[i]
  30. li.append(item)
  31. def write_html(self, header, li):
  32. with open('酷我音乐top300.csv', 'w', encoding='utf-8') as f:
  33. w = csv.DictWriter(f, header)
  34. w.writeheader()
  35. w.writerows(li)
  36. print("写入成功!")
  37. def main(self):
  38. header = ['singers', 'name', 'link']
  39. li = []
  40. for i in range(1, 11):
  41. html = self.read_url(
  42. f'http://www.kuwo.cn/api/www/bang/bang/musicList?bangId=93&pn={i}&rn=30&httpsStatus=1&reqId=82dbed60-74fa-11ec-b1c4-eb380bf2a161')
  43. self.pare_url(html, li)
  44. # print(li)
  45. self.write_html(header, li)
  46. if __name__ == '__main__':
  47. k = KuWOSpider()
  48. k.main()

结果展示

image.png