import csv
import re
import requests
# Download the Kugou rank page and keep its HTML text in `html`.
url = 'https://www.kugou.com/yy/html/rank.html'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.54 Safari/537.36',
}
# `headers` must be passed by keyword: the second positional parameter of
# requests.get() is `params`, so a positional dict would be sent as
# query-string parameters and the User-Agent header would never be set.
# The timeout keeps the script from waiting indefinitely on the server.
res = requests.get(url, headers=headers, timeout=10)
# Raise on a 4xx/5xx status here rather than parsing an error page below.
res.raise_for_status()
html = res.text
# Extract (title part 1, title part 2, link) tuples from the rank page HTML
# held in `html` and collect them in `lst`.

# Isolate the song-list container so only its <li> entries are scanned.
# The capture ends at the first "</div>" that follows the opening tag.
result = re.search(
    r'(<div class="pc_temp_songlist pc_rank_songlist_short">(.*?)</div>)',
    html,
    re.S,
)
if result is None:
    # Without this check a changed page layout would surface as an opaque
    # AttributeError on `None.group`.
    raise RuntimeError('song list container not found in the downloaded page')
ul = result.group(1)

# Every <li>...</li> fragment inside the container.
li = re.findall(r'<li.*?>.*?</li>', ul, re.S)

# Groups: 1 and 2 are the two halves of the <li> title attribute, split at
# its first "-"; 3 is the href of the first <a> inside the item.
# NOTE(review): the halves are not stripped and their order is not checked
# here - confirm it matches the column order of the CSV header.
pattern = re.compile(r'<li .*?title="(.*?)-(.*?)".*?>.*?<a.*?href="(.*?)".*?</a>.*?</li>', re.S)

lst = []
for item in li:
    entry = pattern.match(item)
    if entry is None:
        # An <li> without the expected title/link is not a song row.
        continue
    tu = entry.groups()
    print(tu)
    lst.append(tu)
# Write the header row followed by the collected rows to the CSV file.
# 'utf-8-sig' writes a UTF-8 byte-order mark at the start of the file, and
# newline='' leaves line-ending handling to the csv module.
with open('酷狗音乐飙升榜.csv', mode='w', newline='', encoding='utf-8-sig') as out_file:
    csv_out = csv.writer(out_file)
    csv_out.writerows([['歌名', '歌手', '详情网页'], *lst])