"""Scrape the Kugou ranking page and save each song's title and link to a CSV file.

The page is fetched over HTTP, the song list is pulled out of the HTML with
regular expressions, and the resulting ``(title, link)`` pairs are written to
``song.csv``.
"""
import csv
import re

RANK_URL = 'https://www.kugou.com/yy/html/rank.html'
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.60 Safari/537.36 Edg/100.0.1185.29'
}
OUTPUT_PATH = 'song.csv'
REQUEST_TIMEOUT = 10  # seconds; without a timeout requests may block forever

# The greedy leading ``.*`` makes this capture the LAST ``<ul>...</ul>`` in the page.
_LIST_RE = re.compile(r'.*(<ul>.*?</ul>).*', re.S)
# One chunk per list item, from ``<li class`` up to its trailing "tips" span.
_ITEM_RE = re.compile(r'<li class.*?>.*?<span class="pc_temp_tips_r">', re.S)
# Group 1: the item's ``title`` attribute; group 2: the first ``href`` inside it.
_FIELDS_RE = re.compile(r'<li class=" " title="(.*?)" data-index=".*?">.*?<.*?href="(.*?)" .*?>.*?.*?</span>.*?<span class="pc_temp_tips_r">', re.S)


def fetch_rank_page(url=RANK_URL):
    """Download *url* and return the response body as text.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
        requests.RequestException: on connection problems or timeout.
    """
    # Imported here so the parsing/CSV helpers stay importable without the
    # third-party ``requests`` package installed.
    import requests

    response = requests.get(url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    # Fail loudly instead of trying to parse an error page.
    response.raise_for_status()
    return response.text


def parse_songs(page: str) -> list:
    """Extract ``(title, link)`` tuples from the ranking page HTML.

    Args:
        page: Full HTML text of the ranking page.

    Returns:
        A list of ``(title, link)`` tuples in page order. List items that do
        not have the expected ``<li class=" " title=... data-index=...>``
        layout are skipped.

    Raises:
        ValueError: if the page contains no ``<ul>...</ul>`` block at all.
    """
    listing = _LIST_RE.match(page)
    if listing is None:
        raise ValueError('no <ul>...</ul> block found in the page')

    songs = []
    for item in _ITEM_RE.findall(listing.group(1)):
        fields = _FIELDS_RE.match(item)
        if fields is None:
            # Not a song entry in the expected layout; ignore it rather than
            # aborting the whole run.
            continue
        songs.append((fields.group(1), fields.group(2)))
    return songs


def save_songs(songs, path=OUTPUT_PATH):
    """Write *songs* (an iterable of ``(title, link)`` rows) to a CSV file at *path*."""
    # newline='' lets the csv module control line endings; without it every
    # row is followed by a blank line on Windows.
    with open(path, 'w', encoding='utf-8', newline='') as f:
        writer = csv.writer(f)
        # Header row: "artist--song title", "song link".
        writer.writerow(['歌手--歌名', '歌曲链接'])
        writer.writerows(songs)


def main():
    """Fetch the ranking page, parse it, and write the CSV file."""
    save_songs(parse_songs(fetch_rank_page()))


if __name__ == '__main__':
    main()