# 作业--浅安老师 - 第二次作业（天气预报完善） - 《爬虫知识库》

import requests
import csv
import re
# Forecast page that is scraped, and the browser-like headers sent with the
# request.
url = 'http://www.weather.com.cn/weather/101270101.shtml'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.60 Safari/537.36 Edg/100.0.1185.29'
}

# Column titles of the CSV file; one title per field of a parsed row
# (date, weather, temperature, wind).
CSV_HEADER = ['日期', '天气', '温度', '风力']

# re.S lets "." match newlines, because the markup spans several lines.
# The leading greedy ".*" makes the match select the LAST
# <ul class="t clearfix"> list in the document.
_FORECAST_LIST_RE = re.compile(r'.*(<ul class="t clearfix">.*?</ul>).*', re.S)

# One <li> element per forecast day.
_DAY_ITEM_RE = re.compile(r'<li.*?>.*?</li>', re.S)

# Groups: 1 = date (<h1>), 2 = weather text (first <p ...>), 3 = high
# temperature (<span>, optional), 4 = low temperature (<i>), 5 = wind (<i>).
# The "<span>high</span>/" part is optional so that an entry carrying only a
# low temperature is kept instead of being dropped.
# NOTE(review): the evening "today" entry is assumed to be such an entry --
# confirm against the live page.
# The wind group accepts any text, not only values starting with "<".
_DAY_FIELDS_RE = re.compile(
    r'<li .*?>.*?<h1>(.*?)</h1>.*?<p .*?>(.*?)</p>.*?'
    r'(?:<span>(.*?)</span>/)?<i>(.*?)</i>.*?<i>(.*?)</i>.*?</li>',
    re.S,
)


def fetch_html(page_url=url, timeout=10):
    """Download *page_url* and return its body decoded as UTF-8.

    Args:
        page_url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.RequestException: On connection problems, timeouts or a
            non-success HTTP status.
    """
    res = requests.get(page_url, headers=headers, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    res.raise_for_status()
    res.encoding = 'utf-8'
    return res.text


def parse_forecast(html: str) -> list:
    """Extract the per-day forecast rows from the page markup.

    Args:
        html: Full HTML text of the forecast page.

    Returns:
        A list of ``(date, weather, temperature, wind)`` tuples of strings.
        ``temperature`` is ``"high/low"``, or just the low value when the
        entry has no high temperature. ``<li>`` items that do not match the
        expected layout are skipped.

    Raises:
        ValueError: If the page contains no ``<ul class="t clearfix">`` list.
    """
    found = _FORECAST_LIST_RE.match(html)
    if found is None:
        raise ValueError('forecast list <ul class="t clearfix"> not found in page')

    rows = []
    # Match every <li> on its own so a malformed entry cannot make the lazy
    # ".*?" parts run over into the following day.
    for item in _DAY_ITEM_RE.findall(found.group(1)):
        fields = _DAY_FIELDS_RE.match(item)
        if fields is None:
            continue
        day, weather, high, low, wind = fields.groups()
        temperature = low if high is None else high + '/' + low
        rows.append((day, weather, temperature, wind))
    return rows


def save_csv(rows, path='weather.csv'):
    """Write the header line followed by *rows* to the CSV file at *path*.

    Args:
        rows: Iterable of 4-field rows as produced by ``parse_forecast``.
        path: Destination file; it is overwritten if it already exists.
    """
    # newline='' is what the csv module expects, so it controls line endings.
    with open(path, 'w', encoding='utf-8', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(CSV_HEADER)
        writer.writerows(rows)


def main():
    """Download the forecast page, parse it and store it as weather.csv."""
    # Parsing happens before the file is opened, so a failed download or an
    # unexpected page layout leaves an existing weather.csv untouched.
    save_csv(parse_forecast(fetch_html()))


if __name__ == '__main__':
    main()