import csv
import html
import re

import requests
# Weather page to scrape.
url = 'http://www.weather.com.cn/weather/101070301.shtml'

# Browser-like User-Agent sent with the request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.82 Safari/537.36',
}

# Seconds to wait for the server before giving up instead of hanging forever.
REQUEST_TIMEOUT = 10

# Header row of the output CSV: date, weather, high temp, low temp, wind force.
CSV_HEADER = ['日期', '天气', '最高气温', '最低气温', '风力']

# Captures the <ul class="t clearfix"> element. re.S lets "." span newlines;
# the greedy leading ".*" means the last such list in the page is selected.
FORECAST_LIST_RE = re.compile(r'.*(<ul class="t clearfix">.*?</ul>).*', re.S)

# One <li>...</li> item inside the list.
ITEM_RE = re.compile(r'<li.*?>.*?</li>', re.S)

# Fields of one item: <h1> text, first <p> text, optional <span> text, and the
# text of the next two <i> elements.
# NOTE(review): the <span> (high temperature) appears to be omitted for some
# entries, e.g. today's entry late in the day -- confirm against the live page.
# It is optional here so such entries no longer abort the whole run.
ENTRY_RE = re.compile(
    r'<li.*?>.*?<h1>(.*?)</h1>.*?<p.*?>(.*?)</p>.*?'
    r'(?:<span>(.*?)</span>.*?)?<i>(.*?)</i>.*?<i>(.*?)</i>.*?</li>',
    re.S,
)


def fetch_html(page_url=url, timeout=REQUEST_TIMEOUT):
    """Download *page_url* and return its body decoded as UTF-8.

    Raises:
        requests.RequestException: on connection failure, timeout, or a
            non-success HTTP status.
    """
    response = requests.get(page_url, headers=headers, timeout=timeout)
    # Fail loudly on 4xx/5xx rather than parsing an error page.
    response.raise_for_status()
    response.encoding = 'utf-8'
    return response.text


def parse_forecast(page):
    """Extract forecast rows from the HTML text *page*.

    Returns:
        A list of 5-tuples of strings in the order of ``CSV_HEADER``. A field
        whose markup is absent from an item is returned as ``''``. HTML
        entities in the captured text (e.g. ``&lt;``) are unescaped.

    Raises:
        ValueError: if the ``<ul class="t clearfix">`` list is not in *page*.
    """
    found = FORECAST_LIST_RE.match(page)
    if found is None:
        raise ValueError('forecast list <ul class="t clearfix"> not found in page')

    rows = []
    for item in ITEM_RE.findall(found.group(1)):
        entry = ENTRY_RE.match(item)
        if entry is None:
            # Not shaped like a forecast entry; skip it instead of crashing.
            continue
        # An unmatched optional group is None; write it as an empty cell.
        rows.append(tuple(html.unescape(field or '') for field in entry.groups()))
    return rows


def save_csv(rows, path='weather.csv'):
    """Write the header row followed by *rows* to *path*.

    The file is encoded as UTF-8 with a BOM (``utf-8-sig``) and opened with
    ``newline=''`` as the csv module requires.
    """
    with open(path, 'w', encoding='utf-8-sig', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(CSV_HEADER)
        writer.writerows(rows)


def main():
    """Fetch the page, parse the forecast, and write ``weather.csv``."""
    save_csv(parse_forecast(fetch_html()))


if __name__ == '__main__':
    main()