import csv

import requests
from bs4 import BeautifulSoup as bs
from fake_useragent import UserAgent
"""
翻页:
华北:http://www.weather.com.cn/textFC/hb.shtml
东北:http://www.weather.com.cn/textFC/db.shtml
华东: http://www.weather.com.cn/textFC/hd.shtml
"""
class WeatherSpider:
    def __init__(self):
        # Use a random User-Agent on each run to look like a regular browser
        self.headers = {
            'User-Agent': UserAgent().random
        }
        self.data_list = []
    # Send the request and return the decoded page source
    def get_source(self, url):
        try:
            response = requests.get(url, headers=self.headers)
            if response.status_code == 200:
                return response.content.decode('utf-8')
        except Exception as error:
            print(error)
        # Implicitly returns None on a non-200 status or a request error
    # Parse one region page and collect a {city, temp} record per table row
    def parse_html(self, html):
        # Note: the Hong Kong/Macau/Taiwan page ('gat') reportedly contains
        # malformed HTML; switching the parser to 'html5lib' is a common
        # workaround if lxml trips on it
        soup = bs(html, 'lxml')
        # 1. Find the div with class='conMidtab'; only the first one is needed
        conMidtab = soup.find(class_='conMidtab')
        # 2. Find every table inside it (one table per province)
        tables = conMidtab.find_all('table')
        for table in tables:
            # 3. Find all tr tags under the table, skipping the two header rows
            trs = table.find_all('tr')[2:]
            for index, tr in enumerate(trs):
                item = {}
                # Collect the td cells: the city name is in the first td
                # (second td on a province's first row, whose first td holds
                # the province name), and the minimum temperature is in the
                # second-to-last td
                tds = tr.find_all('td')
                if index == 0:
                    city_td = list(tds[1].stripped_strings)[0]
                else:
                    city_td = list(tds[0].stripped_strings)[0]
                temps_td = list(tds[-2].stripped_strings)[0]
                item['city'] = city_td
                item['temp'] = temps_td
                self.data_list.append(item)
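
    # For reference, each record appended above has the shape
    # {'city': <city name>, 'temp': <minimum temperature>}; the actual
    # values depend on the live page.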
    # Persist the collected records. A minimal sketch: the original left this
    # method as a stub, so the filename 'weather.csv' and the CSV field names
    # below are assumptions, not part of the original script.
    def save_data(self):
        with open('weather.csv', 'w', encoding='utf-8', newline='') as f:
            writer = csv.DictWriter(f, fieldnames=['city', 'temp'])
            writer.writeheader()
            writer.writerows(self.data_list)
weather = WeatherSpider()
# Region codes: North China, Northeast, East China, Central China, South China,
# Northwest, Southwest, and Hong Kong/Macau/Taiwan
areas = ['hb', 'db', 'hd', 'hz', 'hn', 'xb', 'xn', 'gat']
for area in areas:
    url = f'http://www.weather.com.cn/textFC/{area}.shtml'
    html = weather.get_source(url)
    if html:  # skip a region whose page failed to download
        weather.parse_html(html)
print(weather.data_list)
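# Illustrative follow-up, not part of the original flow: once every region has
# been parsed, persist the rows with the (assumed) CSV-writing save_data above.
weather.save_data()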