import csv

import requests
from bs4 import BeautifulSoup
from fake_useragent import UserAgent

"""
Paging through the regions:
North China:     http://www.weather.com.cn/textFC/hb.shtml
Northeast China: http://www.weather.com.cn/textFC/db.shtml
East China:      http://www.weather.com.cn/textFC/hd.shtml
"""


class WeatherSpider:
    def __init__(self):
        self.headers = {'User-Agent': UserAgent().random}
        self.data_list = []

    # Send the request and return the decoded page source
    def get_source(self, url):
        try:
            response = requests.get(url, headers=self.headers)
            if response.status_code == 200:
                return response.content.decode('utf-8')
        except Exception as error:
            print(error)

    # Parse the data
    def parse_html(self, html):
        if not html:
            # get_source returns None on a failed request
            return
        soup = BeautifulSoup(html, 'lxml')
        # 1. Find the div tag with class='conMidtab'; only the first one is needed
        conMidtab = soup.find(class_='conMidtab')
        # 2. Find the table tags under it (one table per province)
        tables = conMidtab.find_all('table')
        for table in tables:
            # 3. Find all tr tags under the table, skipping the two header rows
            trs = table.find_all('tr')[2:]
            for index, tr in enumerate(trs):
                item = {}
                # Find all td tags under the tr: the first td is the city name
                # and the second-to-last td is the minimum temperature. In the
                # first data row the province name occupies td[0] (via rowspan),
                # so the city name sits in td[1] instead.
                tds = tr.find_all('td')
                if index == 0:
                    city_td = list(tds[1].stripped_strings)[0]
                else:
                    city_td = list(tds[0].stripped_strings)[0]
                temps_td = list(tds[-2].stripped_strings)[0]
                item['city'] = city_td
                item['temp'] = temps_td
                self.data_list.append(item)

    def save_data(self):
        pass  # left as a stub; see the CSV sketch at the end of the file


weather = WeatherSpider()
regions = ['hb', 'db', 'hd', 'hz', 'hn', 'xb', 'xn', 'gat']
for region in regions:
    url = f'http://www.weather.com.cn/textFC/{region}.shtml'
    html = weather.get_source(url)
    weather.parse_html(html)
print(weather.data_list)
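
# save_data is left as a stub above, but the csv import suggests the scraped
# rows were meant to be written to disk. A minimal sketch using csv.DictWriter;
# the filename 'weather.csv' and the two-column layout are assumptions, not
# from the original.
with open('weather.csv', 'w', newline='', encoding='utf-8') as f:
    writer = csv.DictWriter(f, fieldnames=['city', 'temp'])
    writer.writeheader()
    writer.writerows(weather.data_list)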