1. import requests
    2. from fake_useragent import UserAgent
    3. from bs4 import BeautifulSoup as bs
    4. import csv
    5. from bs4 import BeautifulSoup
"""
Regional pagination (one page per region of China):
North China:     http://www.weather.com.cn/textFC/hb.shtml
Northeast:       http://www.weather.com.cn/textFC/db.shtml
East China:      http://www.weather.com.cn/textFC/hd.shtml
"""
    12. class WeatherSpider:
    13. def __init__(self):
    14. self.headers = {
    15. 'User-Agent':UserAgent().random
    16. }
    17. self.data_list = []
    18. # 发送请求
    19. def get_source(self,url):
    20. try:
    21. response = requests.get(url, headers=self.headers)
    22. if response.status_code == 200:
    23. return response.content.decode('utf-8')
    24. except Exception as error:
    25. print(error)
    26. # 解析数据
    27. def parse_html(self,html):
    28. soup = bs(html,'lxml')
    29. # 找到class='conMidtab'的div标签,只需要找一个
    30. conMidtab = soup.find(class_='conMidtab')
    31. # 2 找到table标签
    32. tables = conMidtab.find_all('table')
    33. for table in tables:
    34. #3、找到table标签下面的所有tr
    35. trs = table.find_all('tr')[2:]
    36. for index,tr in enumerate(trs):
    37. item = {}
    38. # 找到tr标签下面的所有的td,其中第一个td是城市名,倒数第二个td是最低温度
    39. tds = tr.find_all('td')
    40. if index == 0:
    41. city_td = list(tds[1].stripped_strings)[0]
    42. else:
    43. city_td = list(tds[0].stripped_strings)[0]
    44. temps_td = list(tds[-2].stripped_strings)[0]
    45. item['city'] = city_td
    46. item['temp'] = temps_td
    47. self.data_list.append(item)
    48. def save_data(self):
    49. pass
    50. weather = WeatherSpider()
    51. dicts = ['hb','db','hd','hz','hn','xb','xn','gat']
    52. for dict in dicts:
    53. url = f'http://www.weather.com.cn/textFC/{dict}.shtml'
    54. html = weather.get_source(url)
    55. weather.parse_html(html)
    56. print(weather.data_list)