# Project goal: extract every city name listed on
# https://www.aqistudy.cn/historydata/
import requests
from lxml import etree


def fetch_city_names(url, headers, timeout=10):
    """Download the page at *url* and return the city names found on it.

    Two groups of ``<a>`` tags are collected with a single XPath union:

    * ``//div[@class="bottom"]/ul/li/a``        -- the "hot cities" links
    * ``//div[@class="bottom"]/ul/div[2]/li/a`` -- the "all cities" links

    Args:
        url: Address of the page to scrape.
        headers: HTTP request headers (a User-Agent, at minimum) sent with
            the request.
        timeout: Seconds to wait for the server before giving up, so the
            script cannot hang forever on an unresponsive host.

    Returns:
        A list with the first text node of every matched ``<a>`` tag.
        Anchors that contain no direct text are skipped.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    response = requests.get(url=url, headers=headers, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of parsing an error page as if it were
    # the city list.
    response.raise_for_status()

    tree = etree.HTML(response.text)
    # One XPath union selects the anchors of both city groups at once.
    a_list = tree.xpath(
        '//div[@class="bottom"]/ul/li/a'
        ' | //div[@class="bottom"]/ul/div[2]/li/a'
    )

    city_names = []
    for a in a_list:
        text_nodes = a.xpath('./text()')
        # An anchor without a direct text node would make ``[0]`` raise
        # IndexError, so skip it.
        if text_nodes:
            city_names.append(text_nodes[0])
    return city_names


if __name__ == "__main__":
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36'
    }
    url = 'https://www.aqistudy.cn/historydata/'
    all_city_names = fetch_city_names(url, headers)
    print(all_city_names, len(all_city_names))