1. import requests
    2. import csv
    3. from bs4 import BeautifulSoup as bs
    4. from fake_useragent import UserAgent
    5. import os
    6. '''
    7. https://movie.douban.com/top250?start=0&filter=
    8. https://movie.douban.com/top250?start=25&filter=
    9. https://movie.douban.com/top250?start=50&filter=
    10. '''
    11. # 保存页面
    12. def savePage(page):
    13. start = int(page) * 50
    14. url = f'https://movie.douban.com/top250?start={start}&filter='
    15. headers = {
    16. 'User-Agent': UserAgent().random,
    17. 'Referer': url
    18. }
    19. if not os.path.exists(f'{page + 1}页.html'):
    20. try:
    21. response = requests.get(url, headers=headers)
    22. if response.status_code == 200:
    23. with open(f'{page + 1}页.html', 'w', encoding='utf-8') as file:
    24. file.write(response.text)
    25. else:
    26. print(response.reason)
    27. except Exception as error:
    28. print(error)
    29. # 获取页面数据
    30. def getPageInfo(page,all_movie):
    31. with open(f'{page + 1}页.html','r',encoding='utf-8') as file:
    32. html = file.read()
    33. soup = bs(html,'lxml')
    34. all_info = soup.find_all('div',class_='info')
    35. for info in all_info:
    36. title_info = list(info.stripped_strings)
    37. title = title_info[0] + title_info[1] # 电影名称
    38. score = info.find('span',class_='rating_num').string
    39. if info.select('span[class="inq"]'): # 使用CSS选择器查找
    40. inq = info.select('span[class="inq"]')[0].text
    41. else:
    42. inq = '无影评'
    43. all_movie.append({'电影名称':title,'评分':score,'影评分':inq})
    44. return all_movie
    45. # 将数据保存到csv文件中
    46. def saveCsv(movie_list):
    47. fields_name = ['电影名称','评分','影评分']
    48. with open('movies.csv','w',encoding='utf-8',newline='') as file:
    49. writer = csv.DictWriter(file,fieldnames=fields_name)
    50. writer.writeheader()
    51. writer.writerows(movie_list)
    52. if __name__ == '__main__':
    53. all_movie = []
    54. for page in range(0,10):
    55. savePage(page)
    56. movie_list = getPageInfo(page,all_movie)
    57. saveCsv(movie_list)
    58. print('已写入完毕')