import requests
import csv
from bs4 import BeautifulSoup as bs
from fake_useragent import UserAgent
import os

'''
Example listing URLs — note the 25-per-page stride:
https://movie.douban.com/top250?start=0&filter=
https://movie.douban.com/top250?start=25&filter=
https://movie.douban.com/top250?start=50&filter=
'''


# Download and cache one listing page.
def savePage(page):
    """Fetch Douban Top-250 listing page *page* and cache it locally.

    The page is saved as ``'{page + 1}页.html'``; if that cache file
    already exists the download is skipped entirely.

    Bugfix: Douban paginates 25 movies per page, so the ``start`` query
    parameter must step by 25. The original ``page * 50`` skipped every
    other page and requested offsets past the end of the list.

    :param page: zero-based page index (0..9 covers all 250 movies)
    """
    start = int(page) * 25  # 25 movies per page — see example URLs above
    url = f'https://movie.douban.com/top250?start={start}&filter='
    headers = {
        # Random UA + matching Referer to look like a normal browser visit.
        'User-Agent': UserAgent().random,
        'Referer': url,
    }
    if not os.path.exists(f'{page + 1}页.html'):
        try:
            # timeout keeps a stalled connection from hanging the scraper
            response = requests.get(url, headers=headers, timeout=10)
            if response.status_code == 200:
                with open(f'{page + 1}页.html', 'w', encoding='utf-8') as file:
                    file.write(response.text)
            else:
                print(response.reason)
        except Exception as error:
            # Best-effort scraping: report and move on to the next page.
            print(error)


# Extract movie data from one cached page.
def getPageInfo(page, all_movie):
    """Parse the cached HTML for *page* and append each movie's data.

    For every ``div.info`` element the title (first two stripped strings),
    rating, and one-line review (or a placeholder when absent) are
    collected into a dict and appended to *all_movie*.

    :param page: zero-based page index whose cache file to read
    :param all_movie: accumulator list of movie dicts (mutated in place)
    :return: the same *all_movie* list, for convenience
    """
    with open(f'{page + 1}页.html', 'r', encoding='utf-8') as file:
        html = file.read()
    soup = bs(html, 'lxml')
    all_info = soup.find_all('div', class_='info')
    for info in all_info:
        title_info = list(info.stripped_strings)
        title = title_info[0] + title_info[1]  # movie title (main + alias)
        score = info.find('span', class_='rating_num').string
        # The one-line review span is missing for some movies.
        inq_spans = info.select('span[class="inq"]')
        if inq_spans:
            inq = inq_spans[0].text
        else:
            inq = '无影评'
        all_movie.append({'电影名称': title, '评分': score, '影评分': inq})
    return all_movie


# Write the collected movie data to a CSV file.
def saveCsv(movie_list):
    """Save *movie_list* to ``movies.csv`` with a header row.

    :param movie_list: list of dicts keyed by 电影名称 / 评分 / 影评分
    """
    fields_name = ['电影名称', '评分', '影评分']
    with open('movies.csv', 'w', encoding='utf-8', newline='') as file:
        writer = csv.DictWriter(file, fieldnames=fields_name)
        writer.writeheader()
        writer.writerows(movie_list)


if __name__ == '__main__':
    all_movie = []
    # 10 pages x 25 movies covers the full Top-250.
    for page in range(0, 10):
        savePage(page)
        movie_list = getPageInfo(page, all_movie)
    saveCsv(movie_list)
    print('已写入完毕')