• Target site: https://movie.douban.com/top250
• Scraping requirements:
    • 1. Page through the list and fetch the HTML source of each page (see the short pagination sketch after this list)
    • 2. Parse the pages with bs4 and extract every movie title, rating, and detail-page link
    • 3. Save the data to a CSV file
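
Douban's Top 250 shows 25 movies per page, and the page you get is selected by the `start` query parameter, the zero-based offset of the first movie on that page. A minimal sketch of that mapping (assuming 25 entries per page, as the full script below does):

    # Sketch: build the URL for each page of the Top 250 list.
    # `start` is the zero-based offset of the first entry; 25 entries per page.
    for page in range(1, 4):
        print(f'https://movie.douban.com/top250?start={(page - 1) * 25}')
    # -> ...?start=0, ...?start=25, ...?start=50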

    import requests
    from bs4 import BeautifulSoup
    import csv
    begin = int(input('请输入开始页:'))  # first page to scrape (page 1 = movies 1-25)
    end = int(input('请输入结束页:'))    # last page to scrape (inclusive)

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/95.0.4638.69 Safari/537.36'
    }

    data = []  # rows from every page, written to the CSV once at the end
    for page in range(begin, end + 1):
        # Page 1 corresponds to start=0, page 2 to start=25, and so on.
        url = f'https://movie.douban.com/top250?start={(page - 1) * 25}'
        print(url)
        response = requests.get(url, headers=headers)
        soup = BeautifulSoup(response.text, 'lxml')

        # Each movie's title and link live in a div.hd, its rating in span.rating_num.
        divs = soup.find_all('div', class_='hd')
        spans = soup.find_all('span', class_='rating_num')
        for name, other in zip(divs, spans):
            title = name.find('span').string   # first span holds the Chinese title
            link = name.find('a').get('href')  # link to the movie's detail page
            score = other.text
            print(title, link, score)
            data.append({
                '电影名': title,
                '链接': link,
                '评分': score
            })

    # Write once after all pages are scraped; opening the file in 'w' mode inside
    # the loop would overwrite everything except the last page.
    with open('douban.csv', 'w', encoding='utf-8-sig', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=['电影名', '链接', '评分'])
        writer.writeheader()
        writer.writerows(data)
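
Once the script has run, a quick sanity check is to read douban.csv back with the standard library's csv.DictReader and print a few rows (a minimal check, assuming the file name and field names used above):

    # Read the CSV back and show the first few rows to confirm it was written correctly.
    import csv

    with open('douban.csv', encoding='utf-8-sig', newline='') as f:
        for i, row in enumerate(csv.DictReader(f)):
            print(row['电影名'], row['评分'], row['链接'])
            if i >= 4:  # only show the first 5 rows
                break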