    ● Target site: https://movie.douban.com/top250
    # ● Scraping requirements:
    # ○ 1. Paginate through the list and fetch each page's HTML source
    # ○ 2. Parse the data with bs4, extracting every movie's title, rating, and detail-page link
    # ○ 3. Save the data to a CSV file
    import requests
    from bs4 import BeautifulSoup
    import csv

    data_list = []  # accumulates one dict per movie across all pages

    for n in range(1, 3):  # demo scrapes two pages; range(0, 10) would cover all 250 titles
        # Each page holds 25 movies; the start query parameter is the offset
        url = f'https://movie.douban.com/top250?start={n * 25}&filter='
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36'
        }
        response = requests.get(url, headers=headers)
        result = response.content.decode('utf-8')
        # print(result)
        soup = BeautifulSoup(result, 'lxml')
        # Grab the <ol> tag that wraps the movie list
        ol_tag = soup.find('ol', class_='grid_view')
        # Grab all of its <li> tags, one per movie
        li_tag_list = ol_tag.find_all('li')
        for li_tag in li_tag_list:
            item = {}
            item['电影名'] = li_tag.find('div', class_='info').div.a.span.string  # movie title
            item['评分'] = li_tag.find('div', class_='info').find('div', class_='bd').div.find_all('span')[1].string  # rating
            item['链接'] = li_tag.find('div', class_='info').div.a['href']  # detail-page link
            data_list.append(item)
            print(item)

    # Write everything out once, after all pages have been scraped
    with open('db.csv', 'w', encoding='utf-8-sig', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=['电影名', '评分', '链接'])
        writer.writeheader()
        writer.writerows(data_list)
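
    As a variation, the same parsing can be written with CSS selectors via soup.select, which is often easier to read than long chains of find() calls, and it is easy to add a timeout, an HTTP status check, and a polite delay between requests. Below is a minimal sketch under the same page structure; the rating_num class name and the one-second delay are assumptions, not something confirmed by the code above:

    import csv
    import time

    import requests
    from bs4 import BeautifulSoup

    HEADERS = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36'
    }

    def scrape_page(start):
        """Fetch one results page and return a list of movie dicts."""
        url = f'https://movie.douban.com/top250?start={start}&filter='
        response = requests.get(url, headers=HEADERS, timeout=10)
        response.raise_for_status()  # stop early on a non-2xx response
        soup = BeautifulSoup(response.text, 'lxml')
        rows = []
        for li in soup.select('ol.grid_view > li'):
            title = li.select_one('div.info a span')
            rating = li.select_one('span.rating_num')  # assumed class of the score span
            link = li.select_one('div.info a')
            if title and rating and link:  # skip entries with missing fields
                rows.append({'电影名': title.string, '评分': rating.string, '链接': link['href']})
        return rows

    if __name__ == '__main__':
        all_rows = []
        for page in range(10):  # 10 pages x 25 entries = all 250 titles
            all_rows.extend(scrape_page(page * 25))
            time.sleep(1)  # polite delay between requests
        with open('db.csv', 'w', encoding='utf-8-sig', newline='') as f:
            writer = csv.DictWriter(f, fieldnames=['电影名', '评分', '链接'])
            writer.writeheader()
            writer.writerows(all_rows)

    Separating fetching from writing this way also means the CSV is written exactly once, after every page has been scraped, rather than being reopened inside the loop.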