• https://www.cnblogs.com/
    # ● 爬取要求:
    # ○ 1、输入要搜索的内容
    # ○ 2、翻页爬取相关页面html代码
    # ○ 3、保存到本地
    import requests
    1.作业二
    # ● 目标网站:https://www.cnblogs.com/
    # ● 爬取要求:
    # ○ 1、输入要搜索的内容
    # ○ 2、翻页爬取相关页面html代码
    # ○ 3、保存到本地
    import requests
  • https://zzk.cnblogs.com/s/blogpost?Keywords=python&pageindex=1
    # ua 伪装
    headers = {
    ‘user-agent’: ‘Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.82 Safari/537.36’,
    ‘cookie’: ‘_gid=GA1.2.522223452.1644990859; gads=ID=19b29d9d008329ac:T=1644990889:S=ALNI_MYgTn7vl1YgmgxQbAWv_CzL0YrnIA; utmz=59123430.1644990907.1.1.utmcsr=cnblogs.com|utmccn=(referral)|utmcmd=referral|utmcct=/; NotRobot=CfDJ8GsLOKiGtk1Au0UP1SouGdWXSSbPJFpRXDfcyWHhDJJjOp6Zd5JzV3pmg-vBAICRzicAqC4PdLAOhYoeZ2r81iekL4dWnqHzmFeS1wmYKhVAD0etHN1mZKfm5Ic-eKVO7A; Hm_lvt_866c9be12d4a814454792b1fd0fed295=1644990858,1644994817; _ga_3Q0DVSGN10=GS1.1.1644994830.1.1.1644994898.60; _ga=GA1.2.1389332791.1644990859; utma=59123430.1389332791.1644990859.1644990907.1645012342.2; utmc=59123430; utmt=1; utmb=59123430.1.10.1645012342’
    }
    # 发送请求
    response = requests.get(url, headers=headers)
    # print(response.text)
    # 持久化保存
    with open(f’第{pageindex}页.html’, ‘w’, encoding=’utf-8’ ) as f:
    f.write(response.text)
    指定url
    Keyword = input(‘输入内容:’)
    pageindex = input(‘输入页码:’)
    url = ‘https://zzk.cnblogs.com/s/blogpost?Keywords=python&pageindex=1
    # ua 伪装
    headers = {
    ‘user-agent’: ‘Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.82 Safari/537.36’,
    ‘cookie’: ‘_gid=GA1.2.522223452.1644990859; gads=ID=19b29d9d008329ac:T=1644990889:S=ALNI_MYgTn7vl1YgmgxQbAWv_CzL0YrnIA; utmz=59123430.1644990907.1.1.utmcsr=cnblogs.com|utmccn=(referral)|utmcmd=referral|utmcct=/; NotRobot=CfDJ8GsLOKiGtk1Au0UP1SouGdWXSSbPJFpRXDfcyWHhDJJjOp6Zd5JzV3pmg-vBAICRzicAqC4PdLAOhYoeZ2r81iekL4dWnqHzmFeS1wmYKhVAD0etHN1mZKfm5Ic-eKVO7A; Hm_lvt_866c9be12d4a814454792b1fd0fed295=1644990858,1644994817; _ga_3Q0DVSGN10=GS1.1.1644994830.1.1.1644994898.60; _ga=GA1.2.1389332791.1644990859; utma=59123430.1389332791.1644990859.1644990907.1645012342.2; utmc=59123430; utmt=1; utmb=59123430.1.10.1645012342’
    }
    # 发送请求
    response = requests.get(url, headers=headers)
    # print(response.text)
    # 持久化保存
    with open(f’第{pageindex}页.html’, ‘w’, encoding=’utf-8’ ) as f:
    f.write(response.text)

    # 1. Assignment 2
    # Target site: https://www.cnblogs.com/
    # Requirements:
    #   1. Read the text to search for from the user.
    #   2. Page through the results and fetch the HTML of the matching pages.
    #   3. Save the HTML locally.
    import requests

    # Specify the URL.
    # The search term and page number come from the user; this script fetches
    # one result page per run.
    Keyword = input('输入内容:')
    pageindex = input('输入页码:')
    url = 'https://zzk.cnblogs.com/s/blogpost'
    # Pass the query string through `params` instead of hard-coding it in the
    # URL, so the user's input is actually used and is percent-encoded.
    params = {'Keywords': Keyword, 'pageindex': pageindex}
    # User-agent spoofing: send browser-like headers with the request.
    # NOTE(review): the cookie was copied from a browser session and is
    # session-specific; it presumably needs refreshing before reuse.
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.82 Safari/537.36',
        'cookie': '_gid=GA1.2.522223452.1644990859; gads=ID=19b29d9d008329ac:T=1644990889:S=ALNI_MYgTn7vl1YgmgxQbAWv_CzL0YrnIA; utmz=59123430.1644990907.1.1.utmcsr=cnblogs.com|utmccn=(referral)|utmcmd=referral|utmcct=/; NotRobot=CfDJ8GsLOKiGtk1Au0UP1SouGdWXSSbPJFpRXDfcyWHhDJJjOp6Zd5JzV3pmg-vBAICRzicAqC4PdLAOhYoeZ2r81iekL4dWnqHzmFeS1wmYKhVAD0etHN1mZKfm5Ic-eKVO7A; Hm_lvt_866c9be12d4a814454792b1fd0fed295=1644990858,1644994817; _ga_3Q0DVSGN10=GS1.1.1644994830.1.1.1644994898.60; _ga=GA1.2.1389332791.1644990859; utma=59123430.1389332791.1644990859.1644990907.1645012342.2; utmc=59123430; utmt=1; utmb=59123430.1.10.1645012342',
    }
    # Send the request. The timeout keeps a stalled connection from hanging
    # the script forever.
    response = requests.get(url, params=params, headers=headers, timeout=10)
    # Fail loudly on an HTTP error instead of saving an error page as a result.
    response.raise_for_status()
    # print(response.text)
    # Persist the page to disk, named after the requested page number.
    with open(f'第{pageindex}页.html', 'w', encoding='utf-8') as f:
        f.write(response.text)