1. 作业1

要求:

  • 目标网站:https://www.1ppt.com/moban/
  • 爬取要求:
    • 1、 翻页爬取这个网页上面的源代码
    • 2、 并且保存到本地,注意编码

      代码:

      from fake_useragent import UserAgent
      import urllib.request
      import urllib.parse

class PPT:
    """Fetch a range of 1ppt.com template listing pages and save them as HTML.

    Constructing an instance immediately downloads pages ``start`` through
    ``end`` (inclusive) and writes them to ``PPT1.html``, ``PPT2.html``, ...
    in the current working directory. The file number counts fetched pages
    starting at 1; it is not the site's page number.
    """

    def __init__(self, start, end):
        """Store the page range and run the download right away.

        Args:
            start: First page number to fetch (inclusive).
            end: Last page number to fetch (inclusive).
        """
        self.start = start
        self.end = end
        self.save_data()

    def save_data(self):
        """Write every page yielded by ``get_data`` to ``PPT<n>.html``.

        Files are written with the same gb2312 encoding the pages are
        decoded with, so the saved bytes match the charset of the content.
        """
        for count, html in enumerate(self.get_data(), start=1):
            with open(f"PPT{count}.html", 'w', encoding='gb2312') as f:
                f.write(html)

    def get_data(self):
        """Yield the decoded HTML source of each page in the range.

        Yields:
            str: Page source decoded as gb2312, in page order.
        """
        # An empty range means there is nothing to fetch, so skip building
        # the User-Agent string altogether.
        if self.start > self.end:
            return

        # One User-Agent string is reused for every request in this run.
        user_agent = UserAgent().chrome
        for i in range(self.start, self.end + 1):
            url = f"https://www.1ppt.com/moban/ppt_moban_{i}.html"
            req = urllib.request.Request(url, headers={
                'User-Agent': user_agent
            })
            # Close the connection before handing the page to the consumer,
            # so a suspended generator never holds an open socket.
            # NOTE(review): strict gb2312 decoding raises UnicodeDecodeError
            # on bytes outside that charset; confirm whether 'gbk' is needed.
            with urllib.request.urlopen(req) as res:
                html = res.read().decode('gb2312')
            yield html

if __name__ == '__main__':
    # Script entry point: ask for the first and last page, then download.
    # The numbers are parsed with int() rather than eval(), because eval()
    # would execute any expression typed at the prompt.
    start = int(input('请输入起始页'))
    end = int(input('请输入终止页'))
    PPT(start, end)