"""目标网站: https://www.shicimingju.com/book/sanguoyanyi.html需求: bs4爬取三国演义所有章节标题和内容模块: requests, bs4"""import requests, osfrom bs4 import BeautifulSoupurl = 'https://www.shicimingju.com/book/sanguoyanyi.html'headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.63 Safari/537.36'}# 01. 抓取首页的章节目录toc_text = requests.get(url, headers=headers).content.decode()# 实例化bs4对象并加载页面数据soup = BeautifulSoup(toc_text, 'lxml')a_list = soup.select('.book-mulu ul li a') # 此时获得所有的a标签对象(列表)f = open('三国演义.txt', 'w', encoding='utf-8')for a in a_list: novel_title = a.text toc_url = 'https://www.shicimingju.com' + a['href'] novel_text = requests.get(toc_url, headers=headers).content.decode() soup1 = BeautifulSoup(novel_text, 'lxml') novel = soup1.find(class_='chapter_content').text f.write(novel_title +'\n' + novel) print(novel_title, '已下载完成')print('全部章节已经下载完成~')f.close()