Code - 06-三国演义 - 《Python》

"""
目标网站：
    https://www.shicimingju.com/book/sanguoyanyi.html
需求：
    bs4爬取三国演义所有章节标题和内容
模块：
    requests, bs4
"""
import requests, os
from bs4 import BeautifulSoup
# Entry page that holds the table of contents, plus the browser-like
# User-Agent header sent with every request.
url = 'https://www.shicimingju.com/book/sanguoyanyi.html'
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.63 Safari/537.36'}
# Seconds to wait on any single request; requests has no default timeout,
# so without this a stalled server would block the script forever.
request_timeout = 10

# A single session is shared by the table-of-contents request and every
# chapter request so the underlying connection can be reused.
with requests.Session() as session:
    session.headers.update(headers)

    # 01. Fetch the table of contents from the entry page.
    toc_response = session.get(url, timeout=request_timeout)
    # Fail loudly on an HTTP error instead of parsing an error page.
    toc_response.raise_for_status()
    toc_text = toc_response.content.decode()
    # Parse the page and collect every chapter link (a list of <a> tags).
    soup = BeautifulSoup(toc_text, 'lxml')
    a_list = soup.select('.book-mulu ul li a')

    # 02. Download each chapter and append "title\ncontent" to the output
    # file. The `with` block guarantees the file is closed even when a
    # request or a parse step raises part-way through the book.
    with open('三国演义.txt', 'w', encoding='utf-8') as f:
        for a in a_list:
            novel_title = a.text
            # Chapter hrefs are concatenated onto the site root.
            toc_url = 'https://www.shicimingju.com' + a['href']
            chapter_response = session.get(toc_url, timeout=request_timeout)
            chapter_response.raise_for_status()
            novel_text = chapter_response.content.decode()
            soup1 = BeautifulSoup(novel_text, 'lxml')
            content_node = soup1.find(class_='chapter_content')
            if content_node is None:
                # find() returns None when the element is absent; report which
                # page was affected rather than failing with an AttributeError.
                raise RuntimeError(
                    'chapter_content element not found at ' + toc_url
                )
            novel = content_node.text
            f.write(novel_title + '\n' + novel)
            print(novel_title, '已下载完成')
print('全部章节已经下载完成~')