# Source site: http://www.17989.com/xiaohua/5.htm
# Scrapes the title and body text of every joke entry on the listing pages.
from urllib.parse import urljoin

from lxml import etree
import requests
from fake_useragent import UserAgent

# Site root used to turn the "next page" href into a full URL.
BASE_URL = "http://www.17989.com"


def download(url, timeout=10):
    """Fetch *url* and return the response body decoded as UTF-8 text.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before requests gives up,
            so a stalled connection cannot hang the script forever.

    Returns:
        The page source as a string.
    """
    headers = {'User-Agent': UserAgent().random}
    response = requests.get(url=url, headers=headers, timeout=timeout)
    # The original value here was the misspelled 'utf-=8'.
    response.encoding = 'utf-8'
    return response.text


def get_data(html, page=1, data_list=None):
    """Extract the title and content of each list entry found in *html*.

    Every entry is printed and appended to *data_list* as a dict with the
    keys ``'title'`` and ``'content'``.

    Args:
        html: Page source to parse.
        page: Page number, used only in the printed progress line.
        data_list: List to append results to; a new list is created when
            omitted, so the function no longer depends on module globals.

    Returns:
        The list holding the accumulated entries.
    """
    if data_list is None:
        data_list = []

    tree = etree.HTML(html)
    entries = tree.xpath('//div[@class="module articlelist"]/ul/li')
    for index, entry in enumerate(entries, start=1):
        titles = entry.xpath('.//div[@class="hd"]/text()')
        # An entry without title text must not abort the whole page.
        title = titles[0] if titles else ''
        # Kept as the list of text nodes returned by xpath.
        content = entry.xpath('.//pre/text()')
        print(f'第{page}页、第{index}条——标题【{title}】 内容为:{content}')
        data_list.append({'title': title, 'content': content})
    return data_list


def get_next_page(html, base_url=BASE_URL):
    """Return the absolute URL of the next page, or ``None`` if there is none.

    Args:
        html: Page source to search for the "下一页" (next page) link.
        base_url: Base against which the link's href is resolved.

    Returns:
        The next page URL, or ``None`` when the page has no such link.
    """
    tree = etree.HTML(html)
    hrefs = tree.xpath('//div[@class="page"]/a[text()="下一页"]/@href')
    if not hrefs:
        # Last page: report "no next page" instead of raising IndexError.
        return None
    return urljoin(base_url, hrefs[0])


def main(start_url='http://www.17989.com/xiaohua/1.htm', max_pages=9):
    """Crawl up to *max_pages* pages starting at *start_url*.

    Follows the "next page" link after each page and stops early when a
    page has none. Prints and returns the collected entries.
    """
    data_list = []
    url = start_url
    for page in range(1, max_pages + 1):
        html = download(url)
        get_data(html, page, data_list)
        url = get_next_page(html)
        if not url:
            break
    print(data_list)
    return data_list


if __name__ == '__main__':
    main()