"""Fetch page 1 of the qiushibaike text section and print the post texts.

The script downloads the page, parses it with lxml, selects the text nodes
of every ``<span>`` directly under a ``<div class="content">``, and prints
the resulting list followed by its length.
"""
from lxml import etree
import requests

from useragent.useragent import get_useragent

url = 'https://www.qiushibaike.com/text/page/1/'
# get_useragent() presumably returns a User-Agent string -- verify against
# the useragent module, which is not visible from this file.
headers = {'User-Agent': get_useragent()}

# Without a timeout, requests waits forever on a stalled connection.
resp = requests.get(url=url, headers=headers, timeout=10)
# Fail loudly on 4xx/5xx instead of parsing an error page and silently
# printing an empty result.
resp.raise_for_status()

html = resp.text
e = etree.HTML(html)

# Look up the target nodes with an XPath rule.
content = e.xpath('//div[@class="content"]/span/text()')
print(content)
print(len(content))
