# Method 2: precise lookup
# print(soup.find_all('a', id='summer')[0].string)
# print(soup.find_all('a', href='summer.html')[0].string)
# print(soup.find_all('a', {'id': 'summer'})[0].string)
# print(soup.find_all('a', {'href': 'summer.html'})[0].string)
# print(soup.find_all(string='夏天')[0].string)

CSS 选择器获取节点
# CSS-selector lookups. `soup` is expected to be a BeautifulSoup document
# that was built before this snippet runs.
print(soup.select('a')[0].string)             # first <a> in the document
print(soup.select('ul a')[2].string)          # third <a> nested anywhere under a <ul>
print(soup.select('.season')[3].string)       # fourth element with class "season"
print(soup.select('.red')[0].string)          # first element with class "red"
print(soup.select_one('.red').string)         # same lookup without indexing a list
print(soup.select('ul .season')[3].string)    # class "season" nested under a <ul>
print(soup.select('#summer')[0].string)       # element whose id is "summer"
print(soup.select('li > .season')[2].string)  # class "season" as a direct child of <li>
print(soup.select('ul > li')[2].string)       # third <li> that is a direct child of <ul>
1. 获取古诗文网网页内容
import requests

# Fetch the gushiwen.com home page and inspect the response object.
url = 'http://www.gushiwen.com/'
# requests has no default timeout; without one a stalled server would hang
# the script indefinitely.
r = requests.get(url, timeout=10)
r.encoding = 'UTF-8'      # decode r.text as UTF-8
print(type(r))            # the response object's type
print(r.status_code)      # HTTP status code
print(type(r.text))       # the decoded body's type
print(r.text)             # the decoded body itself
print(r.cookies)          # cookies set by the server
2.下载古诗文网logo
import requests
from PIL import Image

# Download the site's sprite image, save it next to the script and display it.
r = requests.get(
    "https://www.gushiwen.com/tpl/static/images/allico.png",
    timeout=10,  # do not hang forever if the server stalls
)
# Stop here on an HTTP error instead of saving an error page as a .png.
r.raise_for_status()
with open('allico.png', 'wb') as f:
    f.write(r.content)  # r.content is the raw, undecoded response body

im = Image.open('allico.png')
im.show()

from bs4 import BeautifulSoup

# NOTE(review): the content of this literal was lost when the page was
# extracted (the opening quotes were immediately followed by code). The
# markup below is a reconstruction based on the duplicate of this snippet
# later in the file (a single list item "春天") and on the attributes read
# below (href, class). The attribute values are placeholders — confirm
# against the original page.
html = """
<ul>
<li><a class="season" href="spring.html">春天</a></li>
</ul>
"""
soup = BeautifulSoup(html, 'lxml')
print(soup.a.string)     # text of the first <a>
print(soup.li.string)    # text of the first <li>
print(soup.a['href'])    # value of one attribute
print(soup.a['class'])   # value of the class attribute
print(soup.a.attrs)      # all attributes of the tag
# Sample document used by the find_all / select examples.
# NOTE(review): the tags of this literal were stripped when the page was
# extracted (only "- 春天<>"-style residue remained). The markup below is
# rebuilt from the lookups made elsewhere in this file: <ul> > <li> > <a>,
# class "season" on every link, id "summer" and href "summer.html" on the
# 夏天 link. The element carrying class "red" could not be recovered —
# confirm against the original page.
html = """
<ul>
<li><a class="season">春天</a></li>
<li><a class="season" id="summer" href="summer.html">夏天</a></li>
<li><a class="season">秋天</a></li>
<li><a class="season">冬天</a></li>
</ul>
"""
使用 find_all 获取节点
from bs4 import BeautifulSoup

# `html` is expected to hold the sample document defined before this snippet.
soup = BeautifulSoup(html, 'lxml')

# Method 1: find all the seasons.
# The method is spelled find_all; `soup.findall` is not a method, so it is
# treated as a lookup for a <findall> tag.
print(soup.find_all('a')[0].string)
# `class` is a reserved word in Python, so the keyword form is `class_`.
print(soup.find_all('a', class_='season')[1].string)
print(soup.find_all('a', {'class': 'season'})[2].string)
print(soup.find_all('a', attrs={'class': 'season'})[3].string)
# Method 2: precise lookup
# print(soup.find_all('a', id='summer')[0].string)
# print(soup.find_all('a', href='summer.html')[0].string)
# print(soup.find_all('a', {'id': 'summer'})[0].string)
# print(soup.find_all('a', {'href': 'summer.html'})[0].string)
# print(soup.find_all(string='夏天')[0].string)

CSS 选择器获取节点
# CSS-selector lookups. `soup` is expected to be a BeautifulSoup document
# that was built before this snippet runs.
print(soup.select('a')[0].string)             # first <a> in the document
print(soup.select('ul a')[2].string)          # third <a> nested anywhere under a <ul>
print(soup.select('.season')[3].string)       # fourth element with class "season"
print(soup.select('.red')[0].string)          # first element with class "red"
print(soup.select_one('.red').string)         # same lookup without indexing a list
print(soup.select('ul .season')[3].string)    # class "season" nested under a <ul>
print(soup.select('#summer')[0].string)       # element whose id is "summer"
print(soup.select('li > .season')[2].string)  # class "season" as a direct child of <li>
print(soup.select('ul > li')[2].string)       # third <li> that is a direct child of <ul>

# This import was fused onto the previous print call; it belongs to the
# snippet that follows.
from bs4 import BeautifulSoup
# Parse a tiny document and look at its first <p> tag.
# NOTE(review): the literal was split across lines by the extraction, which
# is a syntax error inside single quotes; the line breaks are kept here as
# explicit "\n". Any tags that originally surrounded the text were
# presumably stripped as well — confirm against the original page.
soup = BeautifulSoup('Hello\nBeautifulSoup\n', 'lxml')
print(soup)            # the whole parsed document
print(soup.p)          # the first <p> tag
print(soup.p.name)     # that tag's name
print(soup.p.string)   # that tag's text

# NOTE(review): the tags of this literal were stripped by the extraction;
# only the text "一年有四个季节:" and a single list item "春天" survived.
# The markup is a reconstruction based on the attributes read below
# (href, class); the tag choice and attribute values are placeholders —
# confirm against the original page.
html = """
<p>一年有四个季节:</p>
<ul>
<li><a class="season" href="spring.html">春天</a></li>
</ul>
"""
soup = BeautifulSoup(html, 'lxml')
print(soup.a.string)     # text of the first <a>
print(soup.li.string)    # text of the first <li>
print(soup.a['href'])    # value of one attribute
print(soup.a['class'])   # value of the class attribute
print(soup.a.attrs)      # all attributes of the tag