# Method 2: locate a single node precisely by attribute value or text.
# These alternatives are kept commented out, as in the original notes.
# print(soup.find_all('a', id='summer')[0].string)
# print(soup.find_all('a', href='summer.html')[0].string)
# print(soup.find_all('a', {'id': 'summer'})[0].string)
# print(soup.find_all('a', {'href': 'summer.html'})[0].string)
# print(soup.find_all(string='夏天')[0].string)

# Select nodes with CSS selectors.
# `soup` is the BeautifulSoup object built by an earlier snippet.
print(soup.select('a')[0].string)             # tag selector
print(soup.select('ul a')[2].string)          # descendant selector
print(soup.select('.season')[3].string)       # class selector
print(soup.select('.red')[0].string)
print(soup.select_one('.red').string)         # first match only, no list
print(soup.select('ul .season')[3].string)
print(soup.select('#summer')[0].string)       # id selector
print(soup.select('li > .season')[2].string)  # direct-child selector
print(soup.select('ul > li')[2].string)
**1.获取古诗文网网页内容**
import requests

# Fetch the gushiwen.com home page and inspect the response object.
url = 'http://www.gushiwen.com/'
# Without a timeout, requests.get can block forever on an unresponsive host.
r = requests.get(url, timeout=10)
r.encoding = 'UTF-8'  # decode r.text as UTF-8
print(type(r))
print(r.status_code)
print(type(r.text))
print(r.text)
print(r.cookies)
2.下载古诗文网logo
import requests
from PIL import Image

# Download the image, write the raw bytes to disk, then display it.
# Without a timeout, requests.get can block forever on an unresponsive host.
r = requests.get("https://www.gushiwen.com/tpl/static/images/allico.png", timeout=10)
with open('allico.png', 'wb') as f:
    f.write(r.content)
# Open the file only after the `with` block has closed (and flushed) it.
im = Image.open('allico.png')
im.show()
![}I%UG0QWKJNUHGFHOSJ8BW.png
import requests
from PIL import Image

# Download the image, write the raw bytes to disk, then display it.
# Without a timeout, requests.get can block forever on an unresponsive host.
r = requests.get("http://www.siso.edu.cn/dfiles/11358/upload/xc.png", timeout=10)
with open('xc.png', 'wb') as f:
    f.write(r.content)
# Open the file only after the `with` block has closed (and flushed) it.
im = Image.open('xc.png')
im.show()
3.BeautifulSoup4解析网页
from bs4 import BeautifulSoup

# NOTE(review): the HTML tags inside this literal were stripped when the
# note was exported. Two <p> elements are assumed because the code below
# reads soup.p and the surviving text is "Hello" / "BeautifulSoup".
# TODO confirm against the original note.
soup = BeautifulSoup('<p>Hello</p><p>BeautifulSoup</p>', 'lxml')
print(soup)
print(type(soup.p))
print(soup.p)         # attribute access returns the first <p> tag
print(soup.p.name)
print(soup.p.string)
from bs4 import BeautifulSoup

# NOTE(review): the markup in this string was stripped when the note was
# exported; only the four season names survived. The structure below is
# reconstructed from what the snippets in this note look up: <ul>/<li>/<a>,
# class "season", id "summer", href "summer.html", and a ".red" element.
# Which element carried class "red", and every attribute value other than
# those for summer, are guesses. TODO confirm against the original note.
# `html` is defined once, before its first use.
html = """
<p class="red">一年有四个季节:</p>
<ul>
<li><a class="season" href="spring.html" id="spring">春天</a></li>
<li><a class="season" href="summer.html" id="summer">夏天</a></li>
<li><a class="season" href="autumn.html" id="autumn">秋天</a></li>
<li><a class="season" href="winter.html" id="winter">冬天</a></li>
</ul>
"""
soup = BeautifulSoup(html, 'lxml')
print(soup.a.string)    # text of the first <a>
print(soup.li.string)   # an <li> with a single child yields that child's text
print(soup.a['href'])
print(soup.a['class'])  # class is multi-valued, so this is a list
print(soup.a.attrs)
使用find_all获取节点
from bs4 import BeautifulSoup

# `html` is the document string defined by the preceding snippet.
soup = BeautifulSoup(html, 'lxml')

# Method 1: collect every matching <a>, then index into the result list.
# The method is find_all. `soup.findall` would be treated as a lookup of a
# <findall> tag and return None, so calling it raises TypeError.
print(soup.find_all('a')[0].string)
# `class` is a reserved word in Python, so the keyword argument is class_.
print(soup.find_all('a', class_='season')[1].string)
print(soup.find_all('a', {'class': 'season'})[2].string)
print(soup.find_all('a', attrs={'class': 'season'})[3].string)
![Q7ZM3`O~{U(S24YKRJA)AG.png](https://cdn.nlark.com/yuque/0/2020/png/1004038/1604626964501-cd0b4493-ed4f-480d-b885-149c78500860.png#align=left&display=inline&height=212&margin=%5Bobject%20Object%5D&name=Q7ZM3%60O~_%7BU%28S24YKRJA%29AG.png&originHeight=212&originWidth=584&size=27787&status=done&style=none&width=584)

![IL36VJ%EZW9Y}G_4CG@X{{W.png](https://cdn.nlark.com/yuque/0/2020/png/1004038/1604626972808-0989fc12-3877-42e4-85b7-19ef44f344b3.png#align=left&display=inline&height=141&margin=%5Bobject%20Object%5D&name=IL36VJ%25EZW9Y%7DG_4CG%40X%7B%7BW.png&originHeight=141&originWidth=75&size=2159&status=done&style=none&width=75)

# 方法二:精确定位
# Alternative precise lookups by attribute value or text, kept commented
# out as in the original notes.
# print(soup.find_all('a', id='summer')[0].string)
# print(soup.find_all('a', href='summer.html')[0].string)
# print(soup.find_all('a', {'id': 'summer'})[0].string)
# print(soup.find_all('a', {'href': 'summer.html'})[0].string)
# print(soup.find_all(string='夏天')[0].string)

# Select nodes with CSS selectors.
# `soup` is the BeautifulSoup object built by an earlier snippet.
print(soup.select('a')[0].string)             # tag selector
print(soup.select('ul a')[2].string)          # descendant selector
print(soup.select('.season')[3].string)       # class selector
print(soup.select('.red')[0].string)
print(soup.select_one('.red').string)         # first match only, no list
print(soup.select('ul .season')[3].string)
print(soup.select('#summer')[0].string)       # id selector
print(soup.select('li > .season')[2].string)  # direct-child selector
print(soup.select('ul > li')[2].string)

# This import was fused onto the last print line; it belongs to the
# snippet that follows.
from bs4 import BeautifulSoup
# NOTE(review): the HTML tags inside this literal were stripped when the
# note was exported. Two <p> elements are assumed because the code below
# reads soup.p and the surviving text is "Hello" / "BeautifulSoup".
# TODO confirm against the original note.
soup = BeautifulSoup('<p>Hello</p><p>BeautifulSoup</p>', 'lxml')
print(soup)
print(soup.p)         # attribute access returns the first <p> tag
print(soup.p.name)
print(soup.p.string)

# NOTE(review): the markup here was also stripped; only the title text and
# the first list item survived. The <a> needs href and class attributes
# because the code below reads them; the tag layout and attribute values
# are guesses. TODO confirm against the original note.
html = """
<p>一年有四个季节:</p>
<ul>
<li><a class="season" href="spring.html" id="spring">春天</a></li>
</ul>
"""
soup = BeautifulSoup(html, 'lxml')
print(soup.a.string)    # text of the first <a>
print(soup.li.string)   # an <li> with a single child yields that child's text
print(soup.a['href'])
print(soup.a['class'])  # class is multi-valued, so this is a list
print(soup.a.attrs)