导入模块:
from bs4 import BeautifulSoup
初始化:
soup=BeautifulSoup(txt,"html.parser")
查找属性(取第一个div标签的title属性值):
title=soup.div['title']
指定属性查找标签:
sp1=soup.find('div',class_="list wrap")
# 注意是class_而不是_class(class是Python保留字,所以关键字参数名要加后缀下划线)
查找所有指定标签:
sp2=sp1.find_all('li')
# 注意:可以写find_all或findAll(旧版遗留的别名,推荐使用find_all),但是不能写findall
# 或者简写
sp2=sp1('li')
综合示例:
from bs4 import BeautifulSoup

# Comprehensive example: parse a saved HTML page and collect one
# [name, date] pair from every <li> inside the "list wrap" <div>.

# Read the whole page; the context manager closes the file handle
# as soon as the content has been read.
with open('jxky.html', encoding="utf-8") as f:
    txt = f.read()

dalst = []
soup = BeautifulSoup(txt, "html.parser")

# First <div> matched by the class string "list wrap".
# NOTE: find() returns None when nothing matches, in which case the
# next line raises AttributeError.
sp1 = soup.find('div', class_="list wrap")
sp2 = sp1.find_all('li')

for da in sp2:
    # .string is the tag's single text child; per the bs4 docs it is
    # None when the tag contains more than one child node.
    name = da.find('p').string
    date = da.find('i').string
    dalst.append([name, date])

for n, d in dalst:
    print(n, d)