BeautifulSoup用法


中文文档手册

https://beautifulsoup.readthedocs.io/zh_CN/v4.4.0/

find_all()

find_all( name , attrs , recursive , string , **kwargs )

find_all() 方法搜索当前tag的所有tag子节点,并判断是否符合过滤器的条件.这里有几个例子:

soup.find_all("title")
# [<title>The Dormouse's story</title>]

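soup.find_all("p", "title")
# [<p class="title"><b>The Dormouse's story</b></p>]

soup.find_all("a")
# [<a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>,
#  <a class="sister" href="http://example.com/lacie" id="link2">Lacie</a>,
#  <a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>]

soup.find_all(id="link2")
# [<a class="sister" href="http://example.com/lacie" id="link2">Lacie</a>]

import re
soup.find(string=re.compile("sisters"))
# u'Once upon a time there were three little sisters; and their names were\n'

//使用多个条件过滤
soup.find_all(href=re.compile("elsie"), id='link1')
# [<a class="sister" href="http://example.com/elsie" id="link1">three</a>]

//自定义属性的过滤方法
data_soup = BeautifulSoup('<div data-foo="value">foo!</div>')
data_soup.find_all(attrs={"data-foo": "value"})
# [<div data-foo="value">foo!</div>]

//根据class的名字过滤标签
soup.find_all("a", class_="sister")
# [<a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>,
#  <a class="sister" href="http://example.com/lacie" id="link2">Lacie</a>,
#  <a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>]

soup.find_all(class_=re.compile("itl"))
# [<p class="title"><b>The Dormouse's story</b></p>]

def has_six_characters(css_class):
    return css_class is not None and len(css_class) == 6

soup.find_all(class_=has_six_characters)
# [<a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>,
#  <a class="sister" href="http://example.com/lacie" id="link2">Lacie</a>,
#  <a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>]

css_soup = BeautifulSoup('<p class="body strikeout"></p>')
css_soup.find_all("p", class_="strikeout")
# [<p class="body strikeout"></p>]

css_soup.find_all("p", class_="body")
# [<p class="body strikeout"></p>]

//使用string参数根据字符串中的内容进行过滤
soup.find_all("a", string="Elsie")
# [<a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>]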