[Python] BeautifulSoup模块用法演示
from bs4 import BeautifulSoup, element
# Sample HTML document used by every lookup below.
# NOTE(review): the markup had been stripped from this literal (only the text
# nodes were left), so every tag lookup returned None and the script crashed.
# It is restored from what the script itself queries (id='link1',
# class_='story', the 'name': 'dromouse' attribute, the commented-out "Elsie"
# link, the example.com hrefs). The <b> wrapper, class="sister" and the
# link2/link3 ids follow the standard BeautifulSoup sample -- confirm against
# the original post.
html = """
<html>
<head>
<title>The Dormouse's story </title>
</head>
<body>
<p class="title" name="dromouse"><b>The Dormouse's story</b></p>
<p class="story">Once upon a time there were three little sisters; and their names were
<a href="http://example.com/elsie" class="sister" id="link1"><!-- Elsie --></a>,
<a href="http://example.com/lacie" class="sister" id="link2">Lacie</a> and
<a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>;
and they lived at the bottom of a well.</p>
<p class="story">...</p>
</body>
</html>
"""

soup = BeautifulSoup(html, "html.parser")
# Uncomment to dump the parsed tree: print(soup.prettify())

# --- First <p>: find() versus attribute-style access ---------------------
p1 = soup.find('p')
print(f'1a: {p1.text}')
print(f'1b: {soup.p.text}')
print(f'1c: {soup.p.string}')

# --- Whole tags, tag name, text and attribute dict -----------------------
print(f'2: {soup.title}')
print(f'3: {soup.head}')
print(f'4: {soup.p}')
print(f'5: {soup.head.name}')
print(f'6: {soup.head.text.strip()}')
print(f'7: {soup.p.attrs}')

# --- Writing and reading a single attribute ------------------------------
# Assigning a plain string replaces the parsed class value on the first <p>.
soup.p['class'] = "newClass"
print(f"8a: {soup.p['class']}")
print(f"8b: {soup.p.get('class')}")
print(f'9a: {soup.a.attrs["href"]}')
print(f'9b: {soup.a["href"]}')

# --- Lookup by CSS class --------------------------------------------------
e1 = soup.find(class_='story')
print(f'10a: {e1}')
print(f'10b: {e1.text}')  # differs from .string
print('-' * 60)

# --- find_all with a list of tag names -----------------------------------
ap_list = soup.find_all(['a', 'p'])
print(len(ap_list))
for ap in ap_list:
    print(ap)
print('+' * 60)

# --- find_all by exact text ----------------------------------------------
# `string=` is the current name of the deprecated `text=` argument.
print(soup.find_all(string=["Elsie", "Lacie"]))
print('!' * 60)

# --- Lookup by id; the matched link holds only an HTML comment ------------
li = soup.find_all(id='link1')
print(len(li))
print(li)
print(f'11a: {li[0].text}')
print(f'11b: {li[0].string}')
# isinstance() is the idiomatic type check (instead of type(...) == ...).
if isinstance(li[0].string, element.Comment):
    print('这是注释')

# --- Attribute-dict filter and CSS selectors ------------------------------
print(soup.find_all(attrs={"href": "http://example.com/lacie"}))
print(soup.select("head > title"))
print(soup.select('p #link1'))
输出结果
"D:\Program Files\Python\python.exe" C:/Users/issuser/PycharmProjects/pythonProject/10/soup001.py
1a: The Dormouse's story
1b: The Dormouse's story
1c: The Dormouse's story
2: <title>The Dormouse's story </title>
3: <head>
<title>The Dormouse's story </title>
</head>
4: <p class="title" name="dromouse"><b>The Dormouse's story</b></p>
5: head
6: The Dormouse's story
7: {'class': ['title'], 'name': 'dromouse'}
8a: newClass
8b: newClass
9a: http://example.com/elsie
9b: http://example.com/elsie
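10a: <p class="story">Once upon a time there were three little sisters; and their names were
<a class="sister" href="http://example.com/elsie" id="link1"><!-- Elsie --></a>,
<a class="sister" href="http://example.com/lacie" id="link2">Lacie</a> and
<a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>;
and they lived at the bottom of a well.</p>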
10b: Once upon a time there were three little sisters; and their names were
,
Lacie and
Tillie;
and they lived at the bottom of a well.
------------------------------------------------------------
6
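<p class="newClass" name="dromouse"><b>The Dormouse's story</b></p>
<p class="story">Once upon a time there were three little sisters; and their names were
<a class="sister" href="http://example.com/elsie" id="link1"><!-- Elsie --></a>,
<a class="sister" href="http://example.com/lacie" id="link2">Lacie</a> and
<a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>;
and they lived at the bottom of a well.</p>
<a class="sister" href="http://example.com/elsie" id="link1"><!-- Elsie --></a>
<a class="sister" href="http://example.com/lacie" id="link2">Lacie</a>
<a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>
<p class="story">...</p>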
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
['Lacie']
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1
[<a class="sister" href="http://example.com/elsie" id="link1"><!-- Elsie --></a>]
11a:
11b: Elsie
这是注释
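[<a class="sister" href="http://example.com/lacie" id="link2">Lacie</a>]
[<title>The Dormouse's story </title>]
[<a class="sister" href="http://example.com/elsie" id="link1"><!-- Elsie --></a>]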