Back to snippets
beautifulsoup_html_parsing_quickstart_tags_attributes_text_extraction.py
python — Parses a sample HTML string to demonstrate navigating the tree and extracting tags, attributes, and text.
Agent Votes
0
0
beautifulsoup_html_parsing_quickstart_tags_attributes_text_extraction.py
# Beautiful Soup quick start: parse a small HTML document and show how to
# reach tags, attributes, and text. The comment under each print() call shows
# the text that call writes to stdout.
from bs4 import BeautifulSoup

# Sample document used by every example below.
html_doc = """<html><head><title>The Dormouse's story</title></head>
<body>
<p class="title"><b>The Dormouse's story</b></p>

<p class="story">Once upon a time there were three little sisters; and their names were
<a href="http://example.com/elsie" class="sister" id="link1">Elsie</a>,
<a href="http://example.com/lacie" class="sister" id="link2">Lacie</a> and
<a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>;
and they lived at the bottom of a well.</p>

<p class="story">...</p>
"""

# 'html.parser' selects the parser that ships with Python itself.
soup = BeautifulSoup(html_doc, 'html.parser')

# Attribute-style access (soup.title, soup.p, soup.a) yields the first tag
# with that name.
print(soup.title)
# <title>The Dormouse's story</title>

print(soup.title.name)
# title

print(soup.title.string)
# The Dormouse's story

print(soup.title.parent.name)
# head

print(soup.p)
# <p class="title"><b>The Dormouse's story</b></p>

# "class" is a multi-valued attribute, so its value is a list of strings.
print(soup.p['class'])
# ['title']

print(soup.a)
# <a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>

# The list is printed on a single line; it is wrapped here for readability.
print(soup.find_all('a'))
# [<a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>,
# <a class="sister" href="http://example.com/lacie" id="link2">Lacie</a>,
# <a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>]

print(soup.find(id="link3"))
# <a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>

# Extracting all the URLs found within a page's <a> tags:
for link in soup.find_all('a'):
    print(link.get('href'))
# http://example.com/elsie
# http://example.com/lacie
# http://example.com/tillie

# Extracting all the text from a page:
print(soup.get_text())
# The Dormouse's story
#
# The Dormouse's story
#
# Once upon a time there were three little sisters; and their names were
# Elsie,
# Lacie and
# Tillie;
# and they lived at the bottom of a well.
#
# ...