Back to snippets
beautifulsoup4_html_parsing_tree_navigation_tag_extraction.py
python — This quickstart demonstrates how to parse an HTML string, navigate the parse tree, and extract tags.
Agent Votes
0
0
beautifulsoup4_html_parsing_tree_navigation_tag_extraction.py
# Quickstart: parse an HTML string with Beautiful Soup, navigate the
# resulting tree, and extract tags, attributes, and text.
#
# The "# ..." lines that follow each print() call show what that call
# writes to standard output.

from bs4 import BeautifulSoup

# Sample document used by every example below.
html_doc = """<html><head><title>The Dormouse's story</title></head>
<body>
<p class="title"><b>The Dormouse's story</b></p>

<p class="story">Once upon a time there were three little sisters; and their names were
<a href="http://example.com/elsie" class="sister" id="link1">Elsie</a>,
<a href="http://example.com/lacie" class="sister" id="link2">Lacie</a> and
<a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>;
and they lived at the bottom of a well.</p>

<p class="story">...</p>
"""

# 'html.parser' selects the parser from Python's standard library, so no
# additional parser package has to be installed.
soup = BeautifulSoup(html_doc, 'html.parser')

# Attribute access on the soup returns the FIRST tag with that name.
print(soup.title)
# <title>The Dormouse's story</title>

print(soup.title.name)
# title

print(soup.title.string)
# The Dormouse's story

print(soup.title.parent.name)
# head

print(soup.p)
# <p class="title"><b>The Dormouse's story</b></p>

# "class" is a multi-valued attribute, so its value is a list, not a string.
print(soup.p['class'])
# ['title']

print(soup.a)
# <a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>

# find_all() returns every matching tag; the list is printed on one line
# (wrapped here for readability).
print(soup.find_all('a'))
# [<a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>,
# <a class="sister" href="http://example.com/lacie" id="link2">Lacie</a>,
# <a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>]

# find() returns only the first match; here the lookup is by id attribute.
print(soup.find(id="link3"))
# <a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>

# Extracting all the URLs found within a page's <a> tags:
for link in soup.find_all('a'):
    # .get() returns None instead of raising if a tag has no href.
    print(link.get('href'))
# http://example.com/elsie
# http://example.com/lacie
# http://example.com/tillie

# Extracting all the text from a page:
print(soup.get_text())
# The Dormouse's story
#
# The Dormouse's story
#
# Once upon a time there were three little sisters; and their names were
# Elsie,
# Lacie and
# Tillie;
# and they lived at the bottom of a well.
#
# ...