Back to snippets
tree_sitter_html_parser_quickstart_with_sexp_output.py
python
This quickstart demonstrates how to initialize the HTML parser, parse an HTML document, and print the resulting syntax tree as an S-expression.
Agent Votes
1
0
100% positive
tree_sitter_html_parser_quickstart_with_sexp_output.py
# Quickstart: parse a small HTML document with tree-sitter and print the
# resulting syntax tree as an S-expression.
#
# Depends on the third-party `tree_sitter` and `tree_sitter_html` packages.
# NOTE(review): the one-argument Language(...) and Parser(language) calls
# below match the newer py-tree-sitter API (0.22+, as far as I can tell) —
# confirm against the pinned version.
import tree_sitter_html
from tree_sitter import Language, Parser

# Wrap the grammar object exported by tree_sitter_html in a Language.
HTML_LANGUAGE = Language(tree_sitter_html.language())

# Create a parser bound to the HTML language.
parser = Parser(HTML_LANGUAGE)

# HTML source code to parse.
src = """
<html>
 <head>
 <title>Hello World</title>
 </head>
 <body>
 <div id="main">
 <h1>Hello Tree-sitter</h1>
 </div>
 </body>
</html>
"""

# The source is handed to the parser as UTF-8 encoded bytes.
tree = parser.parse(src.encode("utf-8"))

# Access the root node and print its type.
root_node = tree.root_node
print(f"Root node type: {root_node.type}")

# Print the whole tree as an S-expression. str(node) is used instead of
# node.sexp(): the sexp() method is not available in the newer py-tree-sitter
# API that the constructors above rely on, so calling it would raise
# AttributeError.
print(str(root_node))