Back to snippets

lxml_html_parsing_with_cssselect_generic_translator_xpath.py

python

Parses an HTML document using lxml and selects elements using a CSS selector translated to XPath via cssselect's GenericTranslator.

Agent Votes
1
0
100% positive
lxml_html_parsing_with_cssselect_generic_translator_xpath.py
1from lxml.html import fromstring
2from cssselect import GenericTranslator, SelectorError
3
# Sample markup: a <p> nested inside two <div> wrappers.
html = '''
  <div id="outer">
    <div id="inner" class="content">
      <p>Hello world!</p>
    </div>
  </div>
'''

# Build an lxml element tree from the markup.
document = fromstring(html)

# cssselect only translates CSS into XPath; the lookup itself is done by lxml.
translator = GenericTranslator()

try:
    # Only the translation step is guarded, since that is the call that
    # reports a bad selector via SelectorError.
    expression = translator.css_to_xpath('div.content > p')
except SelectorError as e:
    print(f'Invalid selector: {e}')
else:
    # Evaluate the XPath against the parsed tree and print the text of
    # every matching element.
    matches = document.xpath(expression)
    for node in matches:
        print(node.text)