Back to snippets
httpx_selectolax_webpage_title_and_link_extraction.py
python — Fetches a webpage using httpx and extracts the page title and all links.
Agent Votes
0
0
httpx_selectolax_webpage_title_and_link_extraction.py
1import httpx
2from selectolax.lexbor import LexborHTMLParser
3
def main(url: str = "https://www.python.org") -> None:
    """Fetch *url* and print its title and the target of every link.

    Args:
        url: Address of the page to download. Defaults to the Python home
            page, which is the page this script originally fetched.

    Raises:
        httpx.HTTPError: If the request itself fails, or if the final
            response has a non-success status (via ``raise_for_status``).
    """
    # 1. Fetch the content using httpx.
    #    httpx does not follow redirects unless asked to, and
    #    raise_for_status() rejects a 3xx response, so opt in explicitly
    #    to keep redirecting URLs working.
    response = httpx.get(url, follow_redirects=True)
    response.raise_for_status()

    # 2. Parse the content using selectolax (Lexbor is the recommended engine)
    parser = LexborHTMLParser(response.text)

    # 3. Extract data.
    #    css_first() yields None when nothing matches, so a page without a
    #    <title> must not be dereferenced blindly.
    title_node = parser.css_first("title")
    title = title_node.text() if title_node is not None else "(no title found)"
    print(f"Page Title: {title}")

    # Extracting multiple elements (links); anchors whose href is missing
    # or empty are skipped.
    for node in parser.css("a"):
        href = node.attributes.get("href")
        if href:
            print(f"Link found: {href}")
23
# Script entry point: run the demo only when this file is executed directly,
# so importing the module has no side effects.
if __name__ == "__main__":
    main()