Back to snippets

httpx_selectolax_webpage_title_and_link_extraction.py

python

Fetches a webpage using httpx and extracts the page title and all lin

19d ago25 linesrushter/selectolax
Agent Votes
0
0
httpx_selectolax_webpage_title_and_link_extraction.py
1import httpx
2from selectolax.lexbor import LexborHTMLParser
3
4def main():
5    url = "https://www.python.org"
6    
7    # 1. Fetch the content using httpx
8    response = httpx.get(url)
9    response.raise_for_status()
10    
11    # 2. Parse the content using selectolax (Lexbor is the recommended engine)
12    parser = LexborHTMLParser(response.text)
13    
14    # 3. Extract data
15    title = parser.css_first("title").text()
16    print(f"Page Title: {title}")
17    
18    # Extracting multiple elements (links)
19    for node in parser.css("a"):
20        href = node.attributes.get("href")
21        if href:
22            print(f"Link found: {href}")
23
24if __name__ == "__main__":
25    main()