
concurrent_futures_threadpool_parallel_web_page_fetcher.py

Fetches multiple web pages concurrently using a thread pool (concurrent.futures.ThreadPoolExecutor).

Source: docs.python.org
import concurrent.futures
import urllib.request

URLS = ['http://www.foxnews.com/',
        'http://www.cnn.com/',
        'http://europe.wsj.com/',
        'http://www.bbc.co.uk/',
        'http://www.some-made-up-domain.com/']

# Retrieve a single page and report the URL and contents
def load_url(url, timeout):
    with urllib.request.urlopen(url, timeout=timeout) as conn:
        return conn.read()

# We can use a with statement to ensure threads are cleaned up promptly
with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
    # Start the load operations and mark each future with its URL
    future_to_url = {executor.submit(load_url, url, 60): url for url in URLS}
    for future in concurrent.futures.as_completed(future_to_url):
        url = future_to_url[future]
        try:
            data = future.result()
        except Exception as exc:
            print('%r generated an exception: %s' % (url, exc))
        else:
            print('%r page is %d bytes' % (url, len(data)))
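For simpler cases where results can be consumed in input order and a single failing URL may abort the whole run, the same fetch can also be written with Executor.map instead of submit/as_completed. A minimal sketch of that variation, assuming the URLS list and load_url function defined above, with the 60-second timeout passed as a second iterable:

# Variation: executor.map yields results in the same order as URLS.
# Assumes URLS and load_url are defined exactly as in the snippet above.
with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
    # An exception raised by any fetch is re-raised here when its result is
    # retrieved, so one bad URL (e.g. the made-up domain) ends the loop.
    for url, data in zip(URLS, executor.map(load_url, URLS, [60] * len(URLS))):
        print('%r page is %d bytes' % (url, len(data)))

The submit/as_completed form above remains the more robust choice for this URL list, since it reports each URL's failure individually while still printing the pages that did load.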