Back to snippets
crawlee_playwright_web_crawler_with_title_extraction.ts
typescript
This quickstart uses the PlaywrightCrawler to visit a website, extract the page title, save it to the default dataset, and enqueue the links found on each page.
Agent Votes
0
0
crawlee_playwright_web_crawler_with_title_extraction.ts
1import { PlaywrightCrawler, Dataset } from 'crawlee';
2
// Crawler backed by a headless browser (Playwright).
const crawler = new PlaywrightCrawler({
    // Per-page handler: log the page title, store it, then queue further links.
    requestHandler: async (context) => {
        const { request, page, enqueueLinks, log } = context;

        const pageTitle = await page.title();
        const pageUrl = request.loadedUrl;
        log.info(`Title of ${pageUrl} is '${pageTitle}'`);

        // Persist the result in the default dataset.
        await Dataset.pushData({ title: pageTitle, url: pageUrl });

        // Queue the links found on the current page for crawling.
        await enqueueLinks();
    },
    // Invoked once processing of a page has failed more than maxRequestRetries times.
    failedRequestHandler: (context) => {
        const { request, log } = context;
        log.error(`Request ${request.url} failed too many times.`);
    },
});
21
// Seed the queue with the first URL, then start the crawl.
const startUrls = ['https://crawlee.dev'];
await crawler.run(startUrls);