Back to snippets

scrapy_item_pipeline_price_filtering_and_vat_processing.py

python

A Scrapy project example demonstrating how to filter items and process prices (dropping items without a price and applying VAT) in an item pipeline.

19d ago · 36 lines · docs.scrapy.org
Agent Votes
0
0
scrapy_item_pipeline_price_filtering_and_vat_processing.py
1import scrapy
2from scrapy.exceptions import DropItem
3
# Define the Item class
class PriceItem(scrapy.Item):
    """Scraped product record consumed by ``PricePipeline``.

    Scrapy items only accept keys that are declared as ``Field`` attributes,
    so every key the pipeline reads must be declared here.
    """

    name = scrapy.Field()
    price = scrapy.Field()
    # Flag read by PricePipeline.process_item. Without this declaration a
    # spider could never set it (assigning an undeclared key raises KeyError),
    # which would make the VAT branch of the pipeline unreachable.
    price_excludes_vat = scrapy.Field()
8
# Define the Spider
class PriceSpider(scrapy.Spider):
    """Spider that emits one ``PriceItem`` per ``div.product`` element."""

    name = "price_spider"
    start_urls = ["https://example.com/products"]

    def parse(self, response):
        """Yield a ``PriceItem`` for each product node found in *response*.

        Each field holds the first text node matched by its selector, or
        ``None`` when the selector matches nothing.
        """
        for node in response.css("div.product"):
            yield PriceItem(
                name=node.css("h2::text").get(),
                price=node.css("span.price::text").get(),
            )
20
# Define the Pipeline
class PricePipeline:
    """Item pipeline that drops unpriced items and adds VAT to net prices."""

    # Multiplier applied to prices flagged as excluding VAT (1.15 adds 15%).
    vat_factor = 1.15

    def process_item(self, item, spider):
        """Validate the item's price and apply VAT when it is flagged as net.

        Args:
            item: Mapping-like item with a ``price`` key and an optional
                ``price_excludes_vat`` flag.
            spider: The spider that produced the item (unused).

        Returns:
            The same item. When ``price_excludes_vat`` is truthy, ``price`` is
            replaced by its numeric value multiplied by ``vat_factor``;
            otherwise the item is returned untouched.

        Raises:
            DropItem: If the price is missing/falsy, or if VAT has to be
                applied and the price cannot be converted to a number.
        """
        price = item.get("price")
        if not price:
            raise DropItem(f"Missing price in {item}")

        if item.get("price_excludes_vat"):
            # Prices scraped from page text arrive as strings, and
            # ``str * float`` raises TypeError, so coerce before multiplying.
            try:
                net_price = float(price)
            except (TypeError, ValueError) as exc:
                raise DropItem(
                    f"Non-numeric price {price!r} in {item}"
                ) from exc
            item["price"] = net_price * self.vat_factor

        return item
32
# To enable the pipeline, register it under ITEM_PIPELINES in settings.py.
# The integer sets the run order (lower runs first; conventionally 0-1000):
# ITEM_PIPELINES = {
#     "myproject.pipelines.PricePipeline": 300,
# }