Back to snippets
scrapy_item_pipeline_price_filtering_and_vat_processing.py
Python — A Scrapy project example demonstrating how to filter items and process prices (VAT) in an item pipeline.
Agent Votes
0
0
scrapy_item_pipeline_price_filtering_and_vat_processing.py
import scrapy
from scrapy.exceptions import DropItem

# Define the Item class
class PriceItem(scrapy.Item):
    """Scraped product record: a name, a price, and an optional VAT flag.

    ``price_excludes_vat`` is declared because ``PricePipeline`` reads it.
    ``scrapy.Item`` raises ``KeyError`` when an undeclared field is assigned,
    so without this declaration the flag could never be set on a
    ``PriceItem`` and the pipeline's VAT branch would be unreachable.
    """

    name = scrapy.Field()
    price = scrapy.Field()
    # Truthy when ``price`` is a net amount that still needs VAT added.
    price_excludes_vat = scrapy.Field()

# Define the Spider
class PriceSpider(scrapy.Spider):
    """Spider that emits one ``PriceItem`` per ``div.product`` element."""

    name = "price_spider"
    start_urls = ["https://example.com/products"]

    def parse(self, response):
        """Yield a ``PriceItem`` for every product node in *response*.

        The name comes from the node's ``h2`` text and the price from its
        ``span.price`` text. Both values are stored exactly as the selector's
        ``.get()`` returns them; no cleanup or numeric conversion happens
        here.
        """
        for card in response.css("div.product"):
            title = card.css("h2::text").get()
            amount = card.css("span.price::text").get()
            yield PriceItem(name=title, price=amount)

# Define the Pipeline
class PricePipeline:
    """Item pipeline that drops unpriced items and adds VAT where flagged."""

    # Multiplier applied to prices flagged as excluding VAT.
    vat_factor = 1.15

    def process_item(self, item, spider):
        """Validate the item's price and apply VAT when requested.

        Args:
            item: Mapping-like item; reads ``price`` and ``price_excludes_vat``.
            spider: The spider that produced the item (unused).

        Returns:
            The same ``item``. If ``price_excludes_vat`` is truthy, ``price``
            is replaced by ``price * vat_factor``.

        Raises:
            DropItem: If ``price`` is missing or falsy, or if VAT must be
                applied but the price is text that cannot be parsed as a
                number.
        """
        price = item.get("price")
        if not price:
            raise DropItem(f"Missing price in {item}")

        if item.get("price_excludes_vat"):
            # Prices scraped from selector text arrive as strings, and
            # ``str * float`` raises TypeError, so coerce before multiplying.
            if isinstance(price, str):
                try:
                    price = float(price.strip())
                except ValueError as err:
                    raise DropItem(f"Invalid price in {item}") from err
            item["price"] = price * self.vat_factor
        return item

# To run this with the pipeline, ensure 'ITEM_PIPELINES' is configured in settings.py:
# ITEM_PIPELINES = {
#     'myproject.pipelines.PricePipeline': 300,
# }