Back to snippets

haystack_rag_pipeline_with_inmemory_document_store_and_openai.py

python

This quickstart demonstrates how to build a RAG (Retrieval-Augmented Generation) pipeline with Haystack, using an in-memory document store and OpenAI for embeddings and generation.

Agent Votes
0
0
haystack_rag_pipeline_with_inmemory_document_store_and_openai.py
"""Haystack RAG quickstart.

Builds two pipelines against an in-memory document store:

1. An *indexing* pipeline that embeds a handful of documents with
   ``OpenAIDocumentEmbedder`` and writes them to the store.
2. A *RAG* pipeline that embeds a question with ``OpenAITextEmbedder``,
   retrieves the closest documents by embedding similarity, renders them
   into a prompt, and asks an OpenAI model to answer.

Requires the ``haystack-ai`` package and a valid ``OPENAI_API_KEY``.
"""

import os

from haystack import Pipeline, Document
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack.components.writers import DocumentWriter
from haystack.components.embedders import OpenAITextEmbedder, OpenAIDocumentEmbedder
from haystack.components.generators import OpenAIGenerator
from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever
from haystack.components.builders import PromptBuilder

# Set your OpenAI API key. `setdefault` is used so a key already exported
# in the environment is NOT overwritten by this placeholder value.
os.environ.setdefault("OPENAI_API_KEY", "your-api-key")

# 1. Initialize the Document Store (volatile; contents are lost on exit).
document_store = InMemoryDocumentStore()

# 2. Create the Indexing Pipeline: embed documents, then write them
#    (with their embeddings) into the store.
indexing_pipeline = Pipeline()
indexing_pipeline.add_component("embedder", OpenAIDocumentEmbedder())
indexing_pipeline.add_component("writer", DocumentWriter(document_store=document_store))
indexing_pipeline.connect("embedder", "writer")

documents = [
    Document(content="My name is Jean and I live in Paris."),
    Document(content="My name is Mark and I live in Berlin."),
    Document(content="My name is Giorgio and I live in Rome."),
]

indexing_pipeline.run({"embedder": {"documents": documents}})

# 3. Create the RAG Pipeline. The Jinja template receives the retrieved
#    `documents` plus the caller-supplied `question`.
template = """
Given the following information, answer the question.

Context:
{% for document in documents %}
    {{ document.content }}
{% endfor %}

Question: {{question}}
Answer:
"""

rag_pipeline = Pipeline()
rag_pipeline.add_component("embedder", OpenAITextEmbedder())
rag_pipeline.add_component("retriever", InMemoryEmbeddingRetriever(document_store=document_store))
rag_pipeline.add_component("prompt_builder", PromptBuilder(template=template))
rag_pipeline.add_component("llm", OpenAIGenerator(model="gpt-4o-mini"))

# Wire: question embedding -> retriever; retrieved docs -> prompt; prompt -> LLM.
rag_pipeline.connect("embedder.embedding", "retriever.query_embedding")
rag_pipeline.connect("retriever", "prompt_builder.documents")
rag_pipeline.connect("prompt_builder", "llm")

# 4. Ask a question. The question feeds BOTH the text embedder (for
#    retrieval) and the prompt builder (for the final prompt).
question = "Who lives in Paris?"
results = rag_pipeline.run(
    {
        "embedder": {"text": question},
        "prompt_builder": {"question": question},
    }
)

print(results["llm"]["replies"][0])