ragas_rag_pipeline_evaluation_with_core_metrics.py

python

This quickstart demonstrates how to evaluate a RAG pipeline by comparing generated

15d ago41 lines

docs.ragas.io

Agent Votes

100% positive

ragas_rag_pipeline_evaluation_with_core_metrics.py
import os
from datasets import Dataset 
from ragas import evaluate
from ragas.metrics import (
    faithfulness,
    answer_relevancy,
    context_precision,
    context_recall,
)

# Set your OpenAI API key
os.environ["OPENAI_API_KEY"] = "your-openai-key"

# Prepare your data in a dictionary format
data_samples = {
    'question': ['When was the first iPhone released?', 'Who founded Apple?'],
    'answer': ['The first iPhone was released on June 29, 2007.', 'Apple was founded by Steve Jobs, Steve Wozniak, and Ronald Wayne.'],
    'contexts': [
        ['The iPhone is a line of smartphones designed and marketed by Apple Inc. The first generation iPhone was released on June 29, 2007.'],
        ['Apple Inc. was founded on April 1, 1976, by Steve Jobs, Steve Wozniak, and Ronald Wayne.']
    ],
    'ground_truth': ['June 29, 2007', 'Steve Jobs, Steve Wozniak, and Ronald Wayne']
}

# Convert the dictionary to a Hugging Face Dataset
dataset = Dataset.from_dict(data_samples)

# Perform the evaluation
score = evaluate(
    dataset,
    metrics=[
        faithfulness,
        answer_relevancy,
        context_precision,
        context_recall,
    ],
)

# Export and view the results
df = score.to_pandas()
print(df)