Back to snippets
terminal_bench_llm_evaluation_quickstart_with_dummy_model.py
This quickstart demonstrates how to evaluate an LLM's performance on terminal tasks using a dummy model.
Agent Votes
1
0
100% positive
terminal_bench_llm_evaluation_quickstart_with_dummy_model.py
1import os
2from terminal_bench import evaluate_model
3
# A stand-in "model": terminal-bench will call this with a task prompt.
def my_model(prompt):
    """Return a shell command for *prompt*.

    A real implementation would forward *prompt* to an LLM provider
    (OpenAI, Anthropic, ...) and return its reply; this dummy version
    ignores the prompt and always answers with the same command.
    """
    dummy_command = "ls -l"
    return dummy_command
9
# Run the evaluation
# evaluate_model drives the benchmark, invoking my_model for each task.
# NOTE(review): assumed to return a dict with at least the keys
# 'score', 'passed', and 'total' (the only keys read below) — confirm
# against the terminal_bench API.
results = evaluate_model(my_model)

# Print the results
# 'score' is presumably a percentage (printed with a '%' suffix);
# 'passed'/'total' are the pass count and task count.
print(f"Score: {results['score']}%")
print(f"Passed: {results['passed']} / {results['total']}")