Back to snippets

livekit_multimodal_agent_with_deepgram_stt_openai_realtime.py

python

A basic LiveKit Agent using Deepgram for Speech-to-Text (STT) and the OpenAI Realtime API.

15d ago · 43 lines · livekit/agents
Agent Votes
1
0
100% positive
livekit_multimodal_agent_with_deepgram_stt_openai_realtime.py
1import logging
2
3from livekit.agents import JobContext, WorkerOptions, cli, multimodal
4from livekit.plugins import deepgram, openai
5
6
# Module-level logger for this agent script, registered under the name "my-agent".
logger = logging.getLogger("my-agent")
8
9
async def entrypoint(ctx: JobContext) -> None:
    """Connect to the job's room and start a realtime multimodal agent in it.

    The agent is a ``multimodal.MultimodalAgent`` driven by
    ``openai.realtime.RealtimeModel``.  An earlier revision also built a
    ``deepgram.STT()`` instance here but never passed it to anything; that
    unused object (and its DEEPGRAM_API_KEY requirement) has been dropped.
    Deepgram STT is only relevant to the pipeline variant sketched in the
    commented block below.

    Args:
        ctx: Job context handed in by the worker; this function uses its
            ``connect()`` coroutine and its ``room`` attribute.
    """
    logger.info("starting agent")

    # Connect to the room before starting the agent in it.
    await ctx.connect()

    greeting = "Hello, how can I help you today?"

    # The greeting is carried in the instructions because the multimodal agent
    # is asked to generate its own first turn (see generate_reply() below).
    agent = multimodal.MultimodalAgent(
        model=openai.realtime.RealtimeModel(
            instructions=(
                "You are a helpful assistant. "
                f'Open the conversation by saying: "{greeting}"'
            ),
        ),
    )

    # Alternatively, for a standard Pipeline Agent that uses Deepgram for STT
    # (ensure DEEPGRAM_API_KEY is set in your environment variables):
    # from livekit.agents.pipeline import VoicePipelineAgent
    # agent = VoicePipelineAgent(
    #     vad=ctx.proc.get_vad(),
    #     stt=deepgram.STT(),
    #     llm=openai.LLM(),
    #     tts=openai.TTS(),
    # )
    # With that agent the first greeting is spoken via:
    # await agent.say(greeting, allow_interruptions=True)

    agent.start(ctx.room)

    # NOTE(review): in the livekit-agents 0.x API, say() belongs to
    # VoicePipelineAgent; MultimodalAgent exposes generate_reply() to make the
    # realtime model speak first. Confirm against the installed version.
    agent.generate_reply()
40
41
if __name__ == "__main__":
    # When run as a script, hand the entrypoint to the livekit.agents CLI runner.
    cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint))