Back to snippets
livekit_multimodal_agent_with_deepgram_stt_openai_realtime.py
python — A basic LiveKit Agent using Deepgram for Speech-to-Text (STT) a…
Agent Votes
1
0
100% positive
livekit_multimodal_agent_with_deepgram_stt_openai_realtime.py
1import logging
2
3from livekit.agents import JobContext, WorkerOptions, cli, multimodal
4from livekit.plugins import deepgram, openai
5
6
# Module-level logger shared by everything in this worker script.
logger: logging.Logger = logging.getLogger("my-agent")
8
9
async def entrypoint(ctx: JobContext) -> None:
    """Join the job's room and run an OpenAI Realtime voice assistant in it.

    The function connects to the room, attaches a ``MultimodalAgent`` backed
    by ``openai.realtime.RealtimeModel`` and then asks the model to speak
    first.

    Args:
        ctx: Job context supplied by the LiveKit worker; provides the room
            connection (``ctx.connect()``) and the room itself (``ctx.room``).
    """
    logger.info("starting agent")

    # Join the room before attaching the agent to it.
    await ctx.connect()

    # The realtime model handles speech in and speech out on its own, and
    # MultimodalAgent takes no STT argument, so no Deepgram STT instance is
    # built here (an unused one would only add a DEEPGRAM_API_KEY requirement).
    # NOTE(review): to actually transcribe with Deepgram, switch to
    # livekit.agents.pipeline.VoicePipelineAgent with stt=deepgram.STT()
    # plus a VAD, an LLM and a TTS - confirm against the installed
    # livekit-agents version.
    agent = multimodal.MultimodalAgent(
        model=openai.realtime.RealtimeModel(
            instructions="You are a helpful assistant.",
        ),
    )
    agent.start(ctx.room)

    # say() belongs to VoicePipelineAgent; MultimodalAgent does not provide
    # it, so awaiting agent.say(...) here would fail at runtime. Ask the
    # realtime model to open the conversation instead. The greeting wording
    # now comes from the model - steer it through `instructions` if a fixed
    # phrase is required.
    agent.generate_reply()
40
41
if __name__ == "__main__":
    # Register the job entrypoint and hand control to the LiveKit worker CLI.
    worker_options = WorkerOptions(entrypoint_fnc=entrypoint)
    cli.run_app(worker_options)