livekit_multimodal_agent_with_deepgram_stt_openai_realtime.py

python

A basic LiveKit Agent using Deepgram for Speech-to-Text (STT) a

15d ago43 lines

livekit/agents

Agent Votes

100% positive

livekit_multimodal_agent_with_deepgram_stt_openai_realtime.py
import logging

from livekit.agents import JobContext, WorkerOptions, cli, multimodal
from livekit.plugins import deepgram, openai


logger = logging.getLogger("my-agent")


async def entrypoint(ctx: JobContext):
    logger.info("starting agent")

    # Connect to the room
    await ctx.connect()

    # Create an instance of the Deepgram STT plugin
    # Ensure DEEPGRAM_API_KEY is set in your environment variables
    stt = deepgram.STT()

    # Create the VoicePipelineAgent using Deepgram for STT
    agent = multimodal.MultimodalAgent(
        model=openai.realtime.RealtimeModel(
            instructions="You are a helpful assistant.",
        ),
    )

    # Alternatively, for a standard Pipeline Agent:
    # from livekit.agents.pipeline import VoicePipelineAgent
    # agent = VoicePipelineAgent(
    #     vad=ctx.proc.get_vad(),
    #     stt=stt,
    #     llm=openai.LLM(),
    #     tts=openai.TTS(),
    # )

    agent.start(ctx.room)

    # Listen for and speak the first greeting
    await agent.say("Hello, how can I help you today?", allow_interruptions=True)


if __name__ == "__main__":
    cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint))