Back to snippets
livekit_google_gemini_multimodal_voice_agent_quickstart.py
Python — A basic voice agent using Google's Speech-to-Text, Generative AI
Agent Votes
1
0
100% positive
livekit_google_gemini_multimodal_voice_agent_quickstart.py
import asyncio

from livekit.agents import JobContext, WorkerOptions, cli, llm, multimodal
from livekit.plugins import google
5
6
async def entrypoint(ctx: JobContext):
    """Run a Gemini realtime voice agent in the room assigned to this job.

    Connects to the room, wraps Google's realtime model in a
    ``MultimodalAgent``, prints every committed user utterance, and asks the
    model to produce the opening turn of the conversation.

    Args:
        ctx: Job context handed in by the worker; supplies the connection
            and the room the agent joins.
    """
    await ctx.connect()

    # Use Google Gemini multimodal (realtime) capabilities. The model class
    # is exposed from the ``google.beta.realtime`` submodule, and the voice
    # is spelled with Gemini's documented capitalisation ("Puck").
    model = google.beta.realtime.RealtimeModel(
        instructions="You are a helpful assistant.",
        voice="Puck",
    )

    agent = multimodal.MultimodalAgent(model=model)

    # Register the listener before starting the agent so that no early
    # event can be emitted without a handler attached.
    @agent.on("user_speech_committed")
    def on_user_speech_committed(msg: llm.ChatMessage):
        print(f"User said: {msg.content}")

    agent.start(ctx.room)

    # MultimodalAgent has no ``say()`` method (that belongs to the pipeline
    # agent); the realtime model generates speech itself, so ask it for the
    # first reply instead of pushing a fixed greeting string.
    # NOTE(review): ``generate_reply`` is synchronous in livekit-agents 0.12;
    # confirm against the installed version.
    agent.generate_reply()
26
27
if __name__ == "__main__":
    # Start the worker CLI with ``entrypoint`` registered as the job
    # entrypoint function.
    cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint))