Skip to content

Commit

Permalink
dev: fix examples (#496)
Browse files Browse the repository at this point in the history
  • Loading branch information
nbsp authored Jul 24, 2024
1 parent c0ab65f commit 8467194
Show file tree
Hide file tree
Showing 8 changed files with 48 additions and 79 deletions.
9 changes: 2 additions & 7 deletions examples/minimal_worker.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import logging

from livekit.agents import JobContext, JobRequest, WorkerOptions, cli
from livekit.agents import JobContext, WorkerOptions, cli


async def entrypoint(ctx: JobContext):
Expand All @@ -9,10 +9,5 @@ async def entrypoint(ctx: JobContext):
# Add your agent logic here!


async def request_fnc(req: JobRequest) -> None:
logging.info("received request %s", req)
await req.accept(entrypoint)


if __name__ == "__main__":
cli.run_app(WorkerOptions(request_fnc))
cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint))
10 changes: 4 additions & 6 deletions examples/simple-color/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import logging

from livekit import rtc
from livekit.agents import JobContext, JobRequest, WorkerOptions, cli
from livekit.agents import JobContext, WorkerOptions, cli

WIDTH = 640
HEIGHT = 480
Expand All @@ -12,6 +12,8 @@


async def entrypoint(job: JobContext):
await job.connect()

room = job.room
source = rtc.VideoSource(WIDTH, HEIGHT)
track = rtc.LocalVideoTrack.create_video_track("single-color", source)
Expand All @@ -31,9 +33,5 @@ async def _draw_color():
asyncio.create_task(_draw_color())


async def request_fnc(req: JobRequest) -> None:
await req.accept(entrypoint)


if __name__ == "__main__":
cli.run_app(WorkerOptions(request_fnc=request_fnc))
cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint))
11 changes: 4 additions & 7 deletions examples/speech-to-text/deepgram_stt.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
import asyncio
import logging

from livekit import agents, rtc
from livekit import rtc
from livekit.agents import (
JobContext,
JobRequest,
WorkerOptions,
cli,
stt,
Expand Down Expand Up @@ -45,6 +44,8 @@ async def transcribe_track(participant: rtc.RemoteParticipant, track: rtc.Track)
async for ev in audio_stream:
stt_stream.push_frame(ev.frame)

await job.connect(auto_subscribe="audio_only")

@job.room.on("track_subscribed")
def on_track_subscribed(
track: rtc.Track,
Expand All @@ -55,9 +56,5 @@ def on_track_subscribed(
tasks.append(asyncio.create_task(transcribe_track(participant, track)))


async def request_fnc(req: JobRequest) -> None:
await req.accept(entrypoint, auto_subscribe=agents.AutoSubscribe.AUDIO_ONLY)


if __name__ == "__main__":
cli.run_app(WorkerOptions(request_fnc=request_fnc))
cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint))
26 changes: 10 additions & 16 deletions examples/text-to-speech/elevenlabs_tts.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from typing import Optional

from livekit import rtc
from livekit.agents import JobContext, JobRequest, WorkerOptions, cli, tts
from livekit.agents import JobContext, WorkerOptions, cli
from livekit.plugins import elevenlabs


Expand Down Expand Up @@ -46,17 +46,19 @@ async def entrypoint(job: JobContext):
track = rtc.LocalAudioTrack.create_audio_track("agent-mic", source)
options = rtc.TrackPublishOptions()
options.source = rtc.TrackSource.SOURCE_MICROPHONE

await job.connect()
await job.room.local_participant.publish_track(track, options)

await asyncio.sleep(1)
logging.info('Saying "Bonjour, comment allez-vous?"')
async for output in tts_11labs.synthesize("Bonjour, comment allez-vous?"):
await source.capture_frame(output.data)
await source.capture_frame(output.frame)

await asyncio.sleep(1)
logging.info('Saying "Au revoir."')
async for output in tts_11labs.synthesize("Au revoir."):
await source.capture_frame(output.data)
await source.capture_frame(output.frame)

await asyncio.sleep(1)
streamed_text = (
Expand All @@ -69,31 +71,23 @@ async def entrypoint(job: JobContext):
): # split into chunk just for the demonstration
stream.push_text(chunk)

stream.mark_segment_end()
stream.flush()
stream.end_input()

playout_q = asyncio.Queue[Optional[rtc.AudioFrame]]()

async def _synth_task():
async for ev in stream:
if ev.type != tts.SynthesisEventType.AUDIO:
continue
assert ev.audio is not None

playout_q.put_nowait(ev.audio.data)
playout_q.put_nowait(ev.frame)

playout_q.put_nowait(None)

synth_task = asyncio.create_task(_synth_task())
playout_task = asyncio.create_task(_playout_task(playout_q, source))

await stream.aclose(wait=True)
await asyncio.gather(synth_task, playout_task)


async def request_fnc(req: JobRequest) -> None:
logging.info("received request %s", req)
await req.accept(entrypoint)
await stream.aclose()


if __name__ == "__main__":
cli.run_app(WorkerOptions(request_fnc=request_fnc))
cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint))
15 changes: 6 additions & 9 deletions examples/text-to-speech/openai_tts.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import logging

from livekit import rtc
from livekit.agents import JobContext, JobRequest, WorkerOptions, cli
from livekit.agents import JobContext, WorkerOptions, cli
from livekit.plugins import openai


Expand All @@ -15,23 +15,20 @@ async def entrypoint(job: JobContext):
track = rtc.LocalAudioTrack.create_audio_track("agent-mic", source)
options = rtc.TrackPublishOptions()
options.source = rtc.TrackSource.SOURCE_MICROPHONE

await job.connect()
await job.room.local_participant.publish_track(track, options)

await asyncio.sleep(1)
logging.info('Saying "Hello!"')
async for output in tts.synthesize("Hello!"):
await source.capture_frame(output.data)
await source.capture_frame(output.frame)

await asyncio.sleep(1)
logging.info('Saying "Goodbye."')
async for output in tts.synthesize("Goodbye."):
await source.capture_frame(output.data)


async def request_fnc(req: JobRequest) -> None:
logging.info("received request %s", req)
await req.accept(entrypoint)
await source.capture_frame(output.frame)


if __name__ == "__main__":
cli.run_app(WorkerOptions(request_fnc=request_fnc))
cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint))
33 changes: 14 additions & 19 deletions examples/text-to-speech/sync_tts_transcription.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
from livekit import rtc
from livekit.agents import (
JobContext,
JobRequest,
WorkerOptions,
cli,
transcription,
Expand Down Expand Up @@ -58,7 +57,7 @@ async def _eg_streamed_tts_stream(
tts_stream.push_text(chunk)
tts_forwarder.push_text(chunk)

tts_stream.mark_segment_end()
tts_stream.flush()
tts_forwarder.mark_text_segment_end()

second_streamed_text = "This is another segment that will be streamed"
Expand All @@ -67,25 +66,23 @@ async def _eg_streamed_tts_stream(
tts_stream.push_text(chunk)
tts_forwarder.push_text(chunk)

tts_stream.mark_segment_end()
tts_stream.flush()
tts_stream.end_input()
tts_forwarder.mark_text_segment_end()

playout_q = asyncio.Queue[Optional[rtc.AudioFrame]]()

async def _synth_task():
async for ev in tts_stream:
if ev.type != tts.SynthesisEventType.AUDIO:
continue
assert ev.audio is not None
playout_q.put_nowait(ev.audio.data)
playout_q.put_nowait(ev.frame)

playout_q.put_nowait(None)

synth_task = asyncio.create_task(_synth_task())
playout_task = asyncio.create_task(_playout_task(playout_q, source))

await tts_stream.aclose(wait=True)
await asyncio.gather(synth_task, playout_task)
await tts_stream.aclose()
await tts_forwarder.aclose()


Expand All @@ -107,8 +104,8 @@ async def _eg_single_segment(
playout_task = asyncio.create_task(_playout_task(playout_q, source))

async for output in tts_11labs.synthesize(text):
tts_forwarder.push_audio(output.data)
playout_q.put_nowait(output.data)
tts_forwarder.push_audio(output.frame)
playout_q.put_nowait(output.frame)

tts_forwarder.mark_audio_segment_end()
playout_q.put_nowait(None)
Expand All @@ -122,7 +119,7 @@ async def _eg_deferred_playout(
):
"""example with deferred playout (We have a synthesized audio before starting to play it)"""
tts_forwarder = transcription.TTSSegmentsForwarder(
room=ctx.room, participant=ctx.room.local_participant, auto_playout=False
room=ctx.room, participant=ctx.room.local_participant
)

text = "Hello world, this is a single segment with deferred playout"
Expand All @@ -133,8 +130,8 @@ async def _eg_deferred_playout(
playout_q = asyncio.Queue[Optional[rtc.AudioFrame]]()

async for output in tts_11labs.synthesize(text):
tts_forwarder.push_audio(output.data)
playout_q.put_nowait(output.data)
tts_forwarder.push_audio(output.frame)
playout_q.put_nowait(output.frame)

tts_forwarder.mark_audio_segment_end()

Expand All @@ -145,6 +142,7 @@ async def _eg_deferred_playout(
playout_task = asyncio.create_task(_playout_task(playout_q, source))

playout_q.put_nowait(None)
tts_forwarder.segment_playout_finished()
await playout_task
await tts_forwarder.aclose()

Expand All @@ -158,6 +156,8 @@ async def entrypoint(ctx: JobContext):
source = rtc.AudioSource(tts_11labs.sample_rate, tts_11labs.num_channels)
track = rtc.LocalAudioTrack.create_audio_track("agent-mic", source)
options = rtc.TrackPublishOptions(source=rtc.TrackSource.SOURCE_MICROPHONE)

await ctx.connect()
await ctx.room.local_participant.publish_track(track, options)

# start the transcription examples
Expand All @@ -169,10 +169,5 @@ async def entrypoint(ctx: JobContext):
await _eg_deferred_playout(ctx, tts_11labs, source)


async def request_fnc(req: JobRequest) -> None:
logging.info("received request %s", req)
await req.accept(entrypoint)


if __name__ == "__main__":
cli.run_app(WorkerOptions(request_fnc=request_fnc))
cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint))
11 changes: 4 additions & 7 deletions examples/voice-assistant/function_calling.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import logging
from typing import Annotated

from livekit.agents import JobContext, JobRequest, WorkerOptions, cli, llm
from livekit.agents import JobContext, WorkerOptions, cli, llm
from livekit.agents.voice_assistant import VoiceAssistant
from livekit.plugins import deepgram, openai, silero

Expand Down Expand Up @@ -95,6 +95,8 @@ async def _will_synthesize_assistant_reply(
will_synthesize_assistant_reply=_will_synthesize_assistant_reply,
)

await ctx.connect()

# Start the assistant. This will automatically publish a microphone track and listen to the first participant
# it finds in the current room. If you need to specify a particular participant, use the participant parameter.
assistant.start(ctx.room)
Expand All @@ -103,10 +105,5 @@ async def _will_synthesize_assistant_reply(
await assistant.say("Hey, how can I help you today?")


async def request_fnc(req: JobRequest) -> None:
logging.info("received request %s", req)
await req.accept(entrypoint)


if __name__ == "__main__":
cli.run_app(WorkerOptions(request_fnc))
cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint))
12 changes: 4 additions & 8 deletions examples/voice-assistant/simple-rag/assistant.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
import asyncio
import logging
import pickle

from livekit.agents import JobContext, JobRequest, WorkerOptions, cli, llm
from livekit.agents import JobContext, WorkerOptions, cli, llm
from livekit.agents.voice_assistant import VoiceAssistant
from livekit.plugins import deepgram, openai, rag, silero

Expand Down Expand Up @@ -51,16 +50,13 @@ async def _will_synthesize_assistant_answer(
will_synthesize_assistant_reply=_will_synthesize_assistant_answer,
plotting=True,
)

await ctx.connect()
assistant.start(ctx.room)

await asyncio.sleep(1)
await assistant.say("Hey, how can I help you today?", allow_interruptions=True)


async def request_fnc(req: JobRequest) -> None:
logging.info("received request %s", req)
await req.accept(entrypoint)


if __name__ == "__main__":
cli.run_app(WorkerOptions(request_fnc))
cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint))

0 comments on commit 8467194

Please sign in to comment.