diff --git a/examples/audio.py b/examples/audio.py
index 73491090f5..85f47bfb06 100755
--- a/examples/audio.py
+++ b/examples/audio.py
@@ -1,5 +1,6 @@
-#!/usr/bin/env python
+#!/usr/bin/env rye run python
 
+import time
 from pathlib import Path
 
 from openai import OpenAI
@@ -11,6 +12,8 @@
 
 
 def main() -> None:
+    stream_to_speakers()
+
     # Create text-to-speech audio file
     with openai.audio.speech.with_streaming_response.create(
         model="tts-1",
@@ -34,5 +37,47 @@ def main() -> None:
     print(translation.text)
 
 
+def stream_to_speakers() -> None:
+    """Stream text-to-speech audio from the API to the speakers as it arrives.
+
+    Requests raw PCM speech from the `tts-1` model and writes each chunk to a
+    PyAudio output stream as soon as it is received, printing the elapsed time
+    once the response starts and again once every chunk has been written.
+
+    `pyaudio` is imported inside the function, so the package is only needed
+    when this function is actually called.
+    """
+    import pyaudio
+
+    audio = pyaudio.PyAudio()
+    try:
+        # 16-bit mono at 24 kHz, to match the raw samples the API returns for "pcm".
+        player_stream = audio.open(format=pyaudio.paInt16, channels=1, rate=24000, output=True)
+        try:
+            start_time = time.time()
+
+            with openai.audio.speech.with_streaming_response.create(
+                model="tts-1",
+                voice="alloy",
+                response_format="pcm",  # similar to WAV, but without a header chunk at the start.
+                input="""I see skies of blue and clouds of white
+                The bright blessed days, the dark sacred nights
+                And I think to myself
+                What a wonderful world""",
+            ) as response:
+                print(f"Time to first byte: {int((time.time() - start_time) * 1000)}ms")
+                for chunk in response.iter_bytes(chunk_size=1024):
+                    player_stream.write(chunk)
+
+            print(f"Done in {int((time.time() - start_time) * 1000)}ms.")
+        finally:
+            # Always release the output stream, even if the request or playback fails.
+            player_stream.stop_stream()
+            player_stream.close()
+    finally:
+        # Shut down PortAudio so the audio device is not left held open.
+        audio.terminate()
+
+
 if __name__ == "__main__":
     main()
diff --git a/pyproject.toml b/pyproject.toml
index 50fac10e84..5bdca2b69d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -61,7 +61,8 @@ dev-dependencies = [
     "dirty-equals>=0.6.0",
     "importlib-metadata>=6.7.0",
     "azure-identity >=1.14.1",
-    "types-tqdm > 4"
+    "types-tqdm > 4",
+    "types-pyaudio > 0"
 ]
 
 [tool.rye.scripts]
diff --git a/requirements-dev.lock b/requirements-dev.lock
index 97f664e7c1..fa95964d07 100644
--- a/requirements-dev.lock
+++ b/requirements-dev.lock
@@ -126,6 +126,7 @@ tomli==2.0.1
     # via pytest
 tqdm==4.66.1
     # via openai
+types-pyaudio==0.2.16.20240106
 types-pytz==2024.1.0.20240203
     # via pandas-stubs
 types-tqdm==4.66.0.2