From 97d73cf89935ca6098bb889a92f0ec2cdff16989 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Tue, 17 Dec 2024 18:09:06 +0000 Subject: [PATCH] feat: add Realtime API support (#1958) More information on the Realtime API can be found here: https://platform.openai.com/docs/guides/realtime --- .stats.yml | 2 +- api.md | 51 ++ pyproject.toml | 7 +- requirements-dev.lock | 2 + requirements.lock | 2 + src/openai/_client.py | 26 + src/openai/lib/azure.py | 14 + .../resources/beta/realtime/realtime.py | 852 ++++++++++++++++++ src/openai/types/__init__.py | 1 + src/openai/types/beta/realtime/__init__.py | 74 ++ .../realtime/conversation_created_event.py | 27 + .../types/beta/realtime/conversation_item.py | 61 ++ .../realtime/conversation_item_content.py | 28 + .../conversation_item_content_param.py | 27 + .../conversation_item_create_event.py | 28 + .../conversation_item_create_event_param.py | 28 + .../conversation_item_created_event.py | 25 + .../conversation_item_delete_event.py | 19 + .../conversation_item_delete_event_param.py | 18 + .../conversation_item_deleted_event.py | 18 + ...put_audio_transcription_completed_event.py | 26 + ..._input_audio_transcription_failed_event.py | 39 + .../beta/realtime/conversation_item_param.py | 62 ++ .../conversation_item_truncate_event.py | 32 + .../conversation_item_truncate_event_param.py | 31 + .../conversation_item_truncated_event.py | 24 + src/openai/types/beta/realtime/error_event.py | 36 + .../input_audio_buffer_append_event.py | 23 + .../input_audio_buffer_append_event_param.py | 22 + .../input_audio_buffer_clear_event.py | 16 + .../input_audio_buffer_clear_event_param.py | 15 + .../input_audio_buffer_cleared_event.py | 15 + .../input_audio_buffer_commit_event.py | 16 + .../input_audio_buffer_commit_event_param.py | 15 + .../input_audio_buffer_committed_event.py | 21 + ...input_audio_buffer_speech_started_event.py | 26 + ...input_audio_buffer_speech_stopped_event.py | 25 + .../realtime/rate_limits_updated_event.py | 33 + .../beta/realtime/realtime_client_event.py | 32 + .../realtime/realtime_client_event_param.py | 30 + .../beta/realtime/realtime_connect_params.py | 11 + .../types/beta/realtime/realtime_response.py | 42 + .../beta/realtime/realtime_response_status.py | 39 + .../beta/realtime/realtime_response_usage.py | 52 ++ .../beta/realtime/realtime_server_event.py | 72 ++ .../realtime/response_audio_delta_event.py | 30 + .../realtime/response_audio_done_event.py | 27 + .../response_audio_transcript_delta_event.py | 30 + .../response_audio_transcript_done_event.py | 30 + .../beta/realtime/response_cancel_event.py | 22 + .../realtime/response_cancel_event_param.py | 21 + .../response_content_part_added_event.py | 45 + .../response_content_part_done_event.py | 45 + .../beta/realtime/response_create_event.py | 115 +++ .../realtime/response_create_event_param.py | 116 +++ .../beta/realtime/response_created_event.py | 19 + .../beta/realtime/response_done_event.py | 19 + ...nse_function_call_arguments_delta_event.py | 30 + ...onse_function_call_arguments_done_event.py | 30 + .../response_output_item_added_event.py | 25 + .../response_output_item_done_event.py | 25 + .../realtime/response_text_delta_event.py | 30 + .../beta/realtime/response_text_done_event.py | 30 + src/openai/types/beta/realtime/session.py | 148 +++ .../beta/realtime/session_created_event.py | 19 + .../beta/realtime/session_update_event.py | 158 ++++ .../realtime/session_update_event_param.py | 166 ++++ .../beta/realtime/session_updated_event.py | 19 + .../types/websocket_connection_options.py | 36 + tests/api_resources/beta/test_realtime.py | 17 + 70 files changed, 3313 insertions(+), 4 deletions(-) create mode 100644 src/openai/types/beta/realtime/conversation_created_event.py create mode 100644 src/openai/types/beta/realtime/conversation_item.py create mode 100644 src/openai/types/beta/realtime/conversation_item_content.py create mode 100644 src/openai/types/beta/realtime/conversation_item_content_param.py create mode 100644 src/openai/types/beta/realtime/conversation_item_create_event.py create mode 100644 src/openai/types/beta/realtime/conversation_item_create_event_param.py create mode 100644 src/openai/types/beta/realtime/conversation_item_created_event.py create mode 100644 src/openai/types/beta/realtime/conversation_item_delete_event.py create mode 100644 src/openai/types/beta/realtime/conversation_item_delete_event_param.py create mode 100644 src/openai/types/beta/realtime/conversation_item_deleted_event.py create mode 100644 src/openai/types/beta/realtime/conversation_item_input_audio_transcription_completed_event.py create mode 100644 src/openai/types/beta/realtime/conversation_item_input_audio_transcription_failed_event.py create mode 100644 src/openai/types/beta/realtime/conversation_item_param.py create mode 100644 src/openai/types/beta/realtime/conversation_item_truncate_event.py create mode 100644 src/openai/types/beta/realtime/conversation_item_truncate_event_param.py create mode 100644 src/openai/types/beta/realtime/conversation_item_truncated_event.py create mode 100644 src/openai/types/beta/realtime/error_event.py create mode 100644 src/openai/types/beta/realtime/input_audio_buffer_append_event.py create mode 100644 src/openai/types/beta/realtime/input_audio_buffer_append_event_param.py create mode 100644 src/openai/types/beta/realtime/input_audio_buffer_clear_event.py create mode 100644 src/openai/types/beta/realtime/input_audio_buffer_clear_event_param.py create mode 100644 src/openai/types/beta/realtime/input_audio_buffer_cleared_event.py create mode 100644 src/openai/types/beta/realtime/input_audio_buffer_commit_event.py create mode 100644 src/openai/types/beta/realtime/input_audio_buffer_commit_event_param.py create mode 100644 src/openai/types/beta/realtime/input_audio_buffer_committed_event.py create mode 100644 src/openai/types/beta/realtime/input_audio_buffer_speech_started_event.py create mode 100644 src/openai/types/beta/realtime/input_audio_buffer_speech_stopped_event.py create mode 100644 src/openai/types/beta/realtime/rate_limits_updated_event.py create mode 100644 src/openai/types/beta/realtime/realtime_client_event.py create mode 100644 src/openai/types/beta/realtime/realtime_client_event_param.py create mode 100644 src/openai/types/beta/realtime/realtime_connect_params.py create mode 100644 src/openai/types/beta/realtime/realtime_response.py create mode 100644 src/openai/types/beta/realtime/realtime_response_status.py create mode 100644 src/openai/types/beta/realtime/realtime_response_usage.py create mode 100644 src/openai/types/beta/realtime/realtime_server_event.py create mode 100644 src/openai/types/beta/realtime/response_audio_delta_event.py create mode 100644 src/openai/types/beta/realtime/response_audio_done_event.py create mode 100644 src/openai/types/beta/realtime/response_audio_transcript_delta_event.py create mode 100644 src/openai/types/beta/realtime/response_audio_transcript_done_event.py create mode 100644 src/openai/types/beta/realtime/response_cancel_event.py create mode 100644 src/openai/types/beta/realtime/response_cancel_event_param.py create mode 100644 src/openai/types/beta/realtime/response_content_part_added_event.py create mode 100644 src/openai/types/beta/realtime/response_content_part_done_event.py create mode 100644 src/openai/types/beta/realtime/response_create_event.py create mode 100644 src/openai/types/beta/realtime/response_create_event_param.py create mode 100644 src/openai/types/beta/realtime/response_created_event.py create mode 100644 src/openai/types/beta/realtime/response_done_event.py create mode 100644 src/openai/types/beta/realtime/response_function_call_arguments_delta_event.py create mode 100644 src/openai/types/beta/realtime/response_function_call_arguments_done_event.py create mode 100644 src/openai/types/beta/realtime/response_output_item_added_event.py create mode 100644 src/openai/types/beta/realtime/response_output_item_done_event.py create mode 100644 src/openai/types/beta/realtime/response_text_delta_event.py create mode 100644 src/openai/types/beta/realtime/response_text_done_event.py create mode 100644 src/openai/types/beta/realtime/session.py create mode 100644 src/openai/types/beta/realtime/session_created_event.py create mode 100644 src/openai/types/beta/realtime/session_update_event.py create mode 100644 src/openai/types/beta/realtime/session_update_event_param.py create mode 100644 src/openai/types/beta/realtime/session_updated_event.py create mode 100644 src/openai/types/websocket_connection_options.py create mode 100644 tests/api_resources/beta/test_realtime.py diff --git a/.stats.yml b/.stats.yml index e3a0040a5a..12219ccaa1 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,2 +1,2 @@ configured_endpoints: 69 -openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai-779ea2754025daf5e18eb8ceb203ec321692636bc3a999338556a479178efa6c.yml +openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai-0d64ca9e45f51b4279f87b205eeb3a3576df98407698ce053f2e2302c1c08df1.yml diff --git a/api.md b/api.md index 91b2a9c2fd..ace93e0559 100644 --- a/api.md +++ b/api.md @@ -239,6 +239,57 @@ Methods: ## Realtime +Types: + +```python +from openai.types.beta.realtime import ( + ConversationCreatedEvent, + ConversationItem, + ConversationItemContent, + ConversationItemCreateEvent, + ConversationItemCreatedEvent, + ConversationItemDeleteEvent, + ConversationItemDeletedEvent, + ConversationItemInputAudioTranscriptionCompletedEvent, + ConversationItemInputAudioTranscriptionFailedEvent, + ConversationItemTruncateEvent, + ConversationItemTruncatedEvent, + ErrorEvent, + InputAudioBufferAppendEvent, + InputAudioBufferClearEvent, + InputAudioBufferClearedEvent, + InputAudioBufferCommitEvent, + InputAudioBufferCommittedEvent, + InputAudioBufferSpeechStartedEvent, + InputAudioBufferSpeechStoppedEvent, + RateLimitsUpdatedEvent, + RealtimeClientEvent, + RealtimeResponse, + RealtimeResponseStatus, + RealtimeResponseUsage, + RealtimeServerEvent, + ResponseAudioDeltaEvent, + ResponseAudioDoneEvent, + ResponseAudioTranscriptDeltaEvent, + ResponseAudioTranscriptDoneEvent, + ResponseCancelEvent, + ResponseContentPartAddedEvent, + ResponseContentPartDoneEvent, + ResponseCreateEvent, + ResponseCreatedEvent, + ResponseDoneEvent, + ResponseFunctionCallArgumentsDeltaEvent, + ResponseFunctionCallArgumentsDoneEvent, + ResponseOutputItemAddedEvent, + ResponseOutputItemDoneEvent, + ResponseTextDeltaEvent, + ResponseTextDoneEvent, + SessionCreatedEvent, + SessionUpdateEvent, + SessionUpdatedEvent, +) +``` + ### Sessions Types: diff --git a/pyproject.toml b/pyproject.toml index e03d4e798f..f83aff6fee 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,9 +35,6 @@ classifiers = [ "License :: OSI Approved :: Apache Software License" ] -[project.optional-dependencies] -datalib = ["numpy >= 1", "pandas >= 1.2.3", "pandas-stubs >= 1.1.0.11"] - [project.urls] Homepage = "https://github.com/openai/openai-python" Repository = "https://github.com/openai/openai-python" @@ -45,6 +42,10 @@ Repository = "https://github.com/openai/openai-python" [project.scripts] openai = "openai.cli:main" +[project.optional-dependencies] +realtime = ["websockets >= 13, < 15"] +datalib = ["numpy >= 1", "pandas >= 1.2.3", "pandas-stubs >= 1.1.0.11"] + [tool.rye] managed = true # version pins are in requirements-dev.lock diff --git a/requirements-dev.lock b/requirements-dev.lock index 2cf6ab5ea9..94cf6aca07 100644 --- a/requirements-dev.lock +++ b/requirements-dev.lock @@ -185,5 +185,7 @@ urllib3==2.2.1 # via requests virtualenv==20.24.5 # via nox +websockets==14.1 + # via openai zipp==3.17.0 # via importlib-metadata diff --git a/requirements.lock b/requirements.lock index 826f0bc927..c10449ac20 100644 --- a/requirements.lock +++ b/requirements.lock @@ -64,3 +64,5 @@ typing-extensions==4.12.2 # via pydantic-core tzdata==2024.1 # via pandas +websockets==14.1 + # via openai diff --git a/src/openai/_client.py b/src/openai/_client.py index 5419e88f06..c784694f20 100644 --- a/src/openai/_client.py +++ b/src/openai/_client.py @@ -63,6 +63,14 @@ class OpenAI(SyncAPIClient): organization: str | None project: str | None + websocket_base_url: str | httpx.URL | None + """Base URL for WebSocket connections. + + If not specified, the default base URL will be used, with 'wss://' replacing the + 'http://' or 'https://' scheme. For example: 'http://example.com' becomes + 'wss://example.com' + """ + def __init__( self, *, @@ -70,6 +78,7 @@ def __init__( organization: str | None = None, project: str | None = None, base_url: str | httpx.URL | None = None, + websocket_base_url: str | httpx.URL | None = None, timeout: Union[float, Timeout, None, NotGiven] = NOT_GIVEN, max_retries: int = DEFAULT_MAX_RETRIES, default_headers: Mapping[str, str] | None = None, @@ -111,6 +120,8 @@ def __init__( project = os.environ.get("OPENAI_PROJECT_ID") self.project = project + self.websocket_base_url = websocket_base_url + if base_url is None: base_url = os.environ.get("OPENAI_BASE_URL") if base_url is None: @@ -172,6 +183,7 @@ def copy( api_key: str | None = None, organization: str | None = None, project: str | None = None, + websocket_base_url: str | httpx.URL | None = None, base_url: str | httpx.URL | None = None, timeout: float | Timeout | None | NotGiven = NOT_GIVEN, http_client: httpx.Client | None = None, @@ -208,6 +220,7 @@ def copy( api_key=api_key or self.api_key, organization=organization or self.organization, project=project or self.project, + websocket_base_url=websocket_base_url or self.websocket_base_url, base_url=base_url or self.base_url, timeout=self.timeout if isinstance(timeout, NotGiven) else timeout, http_client=http_client, @@ -277,6 +290,14 @@ class AsyncOpenAI(AsyncAPIClient): organization: str | None project: str | None + websocket_base_url: str | httpx.URL | None + """Base URL for WebSocket connections. + + If not specified, the default base URL will be used, with 'wss://' replacing the + 'http://' or 'https://' scheme. For example: 'http://example.com' becomes + 'wss://example.com' + """ + def __init__( self, *, @@ -284,6 +305,7 @@ def __init__( organization: str | None = None, project: str | None = None, base_url: str | httpx.URL | None = None, + websocket_base_url: str | httpx.URL | None = None, timeout: Union[float, Timeout, None, NotGiven] = NOT_GIVEN, max_retries: int = DEFAULT_MAX_RETRIES, default_headers: Mapping[str, str] | None = None, @@ -325,6 +347,8 @@ def __init__( project = os.environ.get("OPENAI_PROJECT_ID") self.project = project + self.websocket_base_url = websocket_base_url + if base_url is None: base_url = os.environ.get("OPENAI_BASE_URL") if base_url is None: @@ -386,6 +410,7 @@ def copy( api_key: str | None = None, organization: str | None = None, project: str | None = None, + websocket_base_url: str | httpx.URL | None = None, base_url: str | httpx.URL | None = None, timeout: float | Timeout | None | NotGiven = NOT_GIVEN, http_client: httpx.AsyncClient | None = None, @@ -422,6 +447,7 @@ def copy( api_key=api_key or self.api_key, organization=organization or self.organization, project=project or self.project, + websocket_base_url=websocket_base_url or self.websocket_base_url, base_url=base_url or self.base_url, timeout=self.timeout if isinstance(timeout, NotGiven) else timeout, http_client=http_client, diff --git a/src/openai/lib/azure.py b/src/openai/lib/azure.py index 54122dbecb..13d9f31838 100644 --- a/src/openai/lib/azure.py +++ b/src/openai/lib/azure.py @@ -76,6 +76,7 @@ def __init__( azure_ad_token: str | None = None, azure_ad_token_provider: AzureADTokenProvider | None = None, organization: str | None = None, + websocket_base_url: str | httpx.URL | None = None, timeout: float | Timeout | None | NotGiven = NOT_GIVEN, max_retries: int = DEFAULT_MAX_RETRIES, default_headers: Mapping[str, str] | None = None, @@ -94,6 +95,7 @@ def __init__( azure_ad_token: str | None = None, azure_ad_token_provider: AzureADTokenProvider | None = None, organization: str | None = None, + websocket_base_url: str | httpx.URL | None = None, timeout: float | Timeout | None | NotGiven = NOT_GIVEN, max_retries: int = DEFAULT_MAX_RETRIES, default_headers: Mapping[str, str] | None = None, @@ -112,6 +114,7 @@ def __init__( azure_ad_token: str | None = None, azure_ad_token_provider: AzureADTokenProvider | None = None, organization: str | None = None, + websocket_base_url: str | httpx.URL | None = None, timeout: float | Timeout | None | NotGiven = NOT_GIVEN, max_retries: int = DEFAULT_MAX_RETRIES, default_headers: Mapping[str, str] | None = None, @@ -131,6 +134,7 @@ def __init__( azure_ad_token_provider: AzureADTokenProvider | None = None, organization: str | None = None, project: str | None = None, + websocket_base_url: str | httpx.URL | None = None, base_url: str | None = None, timeout: float | Timeout | None | NotGiven = NOT_GIVEN, max_retries: int = DEFAULT_MAX_RETRIES, @@ -214,6 +218,7 @@ def __init__( default_headers=default_headers, default_query=default_query, http_client=http_client, + websocket_base_url=websocket_base_url, _strict_response_validation=_strict_response_validation, ) self._api_version = api_version @@ -227,6 +232,7 @@ def copy( api_key: str | None = None, organization: str | None = None, project: str | None = None, + websocket_base_url: str | httpx.URL | None = None, api_version: str | None = None, azure_ad_token: str | None = None, azure_ad_token_provider: AzureADTokenProvider | None = None, @@ -247,6 +253,7 @@ def copy( api_key=api_key, organization=organization, project=project, + websocket_base_url=websocket_base_url, base_url=base_url, timeout=timeout, http_client=http_client, @@ -314,6 +321,7 @@ def __init__( azure_ad_token_provider: AsyncAzureADTokenProvider | None = None, organization: str | None = None, project: str | None = None, + websocket_base_url: str | httpx.URL | None = None, timeout: float | Timeout | None | NotGiven = NOT_GIVEN, max_retries: int = DEFAULT_MAX_RETRIES, default_headers: Mapping[str, str] | None = None, @@ -333,6 +341,7 @@ def __init__( azure_ad_token_provider: AsyncAzureADTokenProvider | None = None, organization: str | None = None, project: str | None = None, + websocket_base_url: str | httpx.URL | None = None, timeout: float | Timeout | None | NotGiven = NOT_GIVEN, max_retries: int = DEFAULT_MAX_RETRIES, default_headers: Mapping[str, str] | None = None, @@ -352,6 +361,7 @@ def __init__( azure_ad_token_provider: AsyncAzureADTokenProvider | None = None, organization: str | None = None, project: str | None = None, + websocket_base_url: str | httpx.URL | None = None, timeout: float | Timeout | None | NotGiven = NOT_GIVEN, max_retries: int = DEFAULT_MAX_RETRIES, default_headers: Mapping[str, str] | None = None, @@ -372,6 +382,7 @@ def __init__( organization: str | None = None, project: str | None = None, base_url: str | None = None, + websocket_base_url: str | httpx.URL | None = None, timeout: float | Timeout | None | NotGiven = NOT_GIVEN, max_retries: int = DEFAULT_MAX_RETRIES, default_headers: Mapping[str, str] | None = None, @@ -454,6 +465,7 @@ def __init__( default_headers=default_headers, default_query=default_query, http_client=http_client, + websocket_base_url=websocket_base_url, _strict_response_validation=_strict_response_validation, ) self._api_version = api_version @@ -467,6 +479,7 @@ def copy( api_key: str | None = None, organization: str | None = None, project: str | None = None, + websocket_base_url: str | httpx.URL | None = None, api_version: str | None = None, azure_ad_token: str | None = None, azure_ad_token_provider: AsyncAzureADTokenProvider | None = None, @@ -487,6 +500,7 @@ def copy( api_key=api_key, organization=organization, project=project, + websocket_base_url=websocket_base_url, base_url=base_url, timeout=timeout, http_client=http_client, diff --git a/src/openai/resources/beta/realtime/realtime.py b/src/openai/resources/beta/realtime/realtime.py index e57e0be503..c79fd46217 100644 --- a/src/openai/resources/beta/realtime/realtime.py +++ b/src/openai/resources/beta/realtime/realtime.py @@ -2,6 +2,15 @@ from __future__ import annotations +import json +import logging +from types import TracebackType +from typing import TYPE_CHECKING, Any, Iterator, cast +from typing_extensions import AsyncIterator + +import httpx +from pydantic import BaseModel + from .sessions import ( Sessions, AsyncSessions, @@ -10,11 +19,34 @@ SessionsWithStreamingResponse, AsyncSessionsWithStreamingResponse, ) +from ...._types import NOT_GIVEN, Query, Headers, NotGiven +from ...._utils import ( + maybe_transform, + strip_not_given, + async_maybe_transform, +) from ...._compat import cached_property +from ...._models import construct_type_unchecked from ...._resource import SyncAPIResource, AsyncAPIResource +from ...._exceptions import OpenAIError +from ...._base_client import _merge_mappings +from ....types.beta.realtime import session_update_event_param, response_create_event_param +from ....types.websocket_connection_options import WebsocketConnectionOptions +from ....types.beta.realtime.realtime_client_event import RealtimeClientEvent +from ....types.beta.realtime.realtime_server_event import RealtimeServerEvent +from ....types.beta.realtime.conversation_item_param import ConversationItemParam +from ....types.beta.realtime.realtime_client_event_param import RealtimeClientEventParam + +if TYPE_CHECKING: + from websockets.sync.client import ClientConnection as WebsocketConnection + from websockets.asyncio.client import ClientConnection as AsyncWebsocketConnection + + from ...._client import OpenAI, AsyncOpenAI __all__ = ["Realtime", "AsyncRealtime"] +log: logging.Logger = logging.getLogger(__name__) + class Realtime(SyncAPIResource): @cached_property @@ -40,6 +72,33 @@ def with_streaming_response(self) -> RealtimeWithStreamingResponse: """ return RealtimeWithStreamingResponse(self) + def connect( + self, + *, + model: str, + extra_query: Query = {}, + extra_headers: Headers = {}, + websocket_connection_options: WebsocketConnectionOptions = {}, + ) -> RealtimeConnectionManager: + """ + The Realtime API enables you to build low-latency, multi-modal conversational experiences. It currently supports text and audio as both input and output, as well as function calling. + + Some notable benefits of the API include: + + - Native speech-to-speech: Skipping an intermediate text format means low latency and nuanced output. + - Natural, steerable voices: The models have natural inflection and can laugh, whisper, and adhere to tone direction. + - Simultaneous multimodal output: Text is useful for moderation; faster-than-realtime audio ensures stable playback. + + The Realtime API is a stateful, event-based API that communicates over a WebSocket. + """ + return RealtimeConnectionManager( + client=self._client, + extra_query=extra_query, + extra_headers=extra_headers, + websocket_connection_options=websocket_connection_options, + model=model, + ) + class AsyncRealtime(AsyncAPIResource): @cached_property @@ -65,6 +124,33 @@ def with_streaming_response(self) -> AsyncRealtimeWithStreamingResponse: """ return AsyncRealtimeWithStreamingResponse(self) + def connect( + self, + *, + model: str, + extra_query: Query = {}, + extra_headers: Headers = {}, + websocket_connection_options: WebsocketConnectionOptions = {}, + ) -> AsyncRealtimeConnectionManager: + """ + The Realtime API enables you to build low-latency, multi-modal conversational experiences. It currently supports text and audio as both input and output, as well as function calling. + + Some notable benefits of the API include: + + - Native speech-to-speech: Skipping an intermediate text format means low latency and nuanced output. + - Natural, steerable voices: The models have natural inflection and can laugh, whisper, and adhere to tone direction. + - Simultaneous multimodal output: Text is useful for moderation; faster-than-realtime audio ensures stable playback. + + The Realtime API is a stateful, event-based API that communicates over a WebSocket. + """ + return AsyncRealtimeConnectionManager( + client=self._client, + extra_query=extra_query, + extra_headers=extra_headers, + websocket_connection_options=websocket_connection_options, + model=model, + ) + class RealtimeWithRawResponse: def __init__(self, realtime: Realtime) -> None: @@ -100,3 +186,769 @@ def __init__(self, realtime: AsyncRealtime) -> None: @cached_property def sessions(self) -> AsyncSessionsWithStreamingResponse: return AsyncSessionsWithStreamingResponse(self._realtime.sessions) + + +class AsyncRealtimeConnection: + """Represents a live websocket connection to the Realtime API""" + + session: AsyncRealtimeSessionResource + response: AsyncRealtimeResponseResource + conversation: AsyncRealtimeConversationResource + input_audio_buffer: AsyncRealtimeInputAudioBufferResource + + _connection: AsyncWebsocketConnection + + def __init__(self, connection: AsyncWebsocketConnection) -> None: + self._connection = connection + + self.session = AsyncRealtimeSessionResource(self) + self.response = AsyncRealtimeResponseResource(self) + self.conversation = AsyncRealtimeConversationResource(self) + self.input_audio_buffer = AsyncRealtimeInputAudioBufferResource(self) + + async def __aiter__(self) -> AsyncIterator[RealtimeServerEvent]: + """ + An infinite-iterator that will continue to yield events until + the connection is closed. + """ + from websockets.exceptions import ConnectionClosedOK + + try: + while True: + yield await self.recv() + except ConnectionClosedOK: + return + + async def recv(self) -> RealtimeServerEvent: + """ + Receive the next message from the connection and parses it into a `RealtimeServerEvent` object. + + Canceling this method is safe. There's no risk of losing data. + """ + return self.parse_event(await self.recv_bytes()) + + async def recv_bytes(self) -> bytes: + """Receive the next message from the connection as raw bytes. + + Canceling this method is safe. There's no risk of losing data. + + If you want to parse the message into a `RealtimeServerEvent` object like `.recv()` does, + then you can call `.parse_event(data)`. + """ + message = await self._connection.recv(decode=False) + log.debug(f"Received websocket message: %s", message) + if not isinstance(message, bytes): + # passing `decode=False` should always result in us getting `bytes` back + raise TypeError(f"Expected `.recv(decode=False)` to return `bytes` but got {type(message)}") + + return message + + async def send(self, event: RealtimeClientEvent | RealtimeClientEventParam) -> None: + data = ( + event.to_json(use_api_names=True, exclude_defaults=True, exclude_unset=True) + if isinstance(event, BaseModel) + else json.dumps(await async_maybe_transform(event, RealtimeClientEventParam)) + ) + await self._connection.send(data) + + async def close(self, *, code: int = 1000, reason: str = "") -> None: + await self._connection.close(code=code, reason=reason) + + def parse_event(self, data: str | bytes) -> RealtimeServerEvent: + """ + Converts a raw `str` or `bytes` message into a `RealtimeServerEvent` object. + + This is helpful if you're using `.recv_bytes()`. + """ + return cast( + RealtimeServerEvent, construct_type_unchecked(value=json.loads(data), type_=cast(Any, RealtimeServerEvent)) + ) + + +class AsyncRealtimeConnectionManager: + """ + Context manager over a `AsyncRealtimeConnection` that is returned by `beta.realtime.connect()` + + This context manager ensures that the connection will be closed when it exits. + + --- + + Note that if your application doesn't work well with the context manager approach then you + can call the `.enter()` method directly to initiate a connection. + + **Warning**: You must remember to close the connection with `.close()`. + + ```py + connection = await client.beta.realtime.connect(...).enter() + # ... + await connection.close() + ``` + """ + + def __init__( + self, + *, + client: AsyncOpenAI, + model: str, + extra_query: Query, + extra_headers: Headers, + websocket_connection_options: WebsocketConnectionOptions, + ) -> None: + self.__client = client + self.__model = model + self.__connection: AsyncRealtimeConnection | None = None + self.__extra_query = extra_query + self.__extra_headers = extra_headers + self.__websocket_connection_options = websocket_connection_options + + async def __aenter__(self) -> AsyncRealtimeConnection: + """ + 👋 If your application doesn't work well with the context manager approach then you + can call this method directly to initiate a connection. + + **Warning**: You must remember to close the connection with `.close()`. + + ```py + connection = await client.beta.realtime.connect(...).enter() + # ... + await connection.close() + ``` + """ + try: + from websockets.asyncio.client import connect + except ImportError as exc: + raise OpenAIError("You need to install `openai[realtime]` to use this method") from exc + + url = self._prepare_url().copy_with( + params={ + **self.__client.base_url.params, + "model": self.__model, + **self.__extra_query, + }, + ) + log.debug("Connecting to %s", url) + if self.__websocket_connection_options: + log.debug("Connection options: %s", self.__websocket_connection_options) + + self.__connection = AsyncRealtimeConnection( + await connect( + str(url), + user_agent_header=self.__client.user_agent, + additional_headers=_merge_mappings( + { + **self.__client.auth_headers, + "OpenAI-Beta": "realtime=v1", + }, + self.__extra_headers, + ), + **self.__websocket_connection_options, + ) + ) + + return self.__connection + + enter = __aenter__ + + def _prepare_url(self) -> httpx.URL: + if self.__client.websocket_base_url is not None: + base_url = httpx.URL(self.__client.websocket_base_url) + else: + base_url = self.__client._base_url.copy_with(scheme="wss") + + merge_raw_path = base_url.raw_path.rstrip(b"/") + b"/realtime" + return base_url.copy_with(raw_path=merge_raw_path) + + async def __aexit__( + self, exc_type: type[BaseException] | None, exc: BaseException | None, exc_tb: TracebackType | None + ) -> None: + if self.__connection is not None: + await self.__connection.close() + + +class RealtimeConnection: + """Represents a live websocket connection to the Realtime API""" + + session: RealtimeSessionResource + response: RealtimeResponseResource + conversation: RealtimeConversationResource + input_audio_buffer: RealtimeInputAudioBufferResource + + _connection: WebsocketConnection + + def __init__(self, connection: WebsocketConnection) -> None: + self._connection = connection + + self.session = RealtimeSessionResource(self) + self.response = RealtimeResponseResource(self) + self.conversation = RealtimeConversationResource(self) + self.input_audio_buffer = RealtimeInputAudioBufferResource(self) + + def __iter__(self) -> Iterator[RealtimeServerEvent]: + """ + An infinite-iterator that will continue to yield events until + the connection is closed. + """ + from websockets.exceptions import ConnectionClosedOK + + try: + while True: + yield self.recv() + except ConnectionClosedOK: + return + + def recv(self) -> RealtimeServerEvent: + """ + Receive the next message from the connection and parses it into a `RealtimeServerEvent` object. + + Canceling this method is safe. There's no risk of losing data. + """ + return self.parse_event(self.recv_bytes()) + + def recv_bytes(self) -> bytes: + """Receive the next message from the connection as raw bytes. + + Canceling this method is safe. There's no risk of losing data. + + If you want to parse the message into a `RealtimeServerEvent` object like `.recv()` does, + then you can call `.parse_event(data)`. + """ + message = self._connection.recv(decode=False) + log.debug(f"Received websocket message: %s", message) + if not isinstance(message, bytes): + # passing `decode=False` should always result in us getting `bytes` back + raise TypeError(f"Expected `.recv(decode=False)` to return `bytes` but got {type(message)}") + + return message + + def send(self, event: RealtimeClientEvent | RealtimeClientEventParam) -> None: + data = ( + event.to_json(use_api_names=True, exclude_defaults=True, exclude_unset=True) + if isinstance(event, BaseModel) + else json.dumps(maybe_transform(event, RealtimeClientEventParam)) + ) + self._connection.send(data) + + def close(self, *, code: int = 1000, reason: str = "") -> None: + self._connection.close(code=code, reason=reason) + + def parse_event(self, data: str | bytes) -> RealtimeServerEvent: + """ + Converts a raw `str` or `bytes` message into a `RealtimeServerEvent` object. + + This is helpful if you're using `.recv_bytes()`. + """ + return cast( + RealtimeServerEvent, construct_type_unchecked(value=json.loads(data), type_=cast(Any, RealtimeServerEvent)) + ) + + +class RealtimeConnectionManager: + """ + Context manager over a `RealtimeConnection` that is returned by `beta.realtime.connect()` + + This context manager ensures that the connection will be closed when it exits. + + --- + + Note that if your application doesn't work well with the context manager approach then you + can call the `.enter()` method directly to initiate a connection. + + **Warning**: You must remember to close the connection with `.close()`. + + ```py + connection = client.beta.realtime.connect(...).enter() + # ... + connection.close() + ``` + """ + + def __init__( + self, + *, + client: OpenAI, + model: str, + extra_query: Query, + extra_headers: Headers, + websocket_connection_options: WebsocketConnectionOptions, + ) -> None: + self.__client = client + self.__model = model + self.__connection: RealtimeConnection | None = None + self.__extra_query = extra_query + self.__extra_headers = extra_headers + self.__websocket_connection_options = websocket_connection_options + + def __enter__(self) -> RealtimeConnection: + """ + 👋 If your application doesn't work well with the context manager approach then you + can call this method directly to initiate a connection. + + **Warning**: You must remember to close the connection with `.close()`. + + ```py + connection = client.beta.realtime.connect(...).enter() + # ... + connection.close() + ``` + """ + try: + from websockets.sync.client import connect + except ImportError as exc: + raise OpenAIError("You need to install `openai[realtime]` to use this method") from exc + + url = self._prepare_url().copy_with( + params={ + **self.__client.base_url.params, + "model": self.__model, + **self.__extra_query, + }, + ) + log.debug("Connecting to %s", url) + if self.__websocket_connection_options: + log.debug("Connection options: %s", self.__websocket_connection_options) + + self.__connection = RealtimeConnection( + connect( + str(url), + user_agent_header=self.__client.user_agent, + additional_headers=_merge_mappings( + { + **self.__client.auth_headers, + "OpenAI-Beta": "realtime=v1", + }, + self.__extra_headers, + ), + **self.__websocket_connection_options, + ) + ) + + return self.__connection + + enter = __enter__ + + def _prepare_url(self) -> httpx.URL: + if self.__client.websocket_base_url is not None: + base_url = httpx.URL(self.__client.websocket_base_url) + else: + base_url = self.__client._base_url.copy_with(scheme="wss") + + merge_raw_path = base_url.raw_path.rstrip(b"/") + b"/realtime" + return base_url.copy_with(raw_path=merge_raw_path) + + def __exit__( + self, exc_type: type[BaseException] | None, exc: BaseException | None, exc_tb: TracebackType | None + ) -> None: + if self.__connection is not None: + self.__connection.close() + + +class BaseRealtimeConnectionResource: + def __init__(self, connection: RealtimeConnection) -> None: + self._connection = connection + + +class RealtimeSessionResource(BaseRealtimeConnectionResource): + def update(self, *, session: session_update_event_param.Session, event_id: str | NotGiven = NOT_GIVEN) -> None: + """Send this event to update the session’s default configuration. + + The client may + send this event at any time to update the session configuration, and any + field may be updated at any time, except for "voice". The server will respond + with a `session.updated` event that shows the full effective configuration. + Only fields that are present are updated, thus the correct way to clear a + field like "instructions" is to pass an empty string. + """ + self._connection.send( + cast( + RealtimeClientEventParam, + strip_not_given({"type": "session.update", "session": session, "event_id": event_id}), + ) + ) + + +class RealtimeResponseResource(BaseRealtimeConnectionResource): + def cancel(self, *, event_id: str | NotGiven = NOT_GIVEN, response_id: str | NotGiven = NOT_GIVEN) -> None: + """Send this event to cancel an in-progress response. + + The server will respond + with a `response.cancelled` event or an error if there is no response to + cancel. + """ + self._connection.send( + cast( + RealtimeClientEventParam, + strip_not_given({"type": "response.cancel", "event_id": event_id, "response_id": response_id}), + ) + ) + + def create( + self, + *, + event_id: str | NotGiven = NOT_GIVEN, + response: response_create_event_param.Response | NotGiven = NOT_GIVEN, + ) -> None: + """ + This event instructs the server to create a Response, which means triggering + model inference. When in Server VAD mode, the server will create Responses + automatically. + + A Response will include at least one Item, and may have two, in which case + the second will be a function call. These Items will be appended to the + conversation history. + + The server will respond with a `response.created` event, events for Items + and content created, and finally a `response.done` event to indicate the + Response is complete. + + The `response.create` event includes inference configuration like + `instructions`, and `temperature`. These fields will override the Session's + configuration for this Response only. + """ + self._connection.send( + cast( + RealtimeClientEventParam, + strip_not_given({"type": "response.create", "event_id": event_id, "response": response}), + ) + ) + + +class RealtimeConversationResource(BaseRealtimeConnectionResource): + @cached_property + def item(self) -> RealtimeConversationItemResource: + return RealtimeConversationItemResource(self._connection) + + +class RealtimeConversationItemResource(BaseRealtimeConnectionResource): + def delete(self, *, item_id: str, event_id: str | NotGiven = NOT_GIVEN) -> None: + """Send this event when you want to remove any item from the conversation + history. + + The server will respond with a `conversation.item.deleted` event, + unless the item does not exist in the conversation history, in which case the + server will respond with an error. + """ + self._connection.send( + cast( + RealtimeClientEventParam, + strip_not_given({"type": "conversation.item.delete", "item_id": item_id, "event_id": event_id}), + ) + ) + + def create( + self, + *, + item: ConversationItemParam, + event_id: str | NotGiven = NOT_GIVEN, + previous_item_id: str | NotGiven = NOT_GIVEN, + ) -> None: + """ + Add a new Item to the Conversation's context, including messages, function + calls, and function call responses. This event can be used both to populate a + "history" of the conversation and to add new items mid-stream, but has the + current limitation that it cannot populate assistant audio messages. + + If successful, the server will respond with a `conversation.item.created` + event, otherwise an `error` event will be sent. + """ + self._connection.send( + cast( + RealtimeClientEventParam, + strip_not_given( + { + "type": "conversation.item.create", + "item": item, + "event_id": event_id, + "previous_item_id": previous_item_id, + } + ), + ) + ) + + def truncate( + self, *, audio_end_ms: int, content_index: int, item_id: str, event_id: str | NotGiven = NOT_GIVEN + ) -> None: + """Send this event to truncate a previous assistant message’s audio. + + The server + will produce audio faster than realtime, so this event is useful when the user + interrupts to truncate audio that has already been sent to the client but not + yet played. This will synchronize the server's understanding of the audio with + the client's playback. + + Truncating audio will delete the server-side text transcript to ensure there + is not text in the context that hasn't been heard by the user. + + If successful, the server will respond with a `conversation.item.truncated` + event. + """ + self._connection.send( + cast( + RealtimeClientEventParam, + strip_not_given( + { + "type": "conversation.item.truncate", + "audio_end_ms": audio_end_ms, + "content_index": content_index, + "item_id": item_id, + "event_id": event_id, + } + ), + ) + ) + + +class RealtimeInputAudioBufferResource(BaseRealtimeConnectionResource): + def clear(self, *, event_id: str | NotGiven = NOT_GIVEN) -> None: + """Send this event to clear the audio bytes in the buffer. + + The server will + respond with an `input_audio_buffer.cleared` event. + """ + self._connection.send( + cast(RealtimeClientEventParam, strip_not_given({"type": "input_audio_buffer.clear", "event_id": event_id})) + ) + + def commit(self, *, event_id: str | NotGiven = NOT_GIVEN) -> None: + """ + Send this event to commit the user input audio buffer, which will create a + new user message item in the conversation. This event will produce an error + if the input audio buffer is empty. When in Server VAD mode, the client does + not need to send this event, the server will commit the audio buffer + automatically. + + Committing the input audio buffer will trigger input audio transcription + (if enabled in session configuration), but it will not create a response + from the model. The server will respond with an `input_audio_buffer.committed` + event. + """ + self._connection.send( + cast(RealtimeClientEventParam, strip_not_given({"type": "input_audio_buffer.commit", "event_id": event_id})) + ) + + def append(self, *, audio: str, event_id: str | NotGiven = NOT_GIVEN) -> None: + """Send this event to append audio bytes to the input audio buffer. + + The audio + buffer is temporary storage you can write to and later commit. In Server VAD + mode, the audio buffer is used to detect speech and the server will decide + when to commit. When Server VAD is disabled, you must commit the audio buffer + manually. + + The client may choose how much audio to place in each event up to a maximum + of 15 MiB, for example streaming smaller chunks from the client may allow the + VAD to be more responsive. Unlike made other client events, the server will + not send a confirmation response to this event. + """ + self._connection.send( + cast( + RealtimeClientEventParam, + strip_not_given({"type": "input_audio_buffer.append", "audio": audio, "event_id": event_id}), + ) + ) + + +class BaseAsyncRealtimeConnectionResource: + def __init__(self, connection: AsyncRealtimeConnection) -> None: + self._connection = connection + + +class AsyncRealtimeSessionResource(BaseAsyncRealtimeConnectionResource): + async def update( + self, *, session: session_update_event_param.Session, event_id: str | NotGiven = NOT_GIVEN + ) -> None: + """Send this event to update the session’s default configuration. + + The client may + send this event at any time to update the session configuration, and any + field may be updated at any time, except for "voice". The server will respond + with a `session.updated` event that shows the full effective configuration. + Only fields that are present are updated, thus the correct way to clear a + field like "instructions" is to pass an empty string. + """ + await self._connection.send( + cast( + RealtimeClientEventParam, + strip_not_given({"type": "session.update", "session": session, "event_id": event_id}), + ) + ) + + +class AsyncRealtimeResponseResource(BaseAsyncRealtimeConnectionResource): + async def cancel(self, *, event_id: str | NotGiven = NOT_GIVEN, response_id: str | NotGiven = NOT_GIVEN) -> None: + """Send this event to cancel an in-progress response. + + The server will respond + with a `response.cancelled` event or an error if there is no response to + cancel. + """ + await self._connection.send( + cast( + RealtimeClientEventParam, + strip_not_given({"type": "response.cancel", "event_id": event_id, "response_id": response_id}), + ) + ) + + async def create( + self, + *, + event_id: str | NotGiven = NOT_GIVEN, + response: response_create_event_param.Response | NotGiven = NOT_GIVEN, + ) -> None: + """ + This event instructs the server to create a Response, which means triggering + model inference. When in Server VAD mode, the server will create Responses + automatically. + + A Response will include at least one Item, and may have two, in which case + the second will be a function call. These Items will be appended to the + conversation history. + + The server will respond with a `response.created` event, events for Items + and content created, and finally a `response.done` event to indicate the + Response is complete. + + The `response.create` event includes inference configuration like + `instructions`, and `temperature`. These fields will override the Session's + configuration for this Response only. + """ + await self._connection.send( + cast( + RealtimeClientEventParam, + strip_not_given({"type": "response.create", "event_id": event_id, "response": response}), + ) + ) + + +class AsyncRealtimeConversationResource(BaseAsyncRealtimeConnectionResource): + @cached_property + def item(self) -> AsyncRealtimeConversationItemResource: + return AsyncRealtimeConversationItemResource(self._connection) + + +class AsyncRealtimeConversationItemResource(BaseAsyncRealtimeConnectionResource): + async def delete(self, *, item_id: str, event_id: str | NotGiven = NOT_GIVEN) -> None: + """Send this event when you want to remove any item from the conversation + history. + + The server will respond with a `conversation.item.deleted` event, + unless the item does not exist in the conversation history, in which case the + server will respond with an error. + """ + await self._connection.send( + cast( + RealtimeClientEventParam, + strip_not_given({"type": "conversation.item.delete", "item_id": item_id, "event_id": event_id}), + ) + ) + + async def create( + self, + *, + item: ConversationItemParam, + event_id: str | NotGiven = NOT_GIVEN, + previous_item_id: str | NotGiven = NOT_GIVEN, + ) -> None: + """ + Add a new Item to the Conversation's context, including messages, function + calls, and function call responses. This event can be used both to populate a + "history" of the conversation and to add new items mid-stream, but has the + current limitation that it cannot populate assistant audio messages. + + If successful, the server will respond with a `conversation.item.created` + event, otherwise an `error` event will be sent. + """ + await self._connection.send( + cast( + RealtimeClientEventParam, + strip_not_given( + { + "type": "conversation.item.create", + "item": item, + "event_id": event_id, + "previous_item_id": previous_item_id, + } + ), + ) + ) + + async def truncate( + self, *, audio_end_ms: int, content_index: int, item_id: str, event_id: str | NotGiven = NOT_GIVEN + ) -> None: + """Send this event to truncate a previous assistant message’s audio. + + The server + will produce audio faster than realtime, so this event is useful when the user + interrupts to truncate audio that has already been sent to the client but not + yet played. This will synchronize the server's understanding of the audio with + the client's playback. + + Truncating audio will delete the server-side text transcript to ensure there + is not text in the context that hasn't been heard by the user. + + If successful, the server will respond with a `conversation.item.truncated` + event. + """ + await self._connection.send( + cast( + RealtimeClientEventParam, + strip_not_given( + { + "type": "conversation.item.truncate", + "audio_end_ms": audio_end_ms, + "content_index": content_index, + "item_id": item_id, + "event_id": event_id, + } + ), + ) + ) + + +class AsyncRealtimeInputAudioBufferResource(BaseAsyncRealtimeConnectionResource): + async def clear(self, *, event_id: str | NotGiven = NOT_GIVEN) -> None: + """Send this event to clear the audio bytes in the buffer. + + The server will + respond with an `input_audio_buffer.cleared` event. + """ + await self._connection.send( + cast(RealtimeClientEventParam, strip_not_given({"type": "input_audio_buffer.clear", "event_id": event_id})) + ) + + async def commit(self, *, event_id: str | NotGiven = NOT_GIVEN) -> None: + """ + Send this event to commit the user input audio buffer, which will create a + new user message item in the conversation. This event will produce an error + if the input audio buffer is empty. When in Server VAD mode, the client does + not need to send this event, the server will commit the audio buffer + automatically. + + Committing the input audio buffer will trigger input audio transcription + (if enabled in session configuration), but it will not create a response + from the model. The server will respond with an `input_audio_buffer.committed` + event. + """ + await self._connection.send( + cast(RealtimeClientEventParam, strip_not_given({"type": "input_audio_buffer.commit", "event_id": event_id})) + ) + + async def append(self, *, audio: str, event_id: str | NotGiven = NOT_GIVEN) -> None: + """Send this event to append audio bytes to the input audio buffer. + + The audio + buffer is temporary storage you can write to and later commit. In Server VAD + mode, the audio buffer is used to detect speech and the server will decide + when to commit. When Server VAD is disabled, you must commit the audio buffer + manually. + + The client may choose how much audio to place in each event up to a maximum + of 15 MiB, for example streaming smaller chunks from the client may allow the + VAD to be more responsive. Unlike made other client events, the server will + not send a confirmation response to this event. + """ + await self._connection.send( + cast( + RealtimeClientEventParam, + strip_not_given({"type": "input_audio_buffer.append", "audio": audio, "event_id": event_id}), + ) + ) diff --git a/src/openai/types/__init__.py b/src/openai/types/__init__.py index 7677be01b2..72950f2491 100644 --- a/src/openai/types/__init__.py +++ b/src/openai/types/__init__.py @@ -47,6 +47,7 @@ from .create_embedding_response import CreateEmbeddingResponse as CreateEmbeddingResponse from .moderation_create_response import ModerationCreateResponse as ModerationCreateResponse from .moderation_text_input_param import ModerationTextInputParam as ModerationTextInputParam +from .websocket_connection_options import WebsocketConnectionOptions as WebsocketConnectionOptions from .image_create_variation_params import ImageCreateVariationParams as ImageCreateVariationParams from .moderation_image_url_input_param import ModerationImageURLInputParam as ModerationImageURLInputParam from .moderation_multi_modal_input_param import ModerationMultiModalInputParam as ModerationMultiModalInputParam diff --git a/src/openai/types/beta/realtime/__init__.py b/src/openai/types/beta/realtime/__init__.py index 1c5246db7a..372d4ec19d 100644 --- a/src/openai/types/beta/realtime/__init__.py +++ b/src/openai/types/beta/realtime/__init__.py @@ -2,5 +2,79 @@ from __future__ import annotations +from .session import Session as Session +from .error_event import ErrorEvent as ErrorEvent +from .conversation_item import ConversationItem as ConversationItem +from .realtime_response import RealtimeResponse as RealtimeResponse +from .response_done_event import ResponseDoneEvent as ResponseDoneEvent +from .session_update_event import SessionUpdateEvent as SessionUpdateEvent +from .realtime_client_event import RealtimeClientEvent as RealtimeClientEvent +from .realtime_server_event import RealtimeServerEvent as RealtimeServerEvent +from .response_cancel_event import ResponseCancelEvent as ResponseCancelEvent +from .response_create_event import ResponseCreateEvent as ResponseCreateEvent from .session_create_params import SessionCreateParams as SessionCreateParams +from .session_created_event import SessionCreatedEvent as SessionCreatedEvent +from .session_updated_event import SessionUpdatedEvent as SessionUpdatedEvent +from .response_created_event import ResponseCreatedEvent as ResponseCreatedEvent +from .conversation_item_param import ConversationItemParam as ConversationItemParam +from .realtime_connect_params import RealtimeConnectParams as RealtimeConnectParams +from .realtime_response_usage import RealtimeResponseUsage as RealtimeResponseUsage from .session_create_response import SessionCreateResponse as SessionCreateResponse +from .realtime_response_status import RealtimeResponseStatus as RealtimeResponseStatus +from .response_text_done_event import ResponseTextDoneEvent as ResponseTextDoneEvent +from .conversation_item_content import ConversationItemContent as ConversationItemContent +from .rate_limits_updated_event import RateLimitsUpdatedEvent as RateLimitsUpdatedEvent +from .response_audio_done_event import ResponseAudioDoneEvent as ResponseAudioDoneEvent +from .response_text_delta_event import ResponseTextDeltaEvent as ResponseTextDeltaEvent +from .conversation_created_event import ConversationCreatedEvent as ConversationCreatedEvent +from .response_audio_delta_event import ResponseAudioDeltaEvent as ResponseAudioDeltaEvent +from .session_update_event_param import SessionUpdateEventParam as SessionUpdateEventParam +from .realtime_client_event_param import RealtimeClientEventParam as RealtimeClientEventParam +from .response_cancel_event_param import ResponseCancelEventParam as ResponseCancelEventParam +from .response_create_event_param import ResponseCreateEventParam as ResponseCreateEventParam +from .conversation_item_create_event import ConversationItemCreateEvent as ConversationItemCreateEvent +from .conversation_item_delete_event import ConversationItemDeleteEvent as ConversationItemDeleteEvent +from .input_audio_buffer_clear_event import InputAudioBufferClearEvent as InputAudioBufferClearEvent +from .conversation_item_content_param import ConversationItemContentParam as ConversationItemContentParam +from .conversation_item_created_event import ConversationItemCreatedEvent as ConversationItemCreatedEvent +from .conversation_item_deleted_event import ConversationItemDeletedEvent as ConversationItemDeletedEvent +from .input_audio_buffer_append_event import InputAudioBufferAppendEvent as InputAudioBufferAppendEvent +from .input_audio_buffer_commit_event import InputAudioBufferCommitEvent as InputAudioBufferCommitEvent +from .response_output_item_done_event import ResponseOutputItemDoneEvent as ResponseOutputItemDoneEvent +from .conversation_item_truncate_event import ConversationItemTruncateEvent as ConversationItemTruncateEvent +from .input_audio_buffer_cleared_event import InputAudioBufferClearedEvent as InputAudioBufferClearedEvent +from .response_content_part_done_event import ResponseContentPartDoneEvent as ResponseContentPartDoneEvent +from .response_output_item_added_event import ResponseOutputItemAddedEvent as ResponseOutputItemAddedEvent +from .conversation_item_truncated_event import ConversationItemTruncatedEvent as ConversationItemTruncatedEvent +from .response_content_part_added_event import ResponseContentPartAddedEvent as ResponseContentPartAddedEvent +from .input_audio_buffer_committed_event import InputAudioBufferCommittedEvent as InputAudioBufferCommittedEvent +from .conversation_item_create_event_param import ConversationItemCreateEventParam as ConversationItemCreateEventParam +from .conversation_item_delete_event_param import ConversationItemDeleteEventParam as ConversationItemDeleteEventParam +from .input_audio_buffer_clear_event_param import InputAudioBufferClearEventParam as InputAudioBufferClearEventParam +from .response_audio_transcript_done_event import ResponseAudioTranscriptDoneEvent as ResponseAudioTranscriptDoneEvent +from .input_audio_buffer_append_event_param import InputAudioBufferAppendEventParam as InputAudioBufferAppendEventParam +from .input_audio_buffer_commit_event_param import InputAudioBufferCommitEventParam as InputAudioBufferCommitEventParam +from .response_audio_transcript_delta_event import ( + ResponseAudioTranscriptDeltaEvent as ResponseAudioTranscriptDeltaEvent, +) +from .conversation_item_truncate_event_param import ( + ConversationItemTruncateEventParam as ConversationItemTruncateEventParam, +) +from .input_audio_buffer_speech_started_event import ( + InputAudioBufferSpeechStartedEvent as InputAudioBufferSpeechStartedEvent, +) +from .input_audio_buffer_speech_stopped_event import ( + InputAudioBufferSpeechStoppedEvent as InputAudioBufferSpeechStoppedEvent, +) +from .response_function_call_arguments_done_event import ( + ResponseFunctionCallArgumentsDoneEvent as ResponseFunctionCallArgumentsDoneEvent, +) +from .response_function_call_arguments_delta_event import ( + ResponseFunctionCallArgumentsDeltaEvent as ResponseFunctionCallArgumentsDeltaEvent, +) +from .conversation_item_input_audio_transcription_failed_event import ( + ConversationItemInputAudioTranscriptionFailedEvent as ConversationItemInputAudioTranscriptionFailedEvent, +) +from .conversation_item_input_audio_transcription_completed_event import ( + ConversationItemInputAudioTranscriptionCompletedEvent as ConversationItemInputAudioTranscriptionCompletedEvent, +) diff --git a/src/openai/types/beta/realtime/conversation_created_event.py b/src/openai/types/beta/realtime/conversation_created_event.py new file mode 100644 index 0000000000..4ba0540867 --- /dev/null +++ b/src/openai/types/beta/realtime/conversation_created_event.py @@ -0,0 +1,27 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["ConversationCreatedEvent", "Conversation"] + + +class Conversation(BaseModel): + id: Optional[str] = None + """The unique ID of the conversation.""" + + object: Optional[Literal["realtime.conversation"]] = None + """The object type, must be `realtime.conversation`.""" + + +class ConversationCreatedEvent(BaseModel): + conversation: Conversation + """The conversation resource.""" + + event_id: str + """The unique ID of the server event.""" + + type: Literal["conversation.created"] + """The event type, must be `conversation.created`.""" diff --git a/src/openai/types/beta/realtime/conversation_item.py b/src/openai/types/beta/realtime/conversation_item.py new file mode 100644 index 0000000000..4edf6c4d5f --- /dev/null +++ b/src/openai/types/beta/realtime/conversation_item.py @@ -0,0 +1,61 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Optional +from typing_extensions import Literal + +from ...._models import BaseModel +from .conversation_item_content import ConversationItemContent + +__all__ = ["ConversationItem"] + + +class ConversationItem(BaseModel): + id: Optional[str] = None + """ + The unique ID of the item, this can be generated by the client to help manage + server-side context, but is not required because the server will generate one if + not provided. + """ + + arguments: Optional[str] = None + """The arguments of the function call (for `function_call` items).""" + + call_id: Optional[str] = None + """ + The ID of the function call (for `function_call` and `function_call_output` + items). If passed on a `function_call_output` item, the server will check that a + `function_call` item with the same ID exists in the conversation history. + """ + + content: Optional[List[ConversationItemContent]] = None + """The content of the message, applicable for `message` items. + + - Message items of role `system` support only `input_text` content + - Message items of role `user` support `input_text` and `input_audio` content + - Message items of role `assistant` support `text` content. + """ + + name: Optional[str] = None + """The name of the function being called (for `function_call` items).""" + + object: Optional[Literal["realtime.item"]] = None + """Identifier for the API object being returned - always `realtime.item`.""" + + output: Optional[str] = None + """The output of the function call (for `function_call_output` items).""" + + role: Optional[Literal["user", "assistant", "system"]] = None + """ + The role of the message sender (`user`, `assistant`, `system`), only applicable + for `message` items. + """ + + status: Optional[Literal["completed", "incomplete"]] = None + """The status of the item (`completed`, `incomplete`). + + These have no effect on the conversation, but are accepted for consistency with + the `conversation.item.created` event. + """ + + type: Optional[Literal["message", "function_call", "function_call_output"]] = None + """The type of the item (`message`, `function_call`, `function_call_output`).""" diff --git a/src/openai/types/beta/realtime/conversation_item_content.py b/src/openai/types/beta/realtime/conversation_item_content.py new file mode 100644 index 0000000000..b854aa0e0f --- /dev/null +++ b/src/openai/types/beta/realtime/conversation_item_content.py @@ -0,0 +1,28 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["ConversationItemContent"] + + +class ConversationItemContent(BaseModel): + id: Optional[str] = None + """ + ID of a previous conversation item (like a model response), used for + `item_reference` content types. + """ + + audio: Optional[str] = None + """Base64-encoded audio bytes, used for `input_audio` content type.""" + + text: Optional[str] = None + """The text content, used for `input_text` and `text` content types.""" + + transcript: Optional[str] = None + """The transcript of the audio, used for `input_audio` content type.""" + + type: Optional[Literal["input_text", "input_audio", "item_reference", "text"]] = None + """The content type (`input_text`, `input_audio`, `item_reference`, `text`).""" diff --git a/src/openai/types/beta/realtime/conversation_item_content_param.py b/src/openai/types/beta/realtime/conversation_item_content_param.py new file mode 100644 index 0000000000..b354d78971 --- /dev/null +++ b/src/openai/types/beta/realtime/conversation_item_content_param.py @@ -0,0 +1,27 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, TypedDict + +__all__ = ["ConversationItemContentParam"] + + +class ConversationItemContentParam(TypedDict, total=False): + id: str + """ + ID of a previous conversation item (like a model response), used for + `item_reference` content types. + """ + + audio: str + """Base64-encoded audio bytes, used for `input_audio` content type.""" + + text: str + """The text content, used for `input_text` and `text` content types.""" + + transcript: str + """The transcript of the audio, used for `input_audio` content type.""" + + type: Literal["input_text", "input_audio", "item_reference", "text"] + """The content type (`input_text`, `input_audio`, `item_reference`, `text`).""" diff --git a/src/openai/types/beta/realtime/conversation_item_create_event.py b/src/openai/types/beta/realtime/conversation_item_create_event.py new file mode 100644 index 0000000000..50d309675b --- /dev/null +++ b/src/openai/types/beta/realtime/conversation_item_create_event.py @@ -0,0 +1,28 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ...._models import BaseModel +from .conversation_item import ConversationItem + +__all__ = ["ConversationItemCreateEvent"] + + +class ConversationItemCreateEvent(BaseModel): + item: ConversationItem + """The item to add to the conversation.""" + + type: Literal["conversation.item.create"] + """The event type, must be `conversation.item.create`.""" + + event_id: Optional[str] = None + """Optional client-generated ID used to identify this event.""" + + previous_item_id: Optional[str] = None + """The ID of the preceding item after which the new item will be inserted. + + If not set, the new item will be appended to the end of the conversation. If + set, it allows an item to be inserted mid-conversation. If the ID cannot be + found, an error will be returned and the item will not be added. + """ diff --git a/src/openai/types/beta/realtime/conversation_item_create_event_param.py b/src/openai/types/beta/realtime/conversation_item_create_event_param.py new file mode 100644 index 0000000000..b8c8bbc251 --- /dev/null +++ b/src/openai/types/beta/realtime/conversation_item_create_event_param.py @@ -0,0 +1,28 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +from .conversation_item_param import ConversationItemParam + +__all__ = ["ConversationItemCreateEventParam"] + + +class ConversationItemCreateEventParam(TypedDict, total=False): + item: Required[ConversationItemParam] + """The item to add to the conversation.""" + + type: Required[Literal["conversation.item.create"]] + """The event type, must be `conversation.item.create`.""" + + event_id: str + """Optional client-generated ID used to identify this event.""" + + previous_item_id: str + """The ID of the preceding item after which the new item will be inserted. + + If not set, the new item will be appended to the end of the conversation. If + set, it allows an item to be inserted mid-conversation. If the ID cannot be + found, an error will be returned and the item will not be added. + """ diff --git a/src/openai/types/beta/realtime/conversation_item_created_event.py b/src/openai/types/beta/realtime/conversation_item_created_event.py new file mode 100644 index 0000000000..2f20388246 --- /dev/null +++ b/src/openai/types/beta/realtime/conversation_item_created_event.py @@ -0,0 +1,25 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ...._models import BaseModel +from .conversation_item import ConversationItem + +__all__ = ["ConversationItemCreatedEvent"] + + +class ConversationItemCreatedEvent(BaseModel): + event_id: str + """The unique ID of the server event.""" + + item: ConversationItem + """The item to add to the conversation.""" + + previous_item_id: str + """ + The ID of the preceding item in the Conversation context, allows the client to + understand the order of the conversation. + """ + + type: Literal["conversation.item.created"] + """The event type, must be `conversation.item.created`.""" diff --git a/src/openai/types/beta/realtime/conversation_item_delete_event.py b/src/openai/types/beta/realtime/conversation_item_delete_event.py new file mode 100644 index 0000000000..02ca8250ce --- /dev/null +++ b/src/openai/types/beta/realtime/conversation_item_delete_event.py @@ -0,0 +1,19 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["ConversationItemDeleteEvent"] + + +class ConversationItemDeleteEvent(BaseModel): + item_id: str + """The ID of the item to delete.""" + + type: Literal["conversation.item.delete"] + """The event type, must be `conversation.item.delete`.""" + + event_id: Optional[str] = None + """Optional client-generated ID used to identify this event.""" diff --git a/src/openai/types/beta/realtime/conversation_item_delete_event_param.py b/src/openai/types/beta/realtime/conversation_item_delete_event_param.py new file mode 100644 index 0000000000..c3f88d6627 --- /dev/null +++ b/src/openai/types/beta/realtime/conversation_item_delete_event_param.py @@ -0,0 +1,18 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ConversationItemDeleteEventParam"] + + +class ConversationItemDeleteEventParam(TypedDict, total=False): + item_id: Required[str] + """The ID of the item to delete.""" + + type: Required[Literal["conversation.item.delete"]] + """The event type, must be `conversation.item.delete`.""" + + event_id: str + """Optional client-generated ID used to identify this event.""" diff --git a/src/openai/types/beta/realtime/conversation_item_deleted_event.py b/src/openai/types/beta/realtime/conversation_item_deleted_event.py new file mode 100644 index 0000000000..a35a97817a --- /dev/null +++ b/src/openai/types/beta/realtime/conversation_item_deleted_event.py @@ -0,0 +1,18 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["ConversationItemDeletedEvent"] + + +class ConversationItemDeletedEvent(BaseModel): + event_id: str + """The unique ID of the server event.""" + + item_id: str + """The ID of the item that was deleted.""" + + type: Literal["conversation.item.deleted"] + """The event type, must be `conversation.item.deleted`.""" diff --git a/src/openai/types/beta/realtime/conversation_item_input_audio_transcription_completed_event.py b/src/openai/types/beta/realtime/conversation_item_input_audio_transcription_completed_event.py new file mode 100644 index 0000000000..ded79cc0f7 --- /dev/null +++ b/src/openai/types/beta/realtime/conversation_item_input_audio_transcription_completed_event.py @@ -0,0 +1,26 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["ConversationItemInputAudioTranscriptionCompletedEvent"] + + +class ConversationItemInputAudioTranscriptionCompletedEvent(BaseModel): + content_index: int + """The index of the content part containing the audio.""" + + event_id: str + """The unique ID of the server event.""" + + item_id: str + """The ID of the user message item containing the audio.""" + + transcript: str + """The transcribed text.""" + + type: Literal["conversation.item.input_audio_transcription.completed"] + """ + The event type, must be `conversation.item.input_audio_transcription.completed`. + """ diff --git a/src/openai/types/beta/realtime/conversation_item_input_audio_transcription_failed_event.py b/src/openai/types/beta/realtime/conversation_item_input_audio_transcription_failed_event.py new file mode 100644 index 0000000000..cecac93e64 --- /dev/null +++ b/src/openai/types/beta/realtime/conversation_item_input_audio_transcription_failed_event.py @@ -0,0 +1,39 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["ConversationItemInputAudioTranscriptionFailedEvent", "Error"] + + +class Error(BaseModel): + code: Optional[str] = None + """Error code, if any.""" + + message: Optional[str] = None + """A human-readable error message.""" + + param: Optional[str] = None + """Parameter related to the error, if any.""" + + type: Optional[str] = None + """The type of error.""" + + +class ConversationItemInputAudioTranscriptionFailedEvent(BaseModel): + content_index: int + """The index of the content part containing the audio.""" + + error: Error + """Details of the transcription error.""" + + event_id: str + """The unique ID of the server event.""" + + item_id: str + """The ID of the user message item.""" + + type: Literal["conversation.item.input_audio_transcription.failed"] + """The event type, must be `conversation.item.input_audio_transcription.failed`.""" diff --git a/src/openai/types/beta/realtime/conversation_item_param.py b/src/openai/types/beta/realtime/conversation_item_param.py new file mode 100644 index 0000000000..ac0f8431e5 --- /dev/null +++ b/src/openai/types/beta/realtime/conversation_item_param.py @@ -0,0 +1,62 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Iterable +from typing_extensions import Literal, TypedDict + +from .conversation_item_content_param import ConversationItemContentParam + +__all__ = ["ConversationItemParam"] + + +class ConversationItemParam(TypedDict, total=False): + id: str + """ + The unique ID of the item, this can be generated by the client to help manage + server-side context, but is not required because the server will generate one if + not provided. + """ + + arguments: str + """The arguments of the function call (for `function_call` items).""" + + call_id: str + """ + The ID of the function call (for `function_call` and `function_call_output` + items). If passed on a `function_call_output` item, the server will check that a + `function_call` item with the same ID exists in the conversation history. + """ + + content: Iterable[ConversationItemContentParam] + """The content of the message, applicable for `message` items. + + - Message items of role `system` support only `input_text` content + - Message items of role `user` support `input_text` and `input_audio` content + - Message items of role `assistant` support `text` content. + """ + + name: str + """The name of the function being called (for `function_call` items).""" + + object: Literal["realtime.item"] + """Identifier for the API object being returned - always `realtime.item`.""" + + output: str + """The output of the function call (for `function_call_output` items).""" + + role: Literal["user", "assistant", "system"] + """ + The role of the message sender (`user`, `assistant`, `system`), only applicable + for `message` items. + """ + + status: Literal["completed", "incomplete"] + """The status of the item (`completed`, `incomplete`). + + These have no effect on the conversation, but are accepted for consistency with + the `conversation.item.created` event. + """ + + type: Literal["message", "function_call", "function_call_output"] + """The type of the item (`message`, `function_call`, `function_call_output`).""" diff --git a/src/openai/types/beta/realtime/conversation_item_truncate_event.py b/src/openai/types/beta/realtime/conversation_item_truncate_event.py new file mode 100644 index 0000000000..cb336bba2c --- /dev/null +++ b/src/openai/types/beta/realtime/conversation_item_truncate_event.py @@ -0,0 +1,32 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["ConversationItemTruncateEvent"] + + +class ConversationItemTruncateEvent(BaseModel): + audio_end_ms: int + """Inclusive duration up to which audio is truncated, in milliseconds. + + If the audio_end_ms is greater than the actual audio duration, the server will + respond with an error. + """ + + content_index: int + """The index of the content part to truncate. Set this to 0.""" + + item_id: str + """The ID of the assistant message item to truncate. + + Only assistant message items can be truncated. + """ + + type: Literal["conversation.item.truncate"] + """The event type, must be `conversation.item.truncate`.""" + + event_id: Optional[str] = None + """Optional client-generated ID used to identify this event.""" diff --git a/src/openai/types/beta/realtime/conversation_item_truncate_event_param.py b/src/openai/types/beta/realtime/conversation_item_truncate_event_param.py new file mode 100644 index 0000000000..d3ad1e1e25 --- /dev/null +++ b/src/openai/types/beta/realtime/conversation_item_truncate_event_param.py @@ -0,0 +1,31 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ConversationItemTruncateEventParam"] + + +class ConversationItemTruncateEventParam(TypedDict, total=False): + audio_end_ms: Required[int] + """Inclusive duration up to which audio is truncated, in milliseconds. + + If the audio_end_ms is greater than the actual audio duration, the server will + respond with an error. + """ + + content_index: Required[int] + """The index of the content part to truncate. Set this to 0.""" + + item_id: Required[str] + """The ID of the assistant message item to truncate. + + Only assistant message items can be truncated. + """ + + type: Required[Literal["conversation.item.truncate"]] + """The event type, must be `conversation.item.truncate`.""" + + event_id: str + """Optional client-generated ID used to identify this event.""" diff --git a/src/openai/types/beta/realtime/conversation_item_truncated_event.py b/src/openai/types/beta/realtime/conversation_item_truncated_event.py new file mode 100644 index 0000000000..36368fa28f --- /dev/null +++ b/src/openai/types/beta/realtime/conversation_item_truncated_event.py @@ -0,0 +1,24 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["ConversationItemTruncatedEvent"] + + +class ConversationItemTruncatedEvent(BaseModel): + audio_end_ms: int + """The duration up to which the audio was truncated, in milliseconds.""" + + content_index: int + """The index of the content part that was truncated.""" + + event_id: str + """The unique ID of the server event.""" + + item_id: str + """The ID of the assistant message item that was truncated.""" + + type: Literal["conversation.item.truncated"] + """The event type, must be `conversation.item.truncated`.""" diff --git a/src/openai/types/beta/realtime/error_event.py b/src/openai/types/beta/realtime/error_event.py new file mode 100644 index 0000000000..e020fc3848 --- /dev/null +++ b/src/openai/types/beta/realtime/error_event.py @@ -0,0 +1,36 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["ErrorEvent", "Error"] + + +class Error(BaseModel): + message: str + """A human-readable error message.""" + + type: str + """The type of error (e.g., "invalid_request_error", "server_error").""" + + code: Optional[str] = None + """Error code, if any.""" + + event_id: Optional[str] = None + """The event_id of the client event that caused the error, if applicable.""" + + param: Optional[str] = None + """Parameter related to the error, if any.""" + + +class ErrorEvent(BaseModel): + error: Error + """Details of the error.""" + + event_id: str + """The unique ID of the server event.""" + + type: Literal["error"] + """The event type, must be `error`.""" diff --git a/src/openai/types/beta/realtime/input_audio_buffer_append_event.py b/src/openai/types/beta/realtime/input_audio_buffer_append_event.py new file mode 100644 index 0000000000..a253a6488c --- /dev/null +++ b/src/openai/types/beta/realtime/input_audio_buffer_append_event.py @@ -0,0 +1,23 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["InputAudioBufferAppendEvent"] + + +class InputAudioBufferAppendEvent(BaseModel): + audio: str + """Base64-encoded audio bytes. + + This must be in the format specified by the `input_audio_format` field in the + session configuration. + """ + + type: Literal["input_audio_buffer.append"] + """The event type, must be `input_audio_buffer.append`.""" + + event_id: Optional[str] = None + """Optional client-generated ID used to identify this event.""" diff --git a/src/openai/types/beta/realtime/input_audio_buffer_append_event_param.py b/src/openai/types/beta/realtime/input_audio_buffer_append_event_param.py new file mode 100644 index 0000000000..3ad0bc737d --- /dev/null +++ b/src/openai/types/beta/realtime/input_audio_buffer_append_event_param.py @@ -0,0 +1,22 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["InputAudioBufferAppendEventParam"] + + +class InputAudioBufferAppendEventParam(TypedDict, total=False): + audio: Required[str] + """Base64-encoded audio bytes. + + This must be in the format specified by the `input_audio_format` field in the + session configuration. + """ + + type: Required[Literal["input_audio_buffer.append"]] + """The event type, must be `input_audio_buffer.append`.""" + + event_id: str + """Optional client-generated ID used to identify this event.""" diff --git a/src/openai/types/beta/realtime/input_audio_buffer_clear_event.py b/src/openai/types/beta/realtime/input_audio_buffer_clear_event.py new file mode 100644 index 0000000000..b0624d34df --- /dev/null +++ b/src/openai/types/beta/realtime/input_audio_buffer_clear_event.py @@ -0,0 +1,16 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["InputAudioBufferClearEvent"] + + +class InputAudioBufferClearEvent(BaseModel): + type: Literal["input_audio_buffer.clear"] + """The event type, must be `input_audio_buffer.clear`.""" + + event_id: Optional[str] = None + """Optional client-generated ID used to identify this event.""" diff --git a/src/openai/types/beta/realtime/input_audio_buffer_clear_event_param.py b/src/openai/types/beta/realtime/input_audio_buffer_clear_event_param.py new file mode 100644 index 0000000000..2bd6bc5a02 --- /dev/null +++ b/src/openai/types/beta/realtime/input_audio_buffer_clear_event_param.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["InputAudioBufferClearEventParam"] + + +class InputAudioBufferClearEventParam(TypedDict, total=False): + type: Required[Literal["input_audio_buffer.clear"]] + """The event type, must be `input_audio_buffer.clear`.""" + + event_id: str + """Optional client-generated ID used to identify this event.""" diff --git a/src/openai/types/beta/realtime/input_audio_buffer_cleared_event.py b/src/openai/types/beta/realtime/input_audio_buffer_cleared_event.py new file mode 100644 index 0000000000..632e1b94bc --- /dev/null +++ b/src/openai/types/beta/realtime/input_audio_buffer_cleared_event.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["InputAudioBufferClearedEvent"] + + +class InputAudioBufferClearedEvent(BaseModel): + event_id: str + """The unique ID of the server event.""" + + type: Literal["input_audio_buffer.cleared"] + """The event type, must be `input_audio_buffer.cleared`.""" diff --git a/src/openai/types/beta/realtime/input_audio_buffer_commit_event.py b/src/openai/types/beta/realtime/input_audio_buffer_commit_event.py new file mode 100644 index 0000000000..7b6f5e46b7 --- /dev/null +++ b/src/openai/types/beta/realtime/input_audio_buffer_commit_event.py @@ -0,0 +1,16 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["InputAudioBufferCommitEvent"] + + +class InputAudioBufferCommitEvent(BaseModel): + type: Literal["input_audio_buffer.commit"] + """The event type, must be `input_audio_buffer.commit`.""" + + event_id: Optional[str] = None + """Optional client-generated ID used to identify this event.""" diff --git a/src/openai/types/beta/realtime/input_audio_buffer_commit_event_param.py b/src/openai/types/beta/realtime/input_audio_buffer_commit_event_param.py new file mode 100644 index 0000000000..c9c927ab98 --- /dev/null +++ b/src/openai/types/beta/realtime/input_audio_buffer_commit_event_param.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["InputAudioBufferCommitEventParam"] + + +class InputAudioBufferCommitEventParam(TypedDict, total=False): + type: Required[Literal["input_audio_buffer.commit"]] + """The event type, must be `input_audio_buffer.commit`.""" + + event_id: str + """Optional client-generated ID used to identify this event.""" diff --git a/src/openai/types/beta/realtime/input_audio_buffer_committed_event.py b/src/openai/types/beta/realtime/input_audio_buffer_committed_event.py new file mode 100644 index 0000000000..3071eff357 --- /dev/null +++ b/src/openai/types/beta/realtime/input_audio_buffer_committed_event.py @@ -0,0 +1,21 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["InputAudioBufferCommittedEvent"] + + +class InputAudioBufferCommittedEvent(BaseModel): + event_id: str + """The unique ID of the server event.""" + + item_id: str + """The ID of the user message item that will be created.""" + + previous_item_id: str + """The ID of the preceding item after which the new item will be inserted.""" + + type: Literal["input_audio_buffer.committed"] + """The event type, must be `input_audio_buffer.committed`.""" diff --git a/src/openai/types/beta/realtime/input_audio_buffer_speech_started_event.py b/src/openai/types/beta/realtime/input_audio_buffer_speech_started_event.py new file mode 100644 index 0000000000..4f3ab082c4 --- /dev/null +++ b/src/openai/types/beta/realtime/input_audio_buffer_speech_started_event.py @@ -0,0 +1,26 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["InputAudioBufferSpeechStartedEvent"] + + +class InputAudioBufferSpeechStartedEvent(BaseModel): + audio_start_ms: int + """ + Milliseconds from the start of all audio written to the buffer during the + session when speech was first detected. This will correspond to the beginning of + audio sent to the model, and thus includes the `prefix_padding_ms` configured in + the Session. + """ + + event_id: str + """The unique ID of the server event.""" + + item_id: str + """The ID of the user message item that will be created when speech stops.""" + + type: Literal["input_audio_buffer.speech_started"] + """The event type, must be `input_audio_buffer.speech_started`.""" diff --git a/src/openai/types/beta/realtime/input_audio_buffer_speech_stopped_event.py b/src/openai/types/beta/realtime/input_audio_buffer_speech_stopped_event.py new file mode 100644 index 0000000000..40568170f2 --- /dev/null +++ b/src/openai/types/beta/realtime/input_audio_buffer_speech_stopped_event.py @@ -0,0 +1,25 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["InputAudioBufferSpeechStoppedEvent"] + + +class InputAudioBufferSpeechStoppedEvent(BaseModel): + audio_end_ms: int + """Milliseconds since the session started when speech stopped. + + This will correspond to the end of audio sent to the model, and thus includes + the `min_silence_duration_ms` configured in the Session. + """ + + event_id: str + """The unique ID of the server event.""" + + item_id: str + """The ID of the user message item that will be created.""" + + type: Literal["input_audio_buffer.speech_stopped"] + """The event type, must be `input_audio_buffer.speech_stopped`.""" diff --git a/src/openai/types/beta/realtime/rate_limits_updated_event.py b/src/openai/types/beta/realtime/rate_limits_updated_event.py new file mode 100644 index 0000000000..7e12283c46 --- /dev/null +++ b/src/openai/types/beta/realtime/rate_limits_updated_event.py @@ -0,0 +1,33 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Optional +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["RateLimitsUpdatedEvent", "RateLimit"] + + +class RateLimit(BaseModel): + limit: Optional[int] = None + """The maximum allowed value for the rate limit.""" + + name: Optional[Literal["requests", "tokens"]] = None + """The name of the rate limit (`requests`, `tokens`).""" + + remaining: Optional[int] = None + """The remaining value before the limit is reached.""" + + reset_seconds: Optional[float] = None + """Seconds until the rate limit resets.""" + + +class RateLimitsUpdatedEvent(BaseModel): + event_id: str + """The unique ID of the server event.""" + + rate_limits: List[RateLimit] + """List of rate limit information.""" + + type: Literal["rate_limits.updated"] + """The event type, must be `rate_limits.updated`.""" diff --git a/src/openai/types/beta/realtime/realtime_client_event.py b/src/openai/types/beta/realtime/realtime_client_event.py new file mode 100644 index 0000000000..0769184cd0 --- /dev/null +++ b/src/openai/types/beta/realtime/realtime_client_event.py @@ -0,0 +1,32 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union +from typing_extensions import Annotated, TypeAlias + +from ...._utils import PropertyInfo +from .session_update_event import SessionUpdateEvent +from .response_cancel_event import ResponseCancelEvent +from .response_create_event import ResponseCreateEvent +from .conversation_item_create_event import ConversationItemCreateEvent +from .conversation_item_delete_event import ConversationItemDeleteEvent +from .input_audio_buffer_clear_event import InputAudioBufferClearEvent +from .input_audio_buffer_append_event import InputAudioBufferAppendEvent +from .input_audio_buffer_commit_event import InputAudioBufferCommitEvent +from .conversation_item_truncate_event import ConversationItemTruncateEvent + +__all__ = ["RealtimeClientEvent"] + +RealtimeClientEvent: TypeAlias = Annotated[ + Union[ + SessionUpdateEvent, + InputAudioBufferAppendEvent, + InputAudioBufferCommitEvent, + InputAudioBufferClearEvent, + ConversationItemCreateEvent, + ConversationItemTruncateEvent, + ConversationItemDeleteEvent, + ResponseCreateEvent, + ResponseCancelEvent, + ], + PropertyInfo(discriminator="type"), +] diff --git a/src/openai/types/beta/realtime/realtime_client_event_param.py b/src/openai/types/beta/realtime/realtime_client_event_param.py new file mode 100644 index 0000000000..4020892c33 --- /dev/null +++ b/src/openai/types/beta/realtime/realtime_client_event_param.py @@ -0,0 +1,30 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union +from typing_extensions import TypeAlias + +from .session_update_event_param import SessionUpdateEventParam +from .response_cancel_event_param import ResponseCancelEventParam +from .response_create_event_param import ResponseCreateEventParam +from .conversation_item_create_event_param import ConversationItemCreateEventParam +from .conversation_item_delete_event_param import ConversationItemDeleteEventParam +from .input_audio_buffer_clear_event_param import InputAudioBufferClearEventParam +from .input_audio_buffer_append_event_param import InputAudioBufferAppendEventParam +from .input_audio_buffer_commit_event_param import InputAudioBufferCommitEventParam +from .conversation_item_truncate_event_param import ConversationItemTruncateEventParam + +__all__ = ["RealtimeClientEventParam"] + +RealtimeClientEventParam: TypeAlias = Union[ + SessionUpdateEventParam, + InputAudioBufferAppendEventParam, + InputAudioBufferCommitEventParam, + InputAudioBufferClearEventParam, + ConversationItemCreateEventParam, + ConversationItemTruncateEventParam, + ConversationItemDeleteEventParam, + ResponseCreateEventParam, + ResponseCancelEventParam, +] diff --git a/src/openai/types/beta/realtime/realtime_connect_params.py b/src/openai/types/beta/realtime/realtime_connect_params.py new file mode 100644 index 0000000000..76474f3de4 --- /dev/null +++ b/src/openai/types/beta/realtime/realtime_connect_params.py @@ -0,0 +1,11 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Required, TypedDict + +__all__ = ["RealtimeConnectParams"] + + +class RealtimeConnectParams(TypedDict, total=False): + model: Required[str] diff --git a/src/openai/types/beta/realtime/realtime_response.py b/src/openai/types/beta/realtime/realtime_response.py new file mode 100644 index 0000000000..3e1b1406c0 --- /dev/null +++ b/src/openai/types/beta/realtime/realtime_response.py @@ -0,0 +1,42 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Optional +from typing_extensions import Literal + +from ...._models import BaseModel +from .conversation_item import ConversationItem +from .realtime_response_usage import RealtimeResponseUsage +from .realtime_response_status import RealtimeResponseStatus + +__all__ = ["RealtimeResponse"] + + +class RealtimeResponse(BaseModel): + id: Optional[str] = None + """The unique ID of the response.""" + + metadata: Optional[object] = None + """Developer-provided string key-value pairs associated with this response.""" + + object: Optional[Literal["realtime.response"]] = None + """The object type, must be `realtime.response`.""" + + output: Optional[List[ConversationItem]] = None + """The list of output items generated by the response.""" + + status: Optional[Literal["completed", "cancelled", "failed", "incomplete"]] = None + """ + The final status of the response (`completed`, `cancelled`, `failed`, or + `incomplete`). + """ + + status_details: Optional[RealtimeResponseStatus] = None + """Additional details about the status.""" + + usage: Optional[RealtimeResponseUsage] = None + """Usage statistics for the Response, this will correspond to billing. + + A Realtime API session will maintain a conversation context and append new Items + to the Conversation, thus output from previous turns (text and audio tokens) + will become the input for later turns. + """ diff --git a/src/openai/types/beta/realtime/realtime_response_status.py b/src/openai/types/beta/realtime/realtime_response_status.py new file mode 100644 index 0000000000..7189cd58a1 --- /dev/null +++ b/src/openai/types/beta/realtime/realtime_response_status.py @@ -0,0 +1,39 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["RealtimeResponseStatus", "Error"] + + +class Error(BaseModel): + code: Optional[str] = None + """Error code, if any.""" + + type: Optional[str] = None + """The type of error.""" + + +class RealtimeResponseStatus(BaseModel): + error: Optional[Error] = None + """ + A description of the error that caused the response to fail, populated when the + `status` is `failed`. + """ + + reason: Optional[Literal["turn_detected", "client_cancelled", "max_output_tokens", "content_filter"]] = None + """The reason the Response did not complete. + + For a `cancelled` Response, one of `turn_detected` (the server VAD detected a + new start of speech) or `client_cancelled` (the client sent a cancel event). For + an `incomplete` Response, one of `max_output_tokens` or `content_filter` (the + server-side safety filter activated and cut off the response). + """ + + type: Optional[Literal["completed", "cancelled", "incomplete", "failed"]] = None + """ + The type of error that caused the response to fail, corresponding with the + `status` field (`completed`, `cancelled`, `incomplete`, `failed`). + """ diff --git a/src/openai/types/beta/realtime/realtime_response_usage.py b/src/openai/types/beta/realtime/realtime_response_usage.py new file mode 100644 index 0000000000..7ca822e25e --- /dev/null +++ b/src/openai/types/beta/realtime/realtime_response_usage.py @@ -0,0 +1,52 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional + +from ...._models import BaseModel + +__all__ = ["RealtimeResponseUsage", "InputTokenDetails", "OutputTokenDetails"] + + +class InputTokenDetails(BaseModel): + audio_tokens: Optional[int] = None + """The number of audio tokens used in the Response.""" + + cached_tokens: Optional[int] = None + """The number of cached tokens used in the Response.""" + + text_tokens: Optional[int] = None + """The number of text tokens used in the Response.""" + + +class OutputTokenDetails(BaseModel): + audio_tokens: Optional[int] = None + """The number of audio tokens used in the Response.""" + + text_tokens: Optional[int] = None + """The number of text tokens used in the Response.""" + + +class RealtimeResponseUsage(BaseModel): + input_token_details: Optional[InputTokenDetails] = None + """Details about the input tokens used in the Response.""" + + input_tokens: Optional[int] = None + """ + The number of input tokens used in the Response, including text and audio + tokens. + """ + + output_token_details: Optional[OutputTokenDetails] = None + """Details about the output tokens used in the Response.""" + + output_tokens: Optional[int] = None + """ + The number of output tokens sent in the Response, including text and audio + tokens. + """ + + total_tokens: Optional[int] = None + """ + The total number of tokens in the Response including input and output text and + audio tokens. + """ diff --git a/src/openai/types/beta/realtime/realtime_server_event.py b/src/openai/types/beta/realtime/realtime_server_event.py new file mode 100644 index 0000000000..5f8ed55b13 --- /dev/null +++ b/src/openai/types/beta/realtime/realtime_server_event.py @@ -0,0 +1,72 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union +from typing_extensions import Annotated, TypeAlias + +from ...._utils import PropertyInfo +from .error_event import ErrorEvent +from .response_done_event import ResponseDoneEvent +from .session_created_event import SessionCreatedEvent +from .session_updated_event import SessionUpdatedEvent +from .response_created_event import ResponseCreatedEvent +from .response_text_done_event import ResponseTextDoneEvent +from .rate_limits_updated_event import RateLimitsUpdatedEvent +from .response_audio_done_event import ResponseAudioDoneEvent +from .response_text_delta_event import ResponseTextDeltaEvent +from .conversation_created_event import ConversationCreatedEvent +from .response_audio_delta_event import ResponseAudioDeltaEvent +from .conversation_item_created_event import ConversationItemCreatedEvent +from .conversation_item_deleted_event import ConversationItemDeletedEvent +from .response_output_item_done_event import ResponseOutputItemDoneEvent +from .input_audio_buffer_cleared_event import InputAudioBufferClearedEvent +from .response_content_part_done_event import ResponseContentPartDoneEvent +from .response_output_item_added_event import ResponseOutputItemAddedEvent +from .conversation_item_truncated_event import ConversationItemTruncatedEvent +from .response_content_part_added_event import ResponseContentPartAddedEvent +from .input_audio_buffer_committed_event import InputAudioBufferCommittedEvent +from .response_audio_transcript_done_event import ResponseAudioTranscriptDoneEvent +from .response_audio_transcript_delta_event import ResponseAudioTranscriptDeltaEvent +from .input_audio_buffer_speech_started_event import InputAudioBufferSpeechStartedEvent +from .input_audio_buffer_speech_stopped_event import InputAudioBufferSpeechStoppedEvent +from .response_function_call_arguments_done_event import ResponseFunctionCallArgumentsDoneEvent +from .response_function_call_arguments_delta_event import ResponseFunctionCallArgumentsDeltaEvent +from .conversation_item_input_audio_transcription_failed_event import ConversationItemInputAudioTranscriptionFailedEvent +from .conversation_item_input_audio_transcription_completed_event import ( + ConversationItemInputAudioTranscriptionCompletedEvent, +) + +__all__ = ["RealtimeServerEvent"] + +RealtimeServerEvent: TypeAlias = Annotated[ + Union[ + ErrorEvent, + SessionCreatedEvent, + SessionUpdatedEvent, + ConversationCreatedEvent, + InputAudioBufferCommittedEvent, + InputAudioBufferClearedEvent, + InputAudioBufferSpeechStartedEvent, + InputAudioBufferSpeechStoppedEvent, + ConversationItemCreatedEvent, + ConversationItemInputAudioTranscriptionCompletedEvent, + ConversationItemInputAudioTranscriptionFailedEvent, + ConversationItemTruncatedEvent, + ConversationItemDeletedEvent, + ResponseCreatedEvent, + ResponseDoneEvent, + ResponseOutputItemAddedEvent, + ResponseOutputItemDoneEvent, + ResponseContentPartAddedEvent, + ResponseContentPartDoneEvent, + ResponseTextDeltaEvent, + ResponseTextDoneEvent, + ResponseAudioTranscriptDeltaEvent, + ResponseAudioTranscriptDoneEvent, + ResponseAudioDeltaEvent, + ResponseAudioDoneEvent, + ResponseFunctionCallArgumentsDeltaEvent, + ResponseFunctionCallArgumentsDoneEvent, + RateLimitsUpdatedEvent, + ], + PropertyInfo(discriminator="type"), +] diff --git a/src/openai/types/beta/realtime/response_audio_delta_event.py b/src/openai/types/beta/realtime/response_audio_delta_event.py new file mode 100644 index 0000000000..8e0128d942 --- /dev/null +++ b/src/openai/types/beta/realtime/response_audio_delta_event.py @@ -0,0 +1,30 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["ResponseAudioDeltaEvent"] + + +class ResponseAudioDeltaEvent(BaseModel): + content_index: int + """The index of the content part in the item's content array.""" + + delta: str + """Base64-encoded audio data delta.""" + + event_id: str + """The unique ID of the server event.""" + + item_id: str + """The ID of the item.""" + + output_index: int + """The index of the output item in the response.""" + + response_id: str + """The ID of the response.""" + + type: Literal["response.audio.delta"] + """The event type, must be `response.audio.delta`.""" diff --git a/src/openai/types/beta/realtime/response_audio_done_event.py b/src/openai/types/beta/realtime/response_audio_done_event.py new file mode 100644 index 0000000000..68e78bc778 --- /dev/null +++ b/src/openai/types/beta/realtime/response_audio_done_event.py @@ -0,0 +1,27 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["ResponseAudioDoneEvent"] + + +class ResponseAudioDoneEvent(BaseModel): + content_index: int + """The index of the content part in the item's content array.""" + + event_id: str + """The unique ID of the server event.""" + + item_id: str + """The ID of the item.""" + + output_index: int + """The index of the output item in the response.""" + + response_id: str + """The ID of the response.""" + + type: Literal["response.audio.done"] + """The event type, must be `response.audio.done`.""" diff --git a/src/openai/types/beta/realtime/response_audio_transcript_delta_event.py b/src/openai/types/beta/realtime/response_audio_transcript_delta_event.py new file mode 100644 index 0000000000..3609948d10 --- /dev/null +++ b/src/openai/types/beta/realtime/response_audio_transcript_delta_event.py @@ -0,0 +1,30 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["ResponseAudioTranscriptDeltaEvent"] + + +class ResponseAudioTranscriptDeltaEvent(BaseModel): + content_index: int + """The index of the content part in the item's content array.""" + + delta: str + """The transcript delta.""" + + event_id: str + """The unique ID of the server event.""" + + item_id: str + """The ID of the item.""" + + output_index: int + """The index of the output item in the response.""" + + response_id: str + """The ID of the response.""" + + type: Literal["response.audio_transcript.delta"] + """The event type, must be `response.audio_transcript.delta`.""" diff --git a/src/openai/types/beta/realtime/response_audio_transcript_done_event.py b/src/openai/types/beta/realtime/response_audio_transcript_done_event.py new file mode 100644 index 0000000000..4e4436a95f --- /dev/null +++ b/src/openai/types/beta/realtime/response_audio_transcript_done_event.py @@ -0,0 +1,30 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["ResponseAudioTranscriptDoneEvent"] + + +class ResponseAudioTranscriptDoneEvent(BaseModel): + content_index: int + """The index of the content part in the item's content array.""" + + event_id: str + """The unique ID of the server event.""" + + item_id: str + """The ID of the item.""" + + output_index: int + """The index of the output item in the response.""" + + response_id: str + """The ID of the response.""" + + transcript: str + """The final transcript of the audio.""" + + type: Literal["response.audio_transcript.done"] + """The event type, must be `response.audio_transcript.done`.""" diff --git a/src/openai/types/beta/realtime/response_cancel_event.py b/src/openai/types/beta/realtime/response_cancel_event.py new file mode 100644 index 0000000000..c5ff991e9a --- /dev/null +++ b/src/openai/types/beta/realtime/response_cancel_event.py @@ -0,0 +1,22 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["ResponseCancelEvent"] + + +class ResponseCancelEvent(BaseModel): + type: Literal["response.cancel"] + """The event type, must be `response.cancel`.""" + + event_id: Optional[str] = None + """Optional client-generated ID used to identify this event.""" + + response_id: Optional[str] = None + """ + A specific response ID to cancel - if not provided, will cancel an in-progress + response in the default conversation. + """ diff --git a/src/openai/types/beta/realtime/response_cancel_event_param.py b/src/openai/types/beta/realtime/response_cancel_event_param.py new file mode 100644 index 0000000000..f33740730a --- /dev/null +++ b/src/openai/types/beta/realtime/response_cancel_event_param.py @@ -0,0 +1,21 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ResponseCancelEventParam"] + + +class ResponseCancelEventParam(TypedDict, total=False): + type: Required[Literal["response.cancel"]] + """The event type, must be `response.cancel`.""" + + event_id: str + """Optional client-generated ID used to identify this event.""" + + response_id: str + """ + A specific response ID to cancel - if not provided, will cancel an in-progress + response in the default conversation. + """ diff --git a/src/openai/types/beta/realtime/response_content_part_added_event.py b/src/openai/types/beta/realtime/response_content_part_added_event.py new file mode 100644 index 0000000000..45c8f20f97 --- /dev/null +++ b/src/openai/types/beta/realtime/response_content_part_added_event.py @@ -0,0 +1,45 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["ResponseContentPartAddedEvent", "Part"] + + +class Part(BaseModel): + audio: Optional[str] = None + """Base64-encoded audio data (if type is "audio").""" + + text: Optional[str] = None + """The text content (if type is "text").""" + + transcript: Optional[str] = None + """The transcript of the audio (if type is "audio").""" + + type: Optional[Literal["text", "audio"]] = None + """The content type ("text", "audio").""" + + +class ResponseContentPartAddedEvent(BaseModel): + content_index: int + """The index of the content part in the item's content array.""" + + event_id: str + """The unique ID of the server event.""" + + item_id: str + """The ID of the item to which the content part was added.""" + + output_index: int + """The index of the output item in the response.""" + + part: Part + """The content part that was added.""" + + response_id: str + """The ID of the response.""" + + type: Literal["response.content_part.added"] + """The event type, must be `response.content_part.added`.""" diff --git a/src/openai/types/beta/realtime/response_content_part_done_event.py b/src/openai/types/beta/realtime/response_content_part_done_event.py new file mode 100644 index 0000000000..3d16116106 --- /dev/null +++ b/src/openai/types/beta/realtime/response_content_part_done_event.py @@ -0,0 +1,45 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["ResponseContentPartDoneEvent", "Part"] + + +class Part(BaseModel): + audio: Optional[str] = None + """Base64-encoded audio data (if type is "audio").""" + + text: Optional[str] = None + """The text content (if type is "text").""" + + transcript: Optional[str] = None + """The transcript of the audio (if type is "audio").""" + + type: Optional[Literal["text", "audio"]] = None + """The content type ("text", "audio").""" + + +class ResponseContentPartDoneEvent(BaseModel): + content_index: int + """The index of the content part in the item's content array.""" + + event_id: str + """The unique ID of the server event.""" + + item_id: str + """The ID of the item.""" + + output_index: int + """The index of the output item in the response.""" + + part: Part + """The content part that is done.""" + + response_id: str + """The ID of the response.""" + + type: Literal["response.content_part.done"] + """The event type, must be `response.content_part.done`.""" diff --git a/src/openai/types/beta/realtime/response_create_event.py b/src/openai/types/beta/realtime/response_create_event.py new file mode 100644 index 0000000000..00ba1e5dad --- /dev/null +++ b/src/openai/types/beta/realtime/response_create_event.py @@ -0,0 +1,115 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Union, Optional +from typing_extensions import Literal + +from ...._models import BaseModel +from .conversation_item import ConversationItem + +__all__ = ["ResponseCreateEvent", "Response", "ResponseTool"] + + +class ResponseTool(BaseModel): + description: Optional[str] = None + """ + The description of the function, including guidance on when and how to call it, + and guidance about what to tell the user when calling (if anything). + """ + + name: Optional[str] = None + """The name of the function.""" + + parameters: Optional[object] = None + """Parameters of the function in JSON Schema.""" + + type: Optional[Literal["function"]] = None + """The type of the tool, i.e. `function`.""" + + +class Response(BaseModel): + conversation: Union[str, Literal["auto", "none"], None] = None + """Controls which conversation the response is added to. + + Currently supports `auto` and `none`, with `auto` as the default value. The + `auto` value means that the contents of the response will be added to the + default conversation. Set this to `none` to create an out-of-band response which + will not add items to default conversation. + """ + + input: Optional[List[ConversationItem]] = None + """Input items to include in the prompt for the model. + + Creates a new context for this response, without including the default + conversation. Can include references to items from the default conversation. + """ + + instructions: Optional[str] = None + """The default system instructions (i.e. + + system message) prepended to model calls. This field allows the client to guide + the model on desired responses. The model can be instructed on response content + and format, (e.g. "be extremely succinct", "act friendly", "here are examples of + good responses") and on audio behavior (e.g. "talk quickly", "inject emotion + into your voice", "laugh frequently"). The instructions are not guaranteed to be + followed by the model, but they provide guidance to the model on the desired + behavior. + + Note that the server sets default instructions which will be used if this field + is not set and are visible in the `session.created` event at the start of the + session. + """ + + max_response_output_tokens: Union[int, Literal["inf"], None] = None + """ + Maximum number of output tokens for a single assistant response, inclusive of + tool calls. Provide an integer between 1 and 4096 to limit output tokens, or + `inf` for the maximum available tokens for a given model. Defaults to `inf`. + """ + + metadata: Optional[object] = None + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format. Keys can be a maximum of 64 characters long and values can be + a maximum of 512 characters long. + """ + + modalities: Optional[List[Literal["text", "audio"]]] = None + """The set of modalities the model can respond with. + + To disable audio, set this to ["text"]. + """ + + output_audio_format: Optional[Literal["pcm16", "g711_ulaw", "g711_alaw"]] = None + """The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.""" + + temperature: Optional[float] = None + """Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8.""" + + tool_choice: Optional[str] = None + """How the model chooses tools. + + Options are `auto`, `none`, `required`, or specify a function. + """ + + tools: Optional[List[ResponseTool]] = None + """Tools (functions) available to the model.""" + + voice: Optional[Literal["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse"]] = None + """The voice the model uses to respond. + + Voice cannot be changed during the session once the model has responded with + audio at least once. Current voice options are `alloy`, `ash`, `ballad`, + `coral`, `echo` `sage`, `shimmer` and `verse`. + """ + + +class ResponseCreateEvent(BaseModel): + type: Literal["response.create"] + """The event type, must be `response.create`.""" + + event_id: Optional[str] = None + """Optional client-generated ID used to identify this event.""" + + response: Optional[Response] = None + """Create a new Realtime response with these parameters""" diff --git a/src/openai/types/beta/realtime/response_create_event_param.py b/src/openai/types/beta/realtime/response_create_event_param.py new file mode 100644 index 0000000000..7c92b32df1 --- /dev/null +++ b/src/openai/types/beta/realtime/response_create_event_param.py @@ -0,0 +1,116 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import List, Union, Iterable, Optional +from typing_extensions import Literal, Required, TypedDict + +from .conversation_item_param import ConversationItemParam + +__all__ = ["ResponseCreateEventParam", "Response", "ResponseTool"] + + +class ResponseTool(TypedDict, total=False): + description: str + """ + The description of the function, including guidance on when and how to call it, + and guidance about what to tell the user when calling (if anything). + """ + + name: str + """The name of the function.""" + + parameters: object + """Parameters of the function in JSON Schema.""" + + type: Literal["function"] + """The type of the tool, i.e. `function`.""" + + +class Response(TypedDict, total=False): + conversation: Union[str, Literal["auto", "none"]] + """Controls which conversation the response is added to. + + Currently supports `auto` and `none`, with `auto` as the default value. The + `auto` value means that the contents of the response will be added to the + default conversation. Set this to `none` to create an out-of-band response which + will not add items to default conversation. + """ + + input: Iterable[ConversationItemParam] + """Input items to include in the prompt for the model. + + Creates a new context for this response, without including the default + conversation. Can include references to items from the default conversation. + """ + + instructions: str + """The default system instructions (i.e. + + system message) prepended to model calls. This field allows the client to guide + the model on desired responses. The model can be instructed on response content + and format, (e.g. "be extremely succinct", "act friendly", "here are examples of + good responses") and on audio behavior (e.g. "talk quickly", "inject emotion + into your voice", "laugh frequently"). The instructions are not guaranteed to be + followed by the model, but they provide guidance to the model on the desired + behavior. + + Note that the server sets default instructions which will be used if this field + is not set and are visible in the `session.created` event at the start of the + session. + """ + + max_response_output_tokens: Union[int, Literal["inf"]] + """ + Maximum number of output tokens for a single assistant response, inclusive of + tool calls. Provide an integer between 1 and 4096 to limit output tokens, or + `inf` for the maximum available tokens for a given model. Defaults to `inf`. + """ + + metadata: Optional[object] + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format. Keys can be a maximum of 64 characters long and values can be + a maximum of 512 characters long. + """ + + modalities: List[Literal["text", "audio"]] + """The set of modalities the model can respond with. + + To disable audio, set this to ["text"]. + """ + + output_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] + """The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.""" + + temperature: float + """Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8.""" + + tool_choice: str + """How the model chooses tools. + + Options are `auto`, `none`, `required`, or specify a function. + """ + + tools: Iterable[ResponseTool] + """Tools (functions) available to the model.""" + + voice: Literal["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse"] + """The voice the model uses to respond. + + Voice cannot be changed during the session once the model has responded with + audio at least once. Current voice options are `alloy`, `ash`, `ballad`, + `coral`, `echo` `sage`, `shimmer` and `verse`. + """ + + +class ResponseCreateEventParam(TypedDict, total=False): + type: Required[Literal["response.create"]] + """The event type, must be `response.create`.""" + + event_id: str + """Optional client-generated ID used to identify this event.""" + + response: Response + """Create a new Realtime response with these parameters""" diff --git a/src/openai/types/beta/realtime/response_created_event.py b/src/openai/types/beta/realtime/response_created_event.py new file mode 100644 index 0000000000..a4990cf095 --- /dev/null +++ b/src/openai/types/beta/realtime/response_created_event.py @@ -0,0 +1,19 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ...._models import BaseModel +from .realtime_response import RealtimeResponse + +__all__ = ["ResponseCreatedEvent"] + + +class ResponseCreatedEvent(BaseModel): + event_id: str + """The unique ID of the server event.""" + + response: RealtimeResponse + """The response resource.""" + + type: Literal["response.created"] + """The event type, must be `response.created`.""" diff --git a/src/openai/types/beta/realtime/response_done_event.py b/src/openai/types/beta/realtime/response_done_event.py new file mode 100644 index 0000000000..9e655184b6 --- /dev/null +++ b/src/openai/types/beta/realtime/response_done_event.py @@ -0,0 +1,19 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ...._models import BaseModel +from .realtime_response import RealtimeResponse + +__all__ = ["ResponseDoneEvent"] + + +class ResponseDoneEvent(BaseModel): + event_id: str + """The unique ID of the server event.""" + + response: RealtimeResponse + """The response resource.""" + + type: Literal["response.done"] + """The event type, must be `response.done`.""" diff --git a/src/openai/types/beta/realtime/response_function_call_arguments_delta_event.py b/src/openai/types/beta/realtime/response_function_call_arguments_delta_event.py new file mode 100644 index 0000000000..cdbb64e658 --- /dev/null +++ b/src/openai/types/beta/realtime/response_function_call_arguments_delta_event.py @@ -0,0 +1,30 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["ResponseFunctionCallArgumentsDeltaEvent"] + + +class ResponseFunctionCallArgumentsDeltaEvent(BaseModel): + call_id: str + """The ID of the function call.""" + + delta: str + """The arguments delta as a JSON string.""" + + event_id: str + """The unique ID of the server event.""" + + item_id: str + """The ID of the function call item.""" + + output_index: int + """The index of the output item in the response.""" + + response_id: str + """The ID of the response.""" + + type: Literal["response.function_call_arguments.delta"] + """The event type, must be `response.function_call_arguments.delta`.""" diff --git a/src/openai/types/beta/realtime/response_function_call_arguments_done_event.py b/src/openai/types/beta/realtime/response_function_call_arguments_done_event.py new file mode 100644 index 0000000000..0a5db53323 --- /dev/null +++ b/src/openai/types/beta/realtime/response_function_call_arguments_done_event.py @@ -0,0 +1,30 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["ResponseFunctionCallArgumentsDoneEvent"] + + +class ResponseFunctionCallArgumentsDoneEvent(BaseModel): + arguments: str + """The final arguments as a JSON string.""" + + call_id: str + """The ID of the function call.""" + + event_id: str + """The unique ID of the server event.""" + + item_id: str + """The ID of the function call item.""" + + output_index: int + """The index of the output item in the response.""" + + response_id: str + """The ID of the response.""" + + type: Literal["response.function_call_arguments.done"] + """The event type, must be `response.function_call_arguments.done`.""" diff --git a/src/openai/types/beta/realtime/response_output_item_added_event.py b/src/openai/types/beta/realtime/response_output_item_added_event.py new file mode 100644 index 0000000000..c89bfdc3be --- /dev/null +++ b/src/openai/types/beta/realtime/response_output_item_added_event.py @@ -0,0 +1,25 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ...._models import BaseModel +from .conversation_item import ConversationItem + +__all__ = ["ResponseOutputItemAddedEvent"] + + +class ResponseOutputItemAddedEvent(BaseModel): + event_id: str + """The unique ID of the server event.""" + + item: ConversationItem + """The item to add to the conversation.""" + + output_index: int + """The index of the output item in the Response.""" + + response_id: str + """The ID of the Response to which the item belongs.""" + + type: Literal["response.output_item.added"] + """The event type, must be `response.output_item.added`.""" diff --git a/src/openai/types/beta/realtime/response_output_item_done_event.py b/src/openai/types/beta/realtime/response_output_item_done_event.py new file mode 100644 index 0000000000..b5910e22aa --- /dev/null +++ b/src/openai/types/beta/realtime/response_output_item_done_event.py @@ -0,0 +1,25 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ...._models import BaseModel +from .conversation_item import ConversationItem + +__all__ = ["ResponseOutputItemDoneEvent"] + + +class ResponseOutputItemDoneEvent(BaseModel): + event_id: str + """The unique ID of the server event.""" + + item: ConversationItem + """The item to add to the conversation.""" + + output_index: int + """The index of the output item in the Response.""" + + response_id: str + """The ID of the Response to which the item belongs.""" + + type: Literal["response.output_item.done"] + """The event type, must be `response.output_item.done`.""" diff --git a/src/openai/types/beta/realtime/response_text_delta_event.py b/src/openai/types/beta/realtime/response_text_delta_event.py new file mode 100644 index 0000000000..c463b3c3d0 --- /dev/null +++ b/src/openai/types/beta/realtime/response_text_delta_event.py @@ -0,0 +1,30 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["ResponseTextDeltaEvent"] + + +class ResponseTextDeltaEvent(BaseModel): + content_index: int + """The index of the content part in the item's content array.""" + + delta: str + """The text delta.""" + + event_id: str + """The unique ID of the server event.""" + + item_id: str + """The ID of the item.""" + + output_index: int + """The index of the output item in the response.""" + + response_id: str + """The ID of the response.""" + + type: Literal["response.text.delta"] + """The event type, must be `response.text.delta`.""" diff --git a/src/openai/types/beta/realtime/response_text_done_event.py b/src/openai/types/beta/realtime/response_text_done_event.py new file mode 100644 index 0000000000..020ff41d58 --- /dev/null +++ b/src/openai/types/beta/realtime/response_text_done_event.py @@ -0,0 +1,30 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["ResponseTextDoneEvent"] + + +class ResponseTextDoneEvent(BaseModel): + content_index: int + """The index of the content part in the item's content array.""" + + event_id: str + """The unique ID of the server event.""" + + item_id: str + """The ID of the item.""" + + output_index: int + """The index of the output item in the response.""" + + response_id: str + """The ID of the response.""" + + text: str + """The final text content.""" + + type: Literal["response.text.done"] + """The event type, must be `response.text.done`.""" diff --git a/src/openai/types/beta/realtime/session.py b/src/openai/types/beta/realtime/session.py new file mode 100644 index 0000000000..09cdbb02bc --- /dev/null +++ b/src/openai/types/beta/realtime/session.py @@ -0,0 +1,148 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Union, Optional +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["Session", "InputAudioTranscription", "Tool", "TurnDetection"] + + +class InputAudioTranscription(BaseModel): + model: Optional[str] = None + """ + The model to use for transcription, `whisper-1` is the only currently supported + model. + """ + + +class Tool(BaseModel): + description: Optional[str] = None + """ + The description of the function, including guidance on when and how to call it, + and guidance about what to tell the user when calling (if anything). + """ + + name: Optional[str] = None + """The name of the function.""" + + parameters: Optional[object] = None + """Parameters of the function in JSON Schema.""" + + type: Optional[Literal["function"]] = None + """The type of the tool, i.e. `function`.""" + + +class TurnDetection(BaseModel): + prefix_padding_ms: Optional[int] = None + """Amount of audio to include before the VAD detected speech (in milliseconds). + + Defaults to 300ms. + """ + + silence_duration_ms: Optional[int] = None + """Duration of silence to detect speech stop (in milliseconds). + + Defaults to 500ms. With shorter values the model will respond more quickly, but + may jump in on short pauses from the user. + """ + + threshold: Optional[float] = None + """Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5. + + A higher threshold will require louder audio to activate the model, and thus + might perform better in noisy environments. + """ + + type: Optional[Literal["server_vad"]] = None + """Type of turn detection, only `server_vad` is currently supported.""" + + +class Session(BaseModel): + id: Optional[str] = None + """Unique identifier for the session object.""" + + input_audio_format: Optional[Literal["pcm16", "g711_ulaw", "g711_alaw"]] = None + """The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.""" + + input_audio_transcription: Optional[InputAudioTranscription] = None + """ + Configuration for input audio transcription, defaults to off and can be set to + `null` to turn off once on. Input audio transcription is not native to the + model, since the model consumes audio directly. Transcription runs + asynchronously through Whisper and should be treated as rough guidance rather + than the representation understood by the model. + """ + + instructions: Optional[str] = None + """The default system instructions (i.e. + + system message) prepended to model calls. This field allows the client to guide + the model on desired responses. The model can be instructed on response content + and format, (e.g. "be extremely succinct", "act friendly", "here are examples of + good responses") and on audio behavior (e.g. "talk quickly", "inject emotion + into your voice", "laugh frequently"). The instructions are not guaranteed to be + followed by the model, but they provide guidance to the model on the desired + behavior. + + Note that the server sets default instructions which will be used if this field + is not set and are visible in the `session.created` event at the start of the + session. + """ + + max_response_output_tokens: Union[int, Literal["inf"], None] = None + """ + Maximum number of output tokens for a single assistant response, inclusive of + tool calls. Provide an integer between 1 and 4096 to limit output tokens, or + `inf` for the maximum available tokens for a given model. Defaults to `inf`. + """ + + modalities: Optional[List[Literal["text", "audio"]]] = None + """The set of modalities the model can respond with. + + To disable audio, set this to ["text"]. + """ + + model: Union[ + str, + Literal[ + "gpt-4o-realtime-preview", + "gpt-4o-realtime-preview-2024-10-01", + "gpt-4o-realtime-preview-2024-12-17", + "gpt-4o-mini-realtime-preview", + "gpt-4o-mini-realtime-preview-2024-12-17", + ], + None, + ] = None + """The Realtime model used for this session.""" + + output_audio_format: Optional[Literal["pcm16", "g711_ulaw", "g711_alaw"]] = None + """The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.""" + + temperature: Optional[float] = None + """Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8.""" + + tool_choice: Optional[str] = None + """How the model chooses tools. + + Options are `auto`, `none`, `required`, or specify a function. + """ + + tools: Optional[List[Tool]] = None + """Tools (functions) available to the model.""" + + turn_detection: Optional[TurnDetection] = None + """Configuration for turn detection. + + Can be set to `null` to turn off. Server VAD means that the model will detect + the start and end of speech based on audio volume and respond at the end of user + speech. + """ + + voice: Optional[Literal["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse"]] = None + """The voice the model uses to respond. + + Voice cannot be changed during the session once the model has responded with + audio at least once. Current voice options are `alloy`, `ash`, `ballad`, + `coral`, `echo` `sage`, `shimmer` and `verse`. + """ diff --git a/src/openai/types/beta/realtime/session_created_event.py b/src/openai/types/beta/realtime/session_created_event.py new file mode 100644 index 0000000000..baf6af388b --- /dev/null +++ b/src/openai/types/beta/realtime/session_created_event.py @@ -0,0 +1,19 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from .session import Session +from ...._models import BaseModel + +__all__ = ["SessionCreatedEvent"] + + +class SessionCreatedEvent(BaseModel): + event_id: str + """The unique ID of the server event.""" + + session: Session + """Realtime session object configuration.""" + + type: Literal["session.created"] + """The event type, must be `session.created`.""" diff --git a/src/openai/types/beta/realtime/session_update_event.py b/src/openai/types/beta/realtime/session_update_event.py new file mode 100644 index 0000000000..c04220aa25 --- /dev/null +++ b/src/openai/types/beta/realtime/session_update_event.py @@ -0,0 +1,158 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Union, Optional +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["SessionUpdateEvent", "Session", "SessionInputAudioTranscription", "SessionTool", "SessionTurnDetection"] + + +class SessionInputAudioTranscription(BaseModel): + model: Optional[str] = None + """ + The model to use for transcription, `whisper-1` is the only currently supported + model. + """ + + +class SessionTool(BaseModel): + description: Optional[str] = None + """ + The description of the function, including guidance on when and how to call it, + and guidance about what to tell the user when calling (if anything). + """ + + name: Optional[str] = None + """The name of the function.""" + + parameters: Optional[object] = None + """Parameters of the function in JSON Schema.""" + + type: Optional[Literal["function"]] = None + """The type of the tool, i.e. `function`.""" + + +class SessionTurnDetection(BaseModel): + create_response: Optional[bool] = None + """Whether or not to automatically generate a response when VAD is enabled. + + `true` by default. + """ + + prefix_padding_ms: Optional[int] = None + """Amount of audio to include before the VAD detected speech (in milliseconds). + + Defaults to 300ms. + """ + + silence_duration_ms: Optional[int] = None + """Duration of silence to detect speech stop (in milliseconds). + + Defaults to 500ms. With shorter values the model will respond more quickly, but + may jump in on short pauses from the user. + """ + + threshold: Optional[float] = None + """Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5. + + A higher threshold will require louder audio to activate the model, and thus + might perform better in noisy environments. + """ + + type: Optional[str] = None + """Type of turn detection, only `server_vad` is currently supported.""" + + +class Session(BaseModel): + model: Literal[ + "gpt-4o-realtime-preview", + "gpt-4o-realtime-preview-2024-10-01", + "gpt-4o-realtime-preview-2024-12-17", + "gpt-4o-mini-realtime-preview", + "gpt-4o-mini-realtime-preview-2024-12-17", + ] + """The Realtime model used for this session.""" + + input_audio_format: Optional[Literal["pcm16", "g711_ulaw", "g711_alaw"]] = None + """The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.""" + + input_audio_transcription: Optional[SessionInputAudioTranscription] = None + """ + Configuration for input audio transcription, defaults to off and can be set to + `null` to turn off once on. Input audio transcription is not native to the + model, since the model consumes audio directly. Transcription runs + asynchronously through Whisper and should be treated as rough guidance rather + than the representation understood by the model. + """ + + instructions: Optional[str] = None + """The default system instructions (i.e. + + system message) prepended to model calls. This field allows the client to guide + the model on desired responses. The model can be instructed on response content + and format, (e.g. "be extremely succinct", "act friendly", "here are examples of + good responses") and on audio behavior (e.g. "talk quickly", "inject emotion + into your voice", "laugh frequently"). The instructions are not guaranteed to be + followed by the model, but they provide guidance to the model on the desired + behavior. + + Note that the server sets default instructions which will be used if this field + is not set and are visible in the `session.created` event at the start of the + session. + """ + + max_response_output_tokens: Union[int, Literal["inf"], None] = None + """ + Maximum number of output tokens for a single assistant response, inclusive of + tool calls. Provide an integer between 1 and 4096 to limit output tokens, or + `inf` for the maximum available tokens for a given model. Defaults to `inf`. + """ + + modalities: Optional[List[Literal["text", "audio"]]] = None + """The set of modalities the model can respond with. + + To disable audio, set this to ["text"]. + """ + + output_audio_format: Optional[Literal["pcm16", "g711_ulaw", "g711_alaw"]] = None + """The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.""" + + temperature: Optional[float] = None + """Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8.""" + + tool_choice: Optional[str] = None + """How the model chooses tools. + + Options are `auto`, `none`, `required`, or specify a function. + """ + + tools: Optional[List[SessionTool]] = None + """Tools (functions) available to the model.""" + + turn_detection: Optional[SessionTurnDetection] = None + """Configuration for turn detection. + + Can be set to `null` to turn off. Server VAD means that the model will detect + the start and end of speech based on audio volume and respond at the end of user + speech. + """ + + voice: Optional[Literal["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse"]] = None + """The voice the model uses to respond. + + Voice cannot be changed during the session once the model has responded with + audio at least once. Current voice options are `alloy`, `ash`, `ballad`, + `coral`, `echo` `sage`, `shimmer` and `verse`. + """ + + +class SessionUpdateEvent(BaseModel): + session: Session + """Realtime session object configuration.""" + + type: Literal["session.update"] + """The event type, must be `session.update`.""" + + event_id: Optional[str] = None + """Optional client-generated ID used to identify this event.""" diff --git a/src/openai/types/beta/realtime/session_update_event_param.py b/src/openai/types/beta/realtime/session_update_event_param.py new file mode 100644 index 0000000000..aa06069b04 --- /dev/null +++ b/src/openai/types/beta/realtime/session_update_event_param.py @@ -0,0 +1,166 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import List, Union, Iterable +from typing_extensions import Literal, Required, TypedDict + +__all__ = [ + "SessionUpdateEventParam", + "Session", + "SessionInputAudioTranscription", + "SessionTool", + "SessionTurnDetection", +] + + +class SessionInputAudioTranscription(TypedDict, total=False): + model: str + """ + The model to use for transcription, `whisper-1` is the only currently supported + model. + """ + + +class SessionTool(TypedDict, total=False): + description: str + """ + The description of the function, including guidance on when and how to call it, + and guidance about what to tell the user when calling (if anything). + """ + + name: str + """The name of the function.""" + + parameters: object + """Parameters of the function in JSON Schema.""" + + type: Literal["function"] + """The type of the tool, i.e. `function`.""" + + +class SessionTurnDetection(TypedDict, total=False): + create_response: bool + """Whether or not to automatically generate a response when VAD is enabled. + + `true` by default. + """ + + prefix_padding_ms: int + """Amount of audio to include before the VAD detected speech (in milliseconds). + + Defaults to 300ms. + """ + + silence_duration_ms: int + """Duration of silence to detect speech stop (in milliseconds). + + Defaults to 500ms. With shorter values the model will respond more quickly, but + may jump in on short pauses from the user. + """ + + threshold: float + """Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5. + + A higher threshold will require louder audio to activate the model, and thus + might perform better in noisy environments. + """ + + type: str + """Type of turn detection, only `server_vad` is currently supported.""" + + +class Session(TypedDict, total=False): + model: Required[ + Literal[ + "gpt-4o-realtime-preview", + "gpt-4o-realtime-preview-2024-10-01", + "gpt-4o-realtime-preview-2024-12-17", + "gpt-4o-mini-realtime-preview", + "gpt-4o-mini-realtime-preview-2024-12-17", + ] + ] + """The Realtime model used for this session.""" + + input_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] + """The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.""" + + input_audio_transcription: SessionInputAudioTranscription + """ + Configuration for input audio transcription, defaults to off and can be set to + `null` to turn off once on. Input audio transcription is not native to the + model, since the model consumes audio directly. Transcription runs + asynchronously through Whisper and should be treated as rough guidance rather + than the representation understood by the model. + """ + + instructions: str + """The default system instructions (i.e. + + system message) prepended to model calls. This field allows the client to guide + the model on desired responses. The model can be instructed on response content + and format, (e.g. "be extremely succinct", "act friendly", "here are examples of + good responses") and on audio behavior (e.g. "talk quickly", "inject emotion + into your voice", "laugh frequently"). The instructions are not guaranteed to be + followed by the model, but they provide guidance to the model on the desired + behavior. + + Note that the server sets default instructions which will be used if this field + is not set and are visible in the `session.created` event at the start of the + session. + """ + + max_response_output_tokens: Union[int, Literal["inf"]] + """ + Maximum number of output tokens for a single assistant response, inclusive of + tool calls. Provide an integer between 1 and 4096 to limit output tokens, or + `inf` for the maximum available tokens for a given model. Defaults to `inf`. + """ + + modalities: List[Literal["text", "audio"]] + """The set of modalities the model can respond with. + + To disable audio, set this to ["text"]. + """ + + output_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] + """The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.""" + + temperature: float + """Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8.""" + + tool_choice: str + """How the model chooses tools. + + Options are `auto`, `none`, `required`, or specify a function. + """ + + tools: Iterable[SessionTool] + """Tools (functions) available to the model.""" + + turn_detection: SessionTurnDetection + """Configuration for turn detection. + + Can be set to `null` to turn off. Server VAD means that the model will detect + the start and end of speech based on audio volume and respond at the end of user + speech. + """ + + voice: Literal["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse"] + """The voice the model uses to respond. + + Voice cannot be changed during the session once the model has responded with + audio at least once. Current voice options are `alloy`, `ash`, `ballad`, + `coral`, `echo` `sage`, `shimmer` and `verse`. + """ + + +class SessionUpdateEventParam(TypedDict, total=False): + session: Required[Session] + """Realtime session object configuration.""" + + type: Required[Literal["session.update"]] + """The event type, must be `session.update`.""" + + event_id: str + """Optional client-generated ID used to identify this event.""" diff --git a/src/openai/types/beta/realtime/session_updated_event.py b/src/openai/types/beta/realtime/session_updated_event.py new file mode 100644 index 0000000000..b9b6488eb3 --- /dev/null +++ b/src/openai/types/beta/realtime/session_updated_event.py @@ -0,0 +1,19 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from .session import Session +from ...._models import BaseModel + +__all__ = ["SessionUpdatedEvent"] + + +class SessionUpdatedEvent(BaseModel): + event_id: str + """The unique ID of the server event.""" + + session: Session + """Realtime session object configuration.""" + + type: Literal["session.updated"] + """The event type, must be `session.updated`.""" diff --git a/src/openai/types/websocket_connection_options.py b/src/openai/types/websocket_connection_options.py new file mode 100644 index 0000000000..40fd24ab03 --- /dev/null +++ b/src/openai/types/websocket_connection_options.py @@ -0,0 +1,36 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import TYPE_CHECKING +from typing_extensions import Sequence, TypedDict + +if TYPE_CHECKING: + from websockets import Subprotocol + from websockets.extensions import ClientExtensionFactory + + +class WebsocketConnectionOptions(TypedDict, total=False): + """Websocket connection options copied from `websockets`. + + For example: https://websockets.readthedocs.io/en/stable/reference/asyncio/client.html#websockets.asyncio.client.connect + """ + + extensions: Sequence[ClientExtensionFactory] | None + """List of supported extensions, in order in which they should be negotiated and run.""" + + subprotocols: Sequence[Subprotocol] | None + """List of supported subprotocols, in order of decreasing preference.""" + + compression: str | None + """The “permessage-deflate” extension is enabled by default. Set compression to None to disable it. See the [compression guide](https://websockets.readthedocs.io/en/stable/topics/compression.html) for details.""" + + # limits + max_size: int | None + """Maximum size of incoming messages in bytes. None disables the limit.""" + + max_queue: int | None | tuple[int | None, int | None] + """High-water mark of the buffer where frames are received. It defaults to 16 frames. The low-water mark defaults to max_queue // 4. You may pass a (high, low) tuple to set the high-water and low-water marks. If you want to disable flow control entirely, you may set it to None, although that’s a bad idea.""" + + write_limit: int | tuple[int, int | None] + """High-water mark of write buffer in bytes. It is passed to set_write_buffer_limits(). It defaults to 32 KiB. You may pass a (high, low) tuple to set the high-water and low-water marks.""" diff --git a/tests/api_resources/beta/test_realtime.py b/tests/api_resources/beta/test_realtime.py new file mode 100644 index 0000000000..537017ffd3 --- /dev/null +++ b/tests/api_resources/beta/test_realtime.py @@ -0,0 +1,17 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import os + +import pytest + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") + + +class TestRealtime: + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + + +class TestAsyncRealtime: + parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"])