diff --git a/docs/speech-usage.rst b/docs/speech-usage.rst index de05fce3973e..27c1309beda1 100644 --- a/docs/speech-usage.rst +++ b/docs/speech-usage.rst @@ -34,10 +34,10 @@ create an instance of :class:`~google.cloud.speech.client.Client`. Asynchronous Recognition ------------------------ -The :meth:`~google.cloud.speech.Client.async_recognize` sends audio data to the -Speech API and initiates a Long Running Operation. Using this operation, you -can periodically poll for recognition results. Use asynchronous requests for -audio data of any duration up to 80 minutes. +The :meth:`~google.cloud.speech.Client.long_running_recognize` sends audio +data to the Speech API and initiates a Long Running Operation. Using this +operation, you can periodically poll for recognition results. Use asynchronous +requests for audio data of any duration up to 80 minutes. .. note:: @@ -54,8 +54,11 @@ See: `Speech Asynchronous Recognize`_ >>> client = speech.Client() >>> sample = client.sample(source_uri='gs://my-bucket/recording.flac', ... encoding=speech.Encoding.LINEAR16, - ... sample_rate=44100) - >>> operation = sample.async_recognize(max_alternatives=2) + ... sample_rate_hertz=44100) + >>> operation = sample.long_running_recognize( + ... language_code='en-US', + ... max_alternatives=2, + ... ) >>> retry_count = 100 >>> while retry_count > 0 and not operation.complete: ... retry_count -= 1 @@ -76,7 +79,7 @@ See: `Speech Asynchronous Recognize`_ Synchronous Recognition ----------------------- -The :meth:`~google.cloud.speech.Client.sync_recognize` method converts speech +The :meth:`~google.cloud.speech.Client.recognize` method converts speech data to text and returns alternative text transcriptions. This example uses ``language_code='en-GB'`` to better recognize a dialect from @@ -88,8 +91,8 @@ Great Britain. >>> client = speech.Client() >>> sample = client.sample(source_uri='gs://my-bucket/recording.flac', ... encoding=speech.Encoding.FLAC, - ... 
sample_rate=44100) - >>> results = sample.sync_recognize( + ... sample_rate_hertz=44100) + >>> results = sample.recognize( ... language_code='en-GB', max_alternatives=2) >>> for result in results: ... for alternative in result.alternatives: @@ -111,9 +114,12 @@ Example of using the profanity filter. >>> client = speech.Client() >>> sample = client.sample(source_uri='gs://my-bucket/recording.flac', ... encoding=speech.Encoding.FLAC, - ... sample_rate=44100) - >>> results = sample.sync_recognize(max_alternatives=1, - ... profanity_filter=True) + ... sample_rate_hertz=44100) + >>> results = sample.recognize( + ... language_code='en-US', + ... max_alternatives=1, + ... profanity_filter=True, + ... ) >>> for result in results: ... for alternative in result.alternatives: ... print('=' * 20) @@ -133,10 +139,13 @@ words to the vocabulary of the recognizer. >>> client = speech.Client() >>> sample = client.sample(source_uri='gs://my-bucket/recording.flac', ... encoding=speech.Encoding.FLAC, - ... sample_rate=44100) + ... sample_rate_hertz=44100) >>> hints = ['hi', 'good afternoon'] - >>> results = sample.sync_recognize(max_alternatives=2, - ... speech_context=hints) + >>> results = sample.recognize( + ... language_code='en-US', + ... max_alternatives=2, + ... speech_contexts=hints, + ... ) >>> for result in results: ... for alternative in result.alternatives: ... print('=' * 20) @@ -165,8 +174,8 @@ speech data to possible text alternatives on the fly. >>> with open('./hello.wav', 'rb') as stream: ... sample = client.sample(stream=stream, ... encoding=speech.Encoding.LINEAR16, - ... sample_rate=16000) - ... results = sample.streaming_recognize() + ... sample_rate_hertz=16000) + ... results = sample.streaming_recognize(language_code='en-US') ... for result in results: ... for alternative in result.alternatives: ... print('=' * 20) @@ -192,8 +201,11 @@ See: `Single Utterance`_ >>> with open('./hello_pause_goodbye.wav', 'rb') as stream: ... 
sample = client.sample(stream=stream, ... encoding=speech.Encoding.LINEAR16, - ... sample_rate=16000) - ... results = sample.streaming_recognize(single_utterance=True) + ... sample_rate_hertz=16000) + ... results = sample.streaming_recognize( + ... language_code='en-US', + ... single_utterance=True, + ... ) ... for result in results: ... for alternative in result.alternatives: ... print('=' * 20) @@ -214,7 +226,10 @@ If ``interim_results`` is set to :data:`True`, interim results ... sample = client.sample(stream=stream, ... encoding=speech.Encoding.LINEAR16, - ... sample_rate=16000) - ... results = sample.streaming_recognize(interim_results=True): + ... sample_rate_hertz=16000) + ... results = sample.streaming_recognize( + ... interim_results=True, + ... language_code='en-US', + ... ) ... for result in results: ... for alternative in result.alternatives: ... print('=' * 20) diff --git a/speech/google/cloud/speech/_gax.py b/speech/google/cloud/speech/_gax.py index da822f8356cc..b643f35aa9c9 100644 --- a/speech/google/cloud/speech/_gax.py +++ b/speech/google/cloud/speech/_gax.py @@ -14,16 +14,15 @@ """GAX/GAPIC module for managing Speech API requests.""" - -from google.cloud.gapic.speech.v1beta1.speech_client import SpeechClient -from google.cloud.proto.speech.v1beta1.cloud_speech_pb2 import RecognitionAudio -from google.cloud.proto.speech.v1beta1.cloud_speech_pb2 import ( +from google.cloud.gapic.speech.v1.speech_client import SpeechClient +from google.cloud.proto.speech.v1.cloud_speech_pb2 import RecognitionAudio +from google.cloud.proto.speech.v1.cloud_speech_pb2 import ( RecognitionConfig) -from google.cloud.proto.speech.v1beta1.cloud_speech_pb2 import ( +from google.cloud.proto.speech.v1.cloud_speech_pb2 import ( SpeechContext) -from google.cloud.proto.speech.v1beta1.cloud_speech_pb2 import ( +from google.cloud.proto.speech.v1.cloud_speech_pb2 import ( StreamingRecognitionConfig) -from google.cloud.proto.speech.v1beta1.cloud_speech_pb2 import ( +from 
google.cloud.proto.speech.v1.cloud_speech_pb2 import ( StreamingRecognizeRequest) from google.longrunning import operations_grpc @@ -62,23 +61,22 @@ def __init__(self, client=None): OPERATIONS_API_HOST, ) - def async_recognize(self, sample, language_code=None, - max_alternatives=None, profanity_filter=None, - speech_context=None): - """Asychronous Recognize request to Google Speech API. + def long_running_recognize(self, sample, language_code, + max_alternatives=None, profanity_filter=None, + speech_contexts=()): + """Long-running Recognize request to Google Speech API. - .. _async_recognize: https://cloud.google.com/speech/reference/\ - rest/v1beta1/speech/asyncrecognize + .. _long_running_recognize: https://cloud.google.com/speech/reference/\ + rest/v1/speech/longrunningrecognize - See `async_recognize`_. + See `long_running_recognize`_. :type sample: :class:`~google.cloud.speech.sample.Sample` :param sample: Instance of ``Sample`` containing audio information. :type language_code: str - :param language_code: (Optional) The language of the supplied audio as - BCP-47 language tag. Example: ``'en-GB'``. - If omitted, defaults to ``'en-US'``. + :param language_code: The language of the supplied audio as + BCP-47 language tag. Example: ``'en-US'``. :type max_alternatives: int :param max_alternatives: (Optional) Maximum number of recognition @@ -94,8 +92,8 @@ def async_recognize(self, sample, language_code=None, asterisks, e.g. ``'f***'``. If False or omitted, profanities won't be filtered out. - :type speech_context: list - :param speech_context: A list of strings (max 50) containing words and + :type speech_contexts: list + :param speech_contexts: A list of strings (max 50) containing words and phrases "hints" so that the speech recognition is more likely to recognize them. This can be used to improve the accuracy for specific words @@ -106,21 +104,27 @@ def async_recognize(self, sample, language_code=None, :returns: Instance of ``Operation`` to poll for results. 
""" config = RecognitionConfig( - encoding=sample.encoding, sample_rate=sample.sample_rate, - language_code=language_code, max_alternatives=max_alternatives, + encoding=sample.encoding, + language_code=language_code, + max_alternatives=max_alternatives, profanity_filter=profanity_filter, - speech_context=SpeechContext(phrases=speech_context)) + sample_rate_hertz=sample.sample_rate_hertz, + speech_contexts=[SpeechContext(phrases=speech_contexts)], + ) audio = RecognitionAudio(content=sample.content, uri=sample.source_uri) api = self._gapic_api - operation_future = api.async_recognize(config=config, audio=audio) + operation_future = api.long_running_recognize( + audio=audio, + config=config, + ) return Operation.from_pb(operation_future.last_operation_data(), self) - def streaming_recognize(self, sample, language_code=None, + def streaming_recognize(self, sample, language_code, max_alternatives=None, profanity_filter=None, - speech_context=None, single_utterance=False, + speech_contexts=(), single_utterance=False, interim_results=False): """Streaming speech recognition. @@ -136,9 +140,8 @@ def streaming_recognize(self, sample, language_code=None, :param sample: Instance of ``Sample`` containing audio information. :type language_code: str - :param language_code: (Optional) The language of the supplied audio as - BCP-47 language tag. Example: ``'en-GB'``. - If omitted, defaults to ``'en-US'``. + :param language_code: The language of the supplied audio as + BCP-47 language tag. Example: ``'en-US'``. :type max_alternatives: int :param max_alternatives: (Optional) Maximum number of recognition @@ -154,8 +157,8 @@ def streaming_recognize(self, sample, language_code=None, asterisks, e.g. ``'f***'``. If False or omitted, profanities won't be filtered out. 
- :type speech_context: list - :param speech_context: A list of strings (max 50) containing words and + :type speech_contexts: list + :param speech_contexts: A list of strings (max 50) containing words and phrases "hints" so that the speech recognition is more likely to recognize them. This can be used to improve the accuracy for specific words @@ -190,7 +193,7 @@ def streaming_recognize(self, sample, language_code=None, :raises: :class:`ValueError` if sample.content is not a file-like object. :class:`ValueError` if stream has closed. - :rtype: :class:`~google.cloud.grpc.speech.v1beta1\ + :rtype: :class:`~google.cloud.grpc.speech.v1\ .cloud_speech_pb2.StreamingRecognizeResponse` :returns: ``StreamingRecognizeResponse`` instances. """ @@ -200,29 +203,28 @@ def streaming_recognize(self, sample, language_code=None, requests = _stream_requests(sample, language_code=language_code, max_alternatives=max_alternatives, profanity_filter=profanity_filter, - speech_context=speech_context, + speech_contexts=speech_contexts, single_utterance=single_utterance, interim_results=interim_results) api = self._gapic_api responses = api.streaming_recognize(requests) return responses - def sync_recognize(self, sample, language_code=None, max_alternatives=None, - profanity_filter=None, speech_context=None): + def recognize(self, sample, language_code, max_alternatives=None, + profanity_filter=None, speech_contexts=()): """Synchronous Speech Recognition. - .. _sync_recognize: https://cloud.google.com/speech/reference/\ - rest/v1beta1/speech/syncrecognize + .. _recognize: https://cloud.google.com/speech/reference/\ + rest/v1/speech/recognize - See `sync_recognize`_. + See `recognize`_. :type sample: :class:`~google.cloud.speech.sample.Sample` :param sample: Instance of ``Sample`` containing audio information. :type language_code: str - :param language_code: (Optional) The language of the supplied audio as - BCP-47 language tag. Example: ``'en-GB'``. - If omitted, defaults to ``'en-US'``. 
+ :param language_code: The language of the supplied audio as + BCP-47 language tag. Example: ``'en-US'``. :type max_alternatives: int :param max_alternatives: (Optional) Maximum number of recognition @@ -238,8 +240,8 @@ def sync_recognize(self, sample, language_code=None, max_alternatives=None, asterisks, e.g. ``'f***'``. If False or omitted, profanities won't be filtered out. - :type speech_context: list - :param speech_context: A list of strings (max 50) containing words and + :type speech_contexts: list + :param speech_contexts: A list of strings (max 50) containing words and phrases "hints" so that the speech recognition is more likely to recognize them. This can be used to improve the accuracy for specific words @@ -252,14 +254,17 @@ def sync_recognize(self, sample, language_code=None, max_alternatives=None, :raises: ValueError if there are no results. """ config = RecognitionConfig( - encoding=sample.encoding, sample_rate=sample.sample_rate, - language_code=language_code, max_alternatives=max_alternatives, + encoding=sample.encoding, + language_code=language_code, + max_alternatives=max_alternatives, profanity_filter=profanity_filter, - speech_context=SpeechContext(phrases=speech_context)) + sample_rate_hertz=sample.sample_rate_hertz, + speech_contexts=[SpeechContext(phrases=speech_contexts)], + ) audio = RecognitionAudio(content=sample.content, uri=sample.source_uri) api = self._gapic_api - api_response = api.sync_recognize(config=config, audio=audio) + api_response = api.recognize(config=config, audio=audio) # Sanity check: If we got no results back, raise an error. 
if len(api_response.results) == 0: @@ -269,8 +274,8 @@ def sync_recognize(self, sample, language_code=None, max_alternatives=None, return [Result.from_pb(result) for result in api_response.results] -def _stream_requests(sample, language_code=None, max_alternatives=None, - profanity_filter=None, speech_context=None, +def _stream_requests(sample, language_code, max_alternatives=None, + profanity_filter=None, speech_contexts=(), single_utterance=None, interim_results=None): """Generate stream of requests from sample. @@ -278,9 +283,8 @@ def _stream_requests(sample, language_code=None, max_alternatives=None, :param sample: Instance of ``Sample`` containing audio information. :type language_code: str - :param language_code: (Optional) The language of the supplied audio as - BCP-47 language tag. Example: ``'en-GB'``. - If omitted, defaults to ``'en-US'``. + :param language_code: The language of the supplied audio as + BCP-47 language tag. Example: ``'en-US'``. :type max_alternatives: int :param max_alternatives: (Optional) Maximum number of recognition @@ -296,13 +300,14 @@ def _stream_requests(sample, language_code=None, max_alternatives=None, asterisks, e.g. ``'f***'``. If False or omitted, profanities won't be filtered out. - :type speech_context: list - :param speech_context: (Optional) A list of strings (max 50) containing - words and phrases "hints" so that the speech - recognition is more likely to recognize them. - This can be used to improve the accuracy for - specific words and phrases. This can also be used to - add new words to the vocabulary of the recognizer. + :type speech_contexts: list + :param speech_contexts: (Optional) A list of strings (max 50) containing + words and phrases "hints" so that the speech + recognition is more likely to recognize them. + This can be used to improve the accuracy for + specific words and phrases. This can also be used + to add new words to the vocabulary of the + recognizer. 
:type single_utterance: bool :param single_utterance: (Optional) If false or omitted, the recognizer @@ -333,7 +338,7 @@ def _stream_requests(sample, language_code=None, max_alternatives=None, config_request = _make_streaming_request( sample, language_code=language_code, max_alternatives=max_alternatives, profanity_filter=profanity_filter, - speech_context=SpeechContext(phrases=speech_context), + speech_contexts=[SpeechContext(phrases=speech_contexts)], single_utterance=single_utterance, interim_results=interim_results) # The config request MUST go first and not contain any audio data. @@ -348,7 +353,7 @@ def _stream_requests(sample, language_code=None, max_alternatives=None, def _make_streaming_request(sample, language_code, max_alternatives, profanity_filter, - speech_context, single_utterance, + speech_contexts, single_utterance, interim_results): """Build streaming request. @@ -374,8 +379,8 @@ def _make_streaming_request(sample, language_code, asterisks, e.g. ``'f***'``. If False or omitted, profanities won't be filtered out. - :type speech_context: list - :param speech_context: A list of strings (max 50) containing words and + :type speech_contexts: list + :param speech_contexts: A list of strings (max 50) containing words and phrases "hints" so that the speech recognition is more likely to recognize them. This can be used to improve the accuracy for specific words @@ -409,13 +414,17 @@ def _make_streaming_request(sample, language_code, returned. :rtype: - :class:`~grpc.speech.v1beta1.cloud_speech_pb2.StreamingRecognizeRequest` + :class:`~grpc.speech.v1.cloud_speech_pb2.StreamingRecognizeRequest` :returns: Instance of ``StreamingRecognizeRequest``. 
""" config = RecognitionConfig( - encoding=sample.encoding, sample_rate=sample.sample_rate, - language_code=language_code, max_alternatives=max_alternatives, - profanity_filter=profanity_filter, speech_context=speech_context) + encoding=sample.encoding, + language_code=language_code, + max_alternatives=max_alternatives, + profanity_filter=profanity_filter, + sample_rate_hertz=sample.sample_rate_hertz, + speech_contexts=speech_contexts, + ) streaming_config = StreamingRecognitionConfig( config=config, single_utterance=single_utterance, diff --git a/speech/google/cloud/speech/_http.py b/speech/google/cloud/speech/_http.py index 870bf62edc67..61990a5a9ff6 100644 --- a/speech/google/cloud/speech/_http.py +++ b/speech/google/cloud/speech/_http.py @@ -38,7 +38,7 @@ class Connection(_http.JSONConnection): API_BASE_URL = 'https://speech.googleapis.com' """The base of the API call URL.""" - API_VERSION = 'v1beta1' + API_VERSION = 'v1' """The version of the API, used in building the API call's URL.""" API_URL_TEMPLATE = '{api_base_url}/{api_version}/{path}' @@ -59,23 +59,22 @@ def __init__(self, client): self._client = client self._connection = Connection(client) - def async_recognize(self, sample, language_code=None, - max_alternatives=None, profanity_filter=None, - speech_context=None): - """Asychronous Recognize request to Google Speech API. + def long_running_recognize(self, sample, language_code, + max_alternatives=None, profanity_filter=None, + speech_contexts=()): + """Long-running Recognize request to Google Speech API. - .. _async_recognize: https://cloud.google.com/speech/reference/\ - rest/v1beta1/speech/asyncrecognize + .. _long_running_recognize: https://cloud.google.com/speech/reference/\ + rest/v1/speech/longrunningrecognize - See `async_recognize`_. + See `long_running_recognize`_. :type sample: :class:`~google.cloud.speech.sample.Sample` :param sample: Instance of ``Sample`` containing audio information. 
:type language_code: str - :param language_code: (Optional) The language of the supplied audio as - BCP-47 language tag. Example: ``'en-GB'``. - If omitted, defaults to ``'en-US'``. + :param language_code: The language of the supplied audio as + BCP-47 language tag. Example: ``'en-US'``. :type max_alternatives: int :param max_alternatives: (Optional) Maximum number of recognition @@ -91,8 +90,8 @@ def async_recognize(self, sample, language_code=None, asterisks, e.g. ``'f***'``. If False or omitted, profanities won't be filtered out. - :type speech_context: list - :param speech_context: A list of strings (max 50) containing words and + :type speech_contexts: list + :param speech_contexts: A list of strings (max 50) containing words and phrases "hints" so that the speech recognition is more likely to recognize them. This can be used to improve the accuracy for specific words @@ -103,30 +102,29 @@ def async_recognize(self, sample, language_code=None, :returns: Operation for asynchronous request to Google Speech API. """ data = _build_request_data(sample, language_code, max_alternatives, - profanity_filter, speech_context) + profanity_filter, speech_contexts) api_response = self._connection.api_request( - method='POST', path='speech:asyncrecognize', data=data) + method='POST', path='speech:longrunningrecognize', data=data) operation = Operation.from_dict(api_response, self._client) - operation.caller_metadata['request_type'] = 'AsyncRecognize' + operation.caller_metadata['request_type'] = 'LongRunningRecognize' return operation - def sync_recognize(self, sample, language_code=None, max_alternatives=None, - profanity_filter=None, speech_context=None): + def recognize(self, sample, language_code, max_alternatives=None, + profanity_filter=None, speech_contexts=()): """Synchronous Speech Recognition. - .. _sync_recognize: https://cloud.google.com/speech/reference/\ - rest/v1beta1/speech/syncrecognize + .. 
_recognize: https://cloud.google.com/speech/reference/\ + rest/v1/speech/recognize - See `sync_recognize`_. + See `recognize`_. :type sample: :class:`~google.cloud.speech.sample.Sample` :param sample: Instance of ``Sample`` containing audio information. :type language_code: str - :param language_code: (Optional) The language of the supplied audio as - BCP-47 language tag. Example: ``'en-GB'``. - If omitted, defaults to ``'en-US'``. + :param language_code: The language of the supplied audio as + BCP-47 language tag. Example: ``'en-US'``. :type max_alternatives: int :param max_alternatives: (Optional) Maximum number of recognition @@ -142,8 +140,8 @@ def sync_recognize(self, sample, language_code=None, max_alternatives=None, asterisks, e.g. ``'f***'``. If False or omitted, profanities won't be filtered out. - :type speech_context: list - :param speech_context: A list of strings (max 50) containing words and + :type speech_contexts: list + :param speech_contexts: A list of strings (max 50) containing words and phrases "hints" so that the speech recognition is more likely to recognize them. This can be used to improve the accuracy for specific words @@ -162,9 +160,9 @@ def sync_recognize(self, sample, language_code=None, max_alternatives=None, :raises: ValueError if more than one result is returned or no results. 
""" data = _build_request_data(sample, language_code, max_alternatives, - profanity_filter, speech_context) + profanity_filter, speech_contexts) api_response = self._connection.api_request( - method='POST', path='speech:syncrecognize', data=data) + method='POST', path='speech:recognize', data=data) if len(api_response['results']) > 0: results = api_response['results'] @@ -173,17 +171,16 @@ def sync_recognize(self, sample, language_code=None, max_alternatives=None, raise ValueError('No results were returned from the API') -def _build_request_data(sample, language_code=None, max_alternatives=None, - profanity_filter=None, speech_context=None): +def _build_request_data(sample, language_code, max_alternatives=None, + profanity_filter=None, speech_contexts=()): """Builds the request data before making API request. :type sample: :class:`~google.cloud.speech.sample.Sample` :param sample: Instance of ``Sample`` containing audio information. :type language_code: str - :param language_code: (Optional) The language of the supplied audio as - BCP-47 language tag. Example: ``'en-GB'``. - If omitted, defaults to ``'en-US'``. + :param language_code: The language of the supplied audio as + BCP-47 language tag. Example: ``'en-US'``. :type max_alternatives: int :param max_alternatives: (Optional) Maximum number of recognition @@ -199,8 +196,8 @@ def _build_request_data(sample, language_code=None, max_alternatives=None, asterisks, e.g. ``'f***'``. If False or omitted, profanities won't be filtered out. - :type speech_context: list - :param speech_context: A list of strings (max 50) containing words and + :type speech_contexts: list + :param speech_contexts: A list of strings (max 50) containing words and phrases "hints" so that the speech recognition is more likely to recognize them. 
This can be used to improve the accuracy for specific words @@ -216,17 +213,18 @@ def _build_request_data(sample, language_code=None, max_alternatives=None, else: audio = {'uri': sample.source_uri} - config = {'encoding': sample.encoding, - 'sampleRate': sample.sample_rate} + config = { + 'encoding': sample.encoding, + 'languageCode': language_code, + 'sampleRateHertz': sample.sample_rate_hertz, + } - if language_code is not None: - config['languageCode'] = language_code if max_alternatives is not None: config['maxAlternatives'] = max_alternatives if profanity_filter is not None: config['profanityFilter'] = profanity_filter - if speech_context is not None: - config['speechContext'] = {'phrases': speech_context} + if speech_contexts: + config['speechContext'] = {'phrases': speech_contexts} data = { 'audio': audio, diff --git a/speech/google/cloud/speech/alternative.py b/speech/google/cloud/speech/alternative.py index af71788a1923..9b48e4a5c8d5 100644 --- a/speech/google/cloud/speech/alternative.py +++ b/speech/google/cloud/speech/alternative.py @@ -45,7 +45,7 @@ def from_pb(cls, alternative): """Factory: construct ``Alternative`` from protobuf response. :type alternative: - :class:`google.cloud.speech.v1beta1.SpeechRecognitionAlternative` + :class:`google.cloud.speech.v1.SpeechRecognitionAlternative` :param alternative: Instance of ``SpeechRecognitionAlternative`` from protobuf. diff --git a/speech/google/cloud/speech/client.py b/speech/google/cloud/speech/client.py index 4751bad8e417..f9eb211c4a80 100644 --- a/speech/google/cloud/speech/client.py +++ b/speech/google/cloud/speech/client.py @@ -68,7 +68,7 @@ def __init__(self, credentials=None, _http=None, _use_grpc=None): self._use_grpc = _use_grpc def sample(self, content=None, source_uri=None, stream=None, encoding=None, - sample_rate=None): + sample_rate_hertz=None): """Factory: construct Sample to use when making recognize requests. 
:type content: bytes @@ -90,19 +90,21 @@ def sample(self, content=None, source_uri=None, stream=None, encoding=None, :attr:`~.Encoding.FLAC`, :attr:`~.Encoding.MULAW`, :attr:`~.Encoding.AMR`, :attr:`~.Encoding.AMR_WB` - :type sample_rate: int - :param sample_rate: Sample rate in Hertz of the audio data sent in all - requests. Valid values are: 8000-48000. For best - results, set the sampling rate of the audio source - to 16000 Hz. If that's not possible, use the - native sample rate of the audio source (instead of - re-sampling). + :type sample_rate_hertz: int + :param sample_rate_hertz: Sample rate in Hertz of the audio data sent + in all requests. Valid values are: + 8000-48000. For best results, set the + sampling rate of the audio source + to 16000 Hz. If that's not possible, use the + native sample rate of the audio source + (instead of re-sampling). :rtype: :class:`~google.cloud.speech.sample.Sample` :returns: Instance of ``Sample``. """ return Sample(content=content, source_uri=source_uri, stream=stream, - encoding=encoding, sample_rate=sample_rate, client=self) + encoding=encoding, sample_rate_hertz=sample_rate_hertz, + client=self) @property def speech_api(self): diff --git a/speech/google/cloud/speech/encoding.py b/speech/google/cloud/speech/encoding.py index dfdaaab31d46..9519ecfce4e4 100644 --- a/speech/google/cloud/speech/encoding.py +++ b/speech/google/cloud/speech/encoding.py @@ -19,7 +19,7 @@ class Encoding(object): """Audio encoding types. 
See: - https://cloud.google.com/speech/reference/rest/v1beta1/RecognitionConfig#AudioEncoding + https://cloud.google.com/speech/reference/rest/v1/RecognitionConfig#AudioEncoding """ LINEAR16 = 'LINEAR16' diff --git a/speech/google/cloud/speech/operation.py b/speech/google/cloud/speech/operation.py index 330328ce0b3b..dda9ab449b87 100644 --- a/speech/google/cloud/speech/operation.py +++ b/speech/google/cloud/speech/operation.py @@ -14,14 +14,14 @@ """Long running operation representation for Google Speech API""" -from google.cloud.proto.speech.v1beta1 import cloud_speech_pb2 +from google.cloud.proto.speech.v1 import cloud_speech_pb2 from google.cloud import operation from google.cloud.speech.result import Result -operation.register_type(cloud_speech_pb2.AsyncRecognizeMetadata) -operation.register_type(cloud_speech_pb2.AsyncRecognizeResponse) +operation.register_type(cloud_speech_pb2.LongRunningRecognizeMetadata) +operation.register_type(cloud_speech_pb2.LongRunningRecognizeResponse) class Operation(operation.Operation): diff --git a/speech/google/cloud/speech/result.py b/speech/google/cloud/speech/result.py index 83d4f629abfd..6aa01313e8b3 100644 --- a/speech/google/cloud/speech/result.py +++ b/speech/google/cloud/speech/result.py @@ -34,7 +34,7 @@ def __init__(self, alternatives): def from_pb(cls, result): """Factory: construct instance of ``Result``. - :type result: :class:`~google.cloud.proto.speech.v1beta1\ + :type result: :class:`~google.cloud.proto.speech.v1\ .cloud_speech_pb2.SpeechRecognitionResult` :param result: Instance of ``SpeechRecognitionResult`` protobuf. @@ -51,7 +51,7 @@ def from_api_repr(cls, result): :type result: dict :param result: Dictionary of a :class:`~google.cloud.proto.speech.\ - v1beta1.cloud_speech_pb2.SpeechRecognitionResult` + v1.cloud_speech_pb2.SpeechRecognitionResult` :rtype: :class:`~google.cloud.speech.result.Result` :returns: Instance of ``Result``. 
@@ -101,7 +101,7 @@ def __init__(self, alternatives, is_final=False, stability=0.0): def from_pb(cls, response): """Factory: construct instance of ``StreamingSpeechResult``. - :type response: :class:`~google.cloud.proto.speech.v1beta1\ + :type response: :class:`~google.cloud.proto.speech.v1\ .cloud_speech_pb2.StreamingRecognizeResult` :param response: Instance of ``StreamingRecognizeResult`` protobuf. diff --git a/speech/google/cloud/speech/sample.py b/speech/google/cloud/speech/sample.py index ac685bfd9e11..673e6ab4969c 100644 --- a/speech/google/cloud/speech/sample.py +++ b/speech/google/cloud/speech/sample.py @@ -14,7 +14,6 @@ """Sample class to handle content for Google Cloud Speech API.""" - from google.cloud.speech.encoding import Encoding from google.cloud.speech.result import StreamingSpeechResult @@ -41,13 +40,13 @@ class Sample(object): :attr:`~.Encoding.FLAC`, :attr:`~.Encoding.MULAW`, :attr:`~.Encoding.AMR`, :attr:`~.Encoding.AMR_WB` - :type sample_rate: int - :param sample_rate: Sample rate in Hertz of the audio data sent in all - requests. Valid values are: 8000-48000. For best - results, set the sampling rate of the audio source - to 16000 Hz. If that's not possible, use the - native sample rate of the audio source (instead of - re-sampling). + :type sample_rate_hertz: int + :param sample_rate_hertz: Sample rate in Hertz of the audio data sent in + all requests. Valid values are: 8000-48000. For + best results, set the sampling rate of the audio + source to 16000 Hz. If that's not possible, use + the native sample rate of the audio source + (instead of re-sampling). :type client: :class:`~google.cloud.speech.client.Client` :param client: (Optional) The client that owns this instance of sample. 
@@ -55,7 +54,7 @@ class Sample(object): default_encoding = Encoding.FLAC def __init__(self, content=None, source_uri=None, stream=None, - encoding=None, sample_rate=None, client=None): + encoding=None, sample_rate_hertz=None, client=None): self._client = client sources = [content is not None, source_uri is not None, @@ -68,10 +67,11 @@ def __init__(self, content=None, source_uri=None, stream=None, self._source_uri = source_uri self._stream = stream - if sample_rate is not None and not 8000 <= sample_rate <= 48000: - raise ValueError('The value of sample_rate must be between 8000' - ' and 48000.') - self._sample_rate = sample_rate + if (sample_rate_hertz is not None and + not 8000 <= sample_rate_hertz <= 48000): + raise ValueError('The value of sample_rate_hertz must be between ' + '8000 and 48000.') + self._sample_rate_hertz = sample_rate_hertz if encoding is not None and getattr(Encoding, encoding, False): self._encoding = getattr(Encoding, encoding) @@ -85,7 +85,7 @@ def chunk_size(self): :rtype: int :returns: Optimized chunk size. """ - return int(self.sample_rate / 10.0) + return int(self.sample_rate_hertz / 10.0) @property def source_uri(self): @@ -106,13 +106,13 @@ def content(self): return self._content @property - def sample_rate(self): + def sample_rate_hertz(self): """Sample rate integer. :rtype: int :returns: Integer between 8000 and 48,000. """ - return self._sample_rate + return self._sample_rate_hertz @property def stream(self): @@ -132,19 +132,18 @@ def encoding(self): """ return self._encoding - def async_recognize(self, language_code=None, max_alternatives=None, - profanity_filter=None, speech_context=None): + def long_running_recognize(self, language_code, max_alternatives=None, + profanity_filter=None, speech_contexts=()): """Asychronous Recognize request to Google Speech API. - .. _async_recognize: https://cloud.google.com/speech/reference/\ - rest/v1beta1/speech/asyncrecognize + .. 
_long_running_recognize: https://cloud.google.com/speech/reference/\ + rest/v1/speech/longrunningrecognize - See `async_recognize`_. + See `long_running_recognize`_. :type language_code: str :param language_code: (Optional) The language of the supplied audio as - BCP-47 language tag. Example: ``'en-GB'``. - If omitted, defaults to ``'en-US'``. + BCP-47 language tag. Example: ``'en-US'``. :type max_alternatives: int :param max_alternatives: (Optional) Maximum number of recognition @@ -160,27 +159,28 @@ def async_recognize(self, language_code=None, max_alternatives=None, asterisks, e.g. ``'f***'``. If False or omitted, profanities won't be filtered out. - :type speech_context: list - :param speech_context: A list of strings (max 50) containing words and - phrases "hints" so that the speech recognition - is more likely to recognize them. This can be - used to improve the accuracy for specific words - and phrases. This can also be used to add new - words to the vocabulary of the recognizer. + :type speech_contexts: list + :param speech_contexts: A list of strings (max 50) containing words and + phrases "hints" so that the speech recognition + is more likely to recognize them. This can be + used to improve the accuracy for specific words + and phrases. This can also be used to add new + words to the vocabulary of the recognizer. :rtype: :class:`~google.cloud.speech.operation.Operation` :returns: Operation for asynchronous request to Google Speech API. 
""" if self.encoding is not Encoding.LINEAR16: raise ValueError('Only LINEAR16 encoding is supported by ' - 'asynchronous speech requests.') + 'long-running speech requests.') api = self._client.speech_api - return api.async_recognize(self, language_code, max_alternatives, - profanity_filter, speech_context) + return api.long_running_recognize( + self, language_code, max_alternatives, profanity_filter, + speech_contexts) - def streaming_recognize(self, language_code=None, + def streaming_recognize(self, language_code, max_alternatives=None, profanity_filter=None, - speech_context=None, single_utterance=False, + speech_contexts=(), single_utterance=False, interim_results=False): """Streaming speech recognition. @@ -194,9 +194,8 @@ def streaming_recognize(self, language_code=None, containing results and metadata from the streaming request. :type language_code: str - :param language_code: (Optional) The language of the supplied audio as - BCP-47 language tag. Example: ``'en-GB'``. - If omitted, defaults to ``'en-US'``. + :param language_code: The language of the supplied audio as + BCP-47 language tag. Example: ``'en-US'``. :type max_alternatives: int :param max_alternatives: (Optional) Maximum number of recognition @@ -212,13 +211,13 @@ def streaming_recognize(self, language_code=None, asterisks, e.g. ``'f***'``. If False or omitted, profanities won't be filtered out. - :type speech_context: list - :param speech_context: A list of strings (max 50) containing words and - phrases "hints" so that the speech recognition - is more likely to recognize them. This can be - used to improve the accuracy for specific words - and phrases. This can also be used to add new - words to the vocabulary of the recognizer. + :type speech_contexts: list + :param speech_contexts: A list of strings (max 50) containing words and + phrases "hints" so that the speech recognition + is more likely to recognize them. This can be + used to improve the accuracy for specific words + and phrases. 
This can also be used to add new + words to the vocabulary of the recognizer. :type single_utterance: bool :param single_utterance: (Optional) If false or omitted, the recognizer @@ -253,26 +252,25 @@ def streaming_recognize(self, language_code=None, api = self._client.speech_api responses = api.streaming_recognize(self, language_code, max_alternatives, profanity_filter, - speech_context, single_utterance, + speech_contexts, single_utterance, interim_results) for response in responses: for result in response.results: if result.is_final or interim_results: yield StreamingSpeechResult.from_pb(result) - def sync_recognize(self, language_code=None, max_alternatives=None, - profanity_filter=None, speech_context=None): + def recognize(self, language_code, max_alternatives=None, + profanity_filter=None, speech_contexts=()): """Synchronous Speech Recognition. - .. _sync_recognize: https://cloud.google.com/speech/reference/\ - rest/v1beta1/speech/syncrecognize + .. _recognize: https://cloud.google.com/speech/reference/\ + rest/v1/speech/recognize - See `sync_recognize`_. + See `recognize`_. :type language_code: str - :param language_code: (Optional) The language of the supplied audio as - BCP-47 language tag. Example: ``'en-GB'``. - If omitted, defaults to ``'en-US'``. + :param language_code: The language of the supplied audio as + BCP-47 language tag. Example: ``'en-US'``. :type max_alternatives: int :param max_alternatives: (Optional) Maximum number of recognition @@ -288,13 +286,13 @@ def sync_recognize(self, language_code=None, max_alternatives=None, asterisks, e.g. ``'f***'``. If False or omitted, profanities won't be filtered out. - :type speech_context: list - :param speech_context: A list of strings (max 50) containing words and - phrases "hints" so that the speech recognition - is more likely to recognize them. This can be - used to improve the accuracy for specific words - and phrases. This can also be used to add new - words to the vocabulary of the recognizer. 
+ :type speech_contexts: list + :param speech_contexts: A list of strings (max 50) containing words and + phrases "hints" so that the speech recognition + is more likely to recognize them. This can be + used to improve the accuracy for specific words + and phrases. This can also be used to add new + words to the vocabulary of the recognizer. :rtype: list :returns: A list of dictionaries. One dict for each alternative. Each @@ -306,5 +304,5 @@ def sync_recognize(self, language_code=None, max_alternatives=None, between 0 and 1. """ api = self._client.speech_api - return api.sync_recognize(self, language_code, max_alternatives, - profanity_filter, speech_context) + return api.recognize(self, language_code, max_alternatives, + profanity_filter, speech_contexts) diff --git a/speech/setup.py b/speech/setup.py index 17a3d7585b70..7a0eb1010a5a 100644 --- a/speech/setup.py +++ b/speech/setup.py @@ -52,7 +52,7 @@ REQUIREMENTS = [ 'google-cloud-core >= 0.24.0, < 0.25dev', 'grpcio >= 1.0.2, < 2.0dev', - 'gapic-google-cloud-speech-v1beta1 >= 0.15.2, < 0.16dev', + 'gapic-google-cloud-speech-v1 >= 0.15.3, < 0.16dev', ] setup( diff --git a/speech/tests/system.py b/speech/tests/system.py index dd3dfdf14314..0c4acfb52767 100644 --- a/speech/tests/system.py +++ b/speech/tests/system.py @@ -13,6 +13,7 @@ # limitations under the License. import os +import time import unittest from google.cloud import exceptions @@ -46,6 +47,9 @@ def _wait_until_complete(operation, max_attempts=10): :rtype: bool :returns: Boolean indicating if the operation is complete. """ + # This bizarre delay is necessary because the v1 API seems to return + # the v1beta1 type URL sometimes if you poll too soon. 
+ time.sleep(3) retry = RetryResult(_operation_complete, max_tries=max_attempts) return retry(operation.poll)() @@ -94,36 +98,47 @@ def tearDown(self): def _make_sync_request(self, content=None, source_uri=None, max_alternatives=None): client = Config.CLIENT - sample = client.sample(content=content, - source_uri=source_uri, - encoding=speech.Encoding.LINEAR16, - sample_rate=16000) - return sample.sync_recognize(language_code='en-US', - max_alternatives=max_alternatives, - profanity_filter=True, - speech_context=['Google', 'cloud']) + sample = client.sample( + content=content, + encoding=speech.Encoding.LINEAR16, + sample_rate_hertz=16000, + source_uri=source_uri, + ) + return sample.recognize( + language_code='en-US', + max_alternatives=max_alternatives, + profanity_filter=True, + speech_contexts=['Google', 'cloud'], + ) def _make_async_request(self, content=None, source_uri=None, max_alternatives=None): client = Config.CLIENT - sample = client.sample(content=content, - source_uri=source_uri, - encoding=speech.Encoding.LINEAR16, - sample_rate=16000) - return sample.async_recognize(language_code='en-US', - max_alternatives=max_alternatives, - profanity_filter=True, - speech_context=['Google', 'cloud']) + sample = client.sample( + content=content, + encoding=speech.Encoding.LINEAR16, + sample_rate_hertz=16000, + source_uri=source_uri, + ) + return sample.long_running_recognize( + language_code='en-US', + max_alternatives=max_alternatives, + profanity_filter=True, + speech_contexts=['Google', 'cloud'], + ) def _make_streaming_request(self, file_obj, single_utterance=True, interim_results=False): client = Config.CLIENT sample = client.sample(stream=file_obj, encoding=speech.Encoding.LINEAR16, - sample_rate=16000) - return sample.streaming_recognize(single_utterance=single_utterance, - interim_results=interim_results, - speech_context=['hello', 'google']) + sample_rate_hertz=16000) + return sample.streaming_recognize( + interim_results=interim_results, + 
language_code='en-US', + single_utterance=single_utterance, + speech_contexts=['hello', 'google'], + ) def _check_results(self, alternatives, num_results=1): self.assertEqual(len(alternatives), num_results) @@ -169,7 +184,6 @@ def test_async_recognize_local_file(self): operation = self._make_async_request(content=content, max_alternatives=2) - _wait_until_complete(operation) self.assertEqual(len(operation.results), 1) alternatives = operation.results[0].alternatives diff --git a/speech/tests/unit/_fixtures.py b/speech/tests/unit/_fixtures.py index c193960d9137..989bb2b54d9f 100644 --- a/speech/tests/unit/_fixtures.py +++ b/speech/tests/unit/_fixtures.py @@ -37,7 +37,7 @@ 'name': '123456789', 'metadata': { '@type': ('type.googleapis.com/' - 'google.cloud.speech.v1beta1.AsyncRecognizeMetadata'), + 'google.cloud.speech.v1.LongRunningRecognizeMetadata'), 'progressPercent': 100, 'startTime': '2016-09-22T17:52:25.536964Z', 'lastUpdateTime': '2016-09-22T17:52:27.802902Z', @@ -45,7 +45,7 @@ 'done': True, 'response': { '@type': ('type.googleapis.com/' - 'google.cloud.speech.v1beta1.AsyncRecognizeResponse'), + 'google.cloud.speech.v1.LongRunningRecognizeResponse'), 'results': [ { 'alternatives': [ @@ -63,7 +63,7 @@ 'name': '123456789', 'metadata': { '@type': ('type.googleapis.com/' - 'google.cloud.speech.v1beta1.AsyncRecognizeMetadata'), + 'google.cloud.speech.v1.LongRunningRecognizeMetadata'), 'progressPercent': 27, 'startTime': '2016-09-22T17:52:25.536964Z', 'lastUpdateTime': '2016-09-22T17:52:27.802902Z', diff --git a/speech/tests/unit/test__gax.py b/speech/tests/unit/test__gax.py index e5d7624a2378..7cf44ba58f6e 100644 --- a/speech/tests/unit/test__gax.py +++ b/speech/tests/unit/test__gax.py @@ -38,7 +38,7 @@ def _make_one(self, *args, **kw): 'google.cloud._helpers.make_secure_channel', return_value=mock.sentinel.channel) @mock.patch( - 'google.cloud.gapic.speech.v1beta1.speech_client.SpeechClient', + 'google.cloud.gapic.speech.v1.speech_client.SpeechClient', 
SERVICE_ADDRESS='hey.you.guys') @mock.patch( 'google.cloud._helpers.make_secure_stub', @@ -76,36 +76,36 @@ class TestSpeechGAXMakeRequests(unittest.TestCase): AUDIO_CONTENT = b'/9j/4QNURXhpZgAASUkq' def _call_fut(self, sample, language_code, max_alternatives, - profanity_filter, speech_context, single_utterance, + profanity_filter, speech_contexts, single_utterance, interim_results): from google.cloud.speech._gax import _make_streaming_request return _make_streaming_request( sample=sample, language_code=language_code, max_alternatives=max_alternatives, - profanity_filter=profanity_filter, speech_context=speech_context, + profanity_filter=profanity_filter, speech_contexts=speech_contexts, single_utterance=single_utterance, interim_results=interim_results) def test_ctor(self): from google.cloud import speech from google.cloud.speech.sample import Sample - from google.cloud.proto.speech.v1beta1.cloud_speech_pb2 import ( + from google.cloud.proto.speech.v1.cloud_speech_pb2 import ( RecognitionConfig, SpeechContext, StreamingRecognitionConfig, StreamingRecognizeRequest) sample = Sample( content=self.AUDIO_CONTENT, encoding=speech.Encoding.FLAC, - sample_rate=self.SAMPLE_RATE) + sample_rate_hertz=self.SAMPLE_RATE) language_code = 'US-en' max_alternatives = 2 profanity_filter = True - speech_context = SpeechContext(phrases=self.HINTS) + speech_contexts = [SpeechContext(phrases=self.HINTS)] single_utterance = True interim_results = False streaming_request = self._call_fut( sample, language_code, max_alternatives, profanity_filter, - speech_context, single_utterance, interim_results) + speech_contexts, single_utterance, interim_results) self.assertIsInstance(streaming_request, StreamingRecognizeRequest) # This isn't set by _make_streaming_request(). @@ -121,11 +121,11 @@ def test_ctor(self): config = streaming_config.config self.assertIsInstance(config, RecognitionConfig) self.assertEqual(config.encoding, 2) # speech.Encoding.FLAC maps to 2. 
- self.assertEqual(config.sample_rate, self.SAMPLE_RATE) + self.assertEqual(config.sample_rate_hertz, self.SAMPLE_RATE) self.assertEqual(config.language_code, language_code) self.assertEqual(config.max_alternatives, max_alternatives) self.assertTrue(config.profanity_filter) - self.assertEqual(config.speech_context.phrases, self.HINTS) + self.assertEqual(config.speech_contexts[0].phrases, self.HINTS) class TestSpeechGAXMakeRequestsStream(unittest.TestCase): @@ -134,35 +134,35 @@ class TestSpeechGAXMakeRequestsStream(unittest.TestCase): AUDIO_CONTENT = b'/9j/4QNURXhpZgAASUkq' def _call_fut(self, sample, language_code, max_alternatives, - profanity_filter, speech_context, single_utterance, + profanity_filter, speech_contexts, single_utterance, interim_results): from google.cloud.speech._gax import _stream_requests return _stream_requests( sample=sample, language_code=language_code, max_alternatives=max_alternatives, - profanity_filter=profanity_filter, speech_context=speech_context, + profanity_filter=profanity_filter, speech_contexts=speech_contexts, single_utterance=single_utterance, interim_results=interim_results) def test_stream_requests(self): from io import BytesIO from google.cloud import speech from google.cloud.speech.sample import Sample - from google.cloud.proto.speech.v1beta1.cloud_speech_pb2 import ( + from google.cloud.proto.speech.v1.cloud_speech_pb2 import ( StreamingRecognitionConfig, StreamingRecognizeRequest) sample = Sample( stream=BytesIO(self.AUDIO_CONTENT), encoding=speech.Encoding.FLAC, - sample_rate=self.SAMPLE_RATE) + sample_rate_hertz=self.SAMPLE_RATE) language_code = 'US-en' max_alternatives = 2 profanity_filter = True - speech_context = self.HINTS + speech_contexts = self.HINTS single_utterance = True interim_results = False streaming_requests = self._call_fut( sample, language_code, max_alternatives, profanity_filter, - speech_context, single_utterance, interim_results) + speech_contexts, single_utterance, interim_results) all_requests = 
[] for streaming_request in streaming_requests: self.assertIsInstance(streaming_request, StreamingRecognizeRequest) diff --git a/speech/tests/unit/test_alternative.py b/speech/tests/unit/test_alternative.py index 7a79d37e43b8..ceb7321f02de 100644 --- a/speech/tests/unit/test_alternative.py +++ b/speech/tests/unit/test_alternative.py @@ -54,7 +54,7 @@ def test_from_api_repr_with_no_confidence(self): self.assertIsNone(alternative.confidence) def test_from_pb_with_no_confidence(self): - from google.cloud.proto.speech.v1beta1 import cloud_speech_pb2 + from google.cloud.proto.speech.v1 import cloud_speech_pb2 text = 'the double trouble' pb_value = cloud_speech_pb2.SpeechRecognitionAlternative( diff --git a/speech/tests/unit/test_client.py b/speech/tests/unit/test_client.py index dbf1d82d34e5..f971bb4865d1 100644 --- a/speech/tests/unit/test_client.py +++ b/speech/tests/unit/test_client.py @@ -24,7 +24,7 @@ def _make_credentials(): def _make_result(alternatives=()): - from google.cloud.proto.speech.v1beta1 import cloud_speech_pb2 + from google.cloud.proto.speech.v1 import cloud_speech_pb2 return cloud_speech_pb2.SpeechRecognitionResult( alternatives=[ @@ -37,7 +37,7 @@ def _make_result(alternatives=()): def _make_streaming_result(alternatives=(), is_final=True, stability=1.0): - from google.cloud.proto.speech.v1beta1 import cloud_speech_pb2 + from google.cloud.proto.speech.v1 import cloud_speech_pb2 return cloud_speech_pb2.StreamingRecognitionResult( alternatives=[ @@ -52,7 +52,7 @@ def _make_streaming_result(alternatives=(), is_final=True, stability=1.0): def _make_streaming_response(*results): - from google.cloud.proto.speech.v1beta1 import cloud_speech_pb2 + from google.cloud.proto.speech.v1 import cloud_speech_pb2 response = cloud_speech_pb2.StreamingRecognizeResponse( results=results, @@ -61,9 +61,9 @@ def _make_streaming_response(*results): def _make_sync_response(*results): - from google.cloud.proto.speech.v1beta1 import cloud_speech_pb2 + from 
google.cloud.proto.speech.v1 import cloud_speech_pb2 - response = cloud_speech_pb2.SyncRecognizeResponse( + response = cloud_speech_pb2.RecognizeResponse( results=results, ) return response @@ -106,17 +106,17 @@ def test_create_sample_from_client(self): sample = client.sample( source_uri=self.AUDIO_SOURCE_URI, encoding=speech.Encoding.FLAC, - sample_rate=self.SAMPLE_RATE) + sample_rate_hertz=self.SAMPLE_RATE) self.assertIsInstance(sample, Sample) self.assertEqual(sample.source_uri, self.AUDIO_SOURCE_URI) - self.assertEqual(sample.sample_rate, self.SAMPLE_RATE) + self.assertEqual(sample.sample_rate_hertz, self.SAMPLE_RATE) self.assertEqual(sample.encoding, speech.Encoding.FLAC) content_sample = client.sample( content=self.AUDIO_CONTENT, encoding=speech.Encoding.FLAC, - sample_rate=self.SAMPLE_RATE) + sample_rate_hertz=self.SAMPLE_RATE) self.assertEqual(content_sample.content, self.AUDIO_CONTENT) - self.assertEqual(content_sample.sample_rate, self.SAMPLE_RATE) + self.assertEqual(content_sample.sample_rate_hertz, self.SAMPLE_RATE) self.assertEqual(content_sample.encoding, speech.Encoding.FLAC) def test_sync_recognize_content_with_optional_params_no_gax(self): @@ -134,7 +134,7 @@ def test_sync_recognize_content_with_optional_params_no_gax(self): 'config': { 'encoding': 'FLAC', 'maxAlternatives': 2, - 'sampleRate': 16000, + 'sampleRateHertz': 16000, 'speechContext': { 'phrases': [ 'hi', @@ -157,18 +157,18 @@ def test_sync_recognize_content_with_optional_params_no_gax(self): sample = client.sample( content=self.AUDIO_CONTENT, encoding=encoding, - sample_rate=self.SAMPLE_RATE) + sample_rate_hertz=self.SAMPLE_RATE) - response = sample.sync_recognize( + response = sample.recognize( language_code='EN', max_alternatives=2, profanity_filter=True, - speech_context=self.HINTS) + speech_contexts=self.HINTS) self.assertEqual(len(connection._requested), 1) req = connection._requested[0] self.assertEqual(len(req), 3) self.assertEqual(req['data'], request) 
self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], 'speech:syncrecognize') + self.assertEqual(req['path'], 'speech:recognize') alternative = SYNC_RECOGNIZE_RESPONSE['results'][0]['alternatives'][0] expected = Alternative.from_api_repr(alternative) @@ -189,7 +189,8 @@ def test_sync_recognize_source_uri_without_optional_params_no_gax(self): request = { 'config': { 'encoding': 'FLAC', - 'sampleRate': 16000, + 'languageCode': 'en-US', + 'sampleRateHertz': 16000, }, 'audio': { 'uri': self.AUDIO_SOURCE_URI, @@ -205,16 +206,16 @@ def test_sync_recognize_source_uri_without_optional_params_no_gax(self): sample = client.sample( source_uri=self.AUDIO_SOURCE_URI, encoding=encoding, - sample_rate=self.SAMPLE_RATE) + sample_rate_hertz=self.SAMPLE_RATE) - response = [i for i in sample.sync_recognize()] + response = [i for i in sample.recognize(language_code='en-US')] self.assertEqual(len(connection._requested), 1) req = connection._requested[0] self.assertEqual(len(req), 3) self.assertEqual(req['data'], request) self.assertEqual(req['method'], 'POST') - self.assertEqual(req['path'], 'speech:syncrecognize') + self.assertEqual(req['path'], 'speech:recognize') expected = Alternative.from_api_repr( SYNC_RECOGNIZE_RESPONSE['results'][0]['alternatives'][0]) @@ -239,10 +240,10 @@ def test_sync_recognize_with_empty_results_no_gax(self): sample = client.sample( source_uri=self.AUDIO_SOURCE_URI, encoding=speech.Encoding.FLAC, - sample_rate=self.SAMPLE_RATE) + sample_rate_hertz=self.SAMPLE_RATE) with self.assertRaises(ValueError): - next(sample.sync_recognize()) + next(sample.recognize(language_code='en-US')) def test_sync_recognize_with_empty_results_gax(self): from google.cloud._testing import _Monkey @@ -281,10 +282,10 @@ def speech_api(channel=None, **kwargs): sample = client.sample( source_uri=self.AUDIO_SOURCE_URI, encoding=speech.Encoding.FLAC, - sample_rate=self.SAMPLE_RATE) + sample_rate_hertz=self.SAMPLE_RATE) with self.assertRaises(ValueError): - 
next(sample.sync_recognize()) + next(sample.recognize(language_code='en-US')) def test_sync_recognize_with_gax(self): from google.cloud._testing import _Monkey @@ -322,7 +323,7 @@ def speech_api(channel=None, **kwargs): sample = client.sample( source_uri=self.AUDIO_SOURCE_URI, encoding=speech.Encoding.FLAC, - sample_rate=self.SAMPLE_RATE) + sample_rate_hertz=self.SAMPLE_RATE) with _Monkey(_gax, SpeechClient=speech_api, make_secure_channel=make_channel): @@ -334,7 +335,7 @@ def speech_api(channel=None, **kwargs): self.assertEqual( channel_args, [(creds, _gax.DEFAULT_USER_AGENT, host)]) - results = [i for i in sample.sync_recognize()] + results = [i for i in sample.recognize(language_code='en-US')] self.assertEqual(len(results), 1) result = results[0] @@ -358,9 +359,9 @@ def test_async_supported_encodings(self): sample = client.sample( source_uri=self.AUDIO_SOURCE_URI, encoding=speech.Encoding.FLAC, - sample_rate=self.SAMPLE_RATE) + sample_rate_hertz=self.SAMPLE_RATE) with self.assertRaises(ValueError): - sample.async_recognize() + sample.recognize(language_code='en-US') def test_async_recognize_no_gax(self): from google.cloud import speech @@ -375,13 +376,16 @@ def test_async_recognize_no_gax(self): speech_api._connection = _Connection(RETURNED) sample = client.sample( - encoding=speech.Encoding.LINEAR16, sample_rate=self.SAMPLE_RATE, - source_uri=self.AUDIO_SOURCE_URI) - operation = sample.async_recognize() + encoding=speech.Encoding.LINEAR16, + sample_rate_hertz=self.SAMPLE_RATE, + source_uri=self.AUDIO_SOURCE_URI, + ) + operation = sample.long_running_recognize(language_code='en-US') self.assertIsInstance(operation, Operation) self.assertIs(operation.client, client) - self.assertEqual( - operation.caller_metadata, {'request_type': 'AsyncRecognize'}) + self.assertEqual(operation.caller_metadata, { + 'request_type': 'LongRunningRecognize', + }) self.assertFalse(operation.complete) self.assertIsNone(operation.metadata) @@ -404,8 +408,10 @@ def make_channel(*args): 
return channel_obj sample = client.sample( - encoding=speech.Encoding.LINEAR16, sample_rate=self.SAMPLE_RATE, - source_uri=self.AUDIO_SOURCE_URI) + encoding=speech.Encoding.LINEAR16, + sample_rate_hertz=self.SAMPLE_RATE, + source_uri=self.AUDIO_SOURCE_URI, + ) def speech_api(channel=None, **kwargs): return _MockGAPICSpeechAPI(channel=channel, **kwargs) @@ -424,7 +430,7 @@ def speech_api(channel=None, **kwargs): low_level.SERVICE_ADDRESS) self.assertEqual(channel_args, [expected]) - operation = sample.async_recognize() + operation = sample.long_running_recognize(language_code='en-US') self.assertIsInstance(operation, Operation) self.assertFalse(operation.complete) self.assertIsNone(operation.response) @@ -436,10 +442,10 @@ def test_streaming_depends_on_gax(self): client = self._make_one(credentials=credentials, _use_grpc=False) sample = client.sample( content=self.AUDIO_CONTENT, encoding=speech.Encoding.LINEAR16, - sample_rate=self.SAMPLE_RATE) + sample_rate_hertz=self.SAMPLE_RATE) with self.assertRaises(EnvironmentError): - list(sample.streaming_recognize()) + list(sample.streaming_recognize(language_code='en-US')) def test_streaming_closed_stream(self): from io import BytesIO @@ -472,14 +478,14 @@ def speech_api(channel=None, **kwargs): sample = client.sample( stream=stream, encoding=Encoding.LINEAR16, - sample_rate=self.SAMPLE_RATE) + sample_rate_hertz=self.SAMPLE_RATE) with _Monkey(_gax, SpeechClient=speech_api, make_secure_channel=make_channel): client._speech_api = _gax.GAPICSpeechAPI(client) with self.assertRaises(ValueError): - list(sample.streaming_recognize()) + list(sample.streaming_recognize(language_code='en-US')) def test_stream_recognize_interim_results(self): from io import BytesIO @@ -532,9 +538,12 @@ def speech_api(channel=None, **kwargs): sample = client.sample( stream=stream, encoding=Encoding.LINEAR16, - sample_rate=self.SAMPLE_RATE) + sample_rate_hertz=self.SAMPLE_RATE) - results = list(sample.streaming_recognize(interim_results=True)) + 
results = list(sample.streaming_recognize( + interim_results=True, + language_code='en-US', + )) self.assertEqual(len(results), 3) for result in results: @@ -617,9 +626,9 @@ def speech_api(channel=None, **kwargs): sample = client.sample( stream=stream, encoding=Encoding.LINEAR16, - sample_rate=self.SAMPLE_RATE) + sample_rate_hertz=self.SAMPLE_RATE) - results = list(sample.streaming_recognize()) + results = list(sample.streaming_recognize(language_code='en-US')) self.assertEqual(len(results), 1) result = results[0] self.assertEqual( @@ -662,9 +671,9 @@ def speech_api(channel=None, **kwargs): sample = client.sample( stream=stream, encoding=Encoding.LINEAR16, - sample_rate=self.SAMPLE_RATE) + sample_rate_hertz=self.SAMPLE_RATE) - results = list(sample.streaming_recognize()) + results = list(sample.streaming_recognize(language_code='en-US')) self.assertEqual(results, []) def test_speech_api_with_gax(self): @@ -731,21 +740,21 @@ def __init__(self, response=None, channel=None, **kwargs): self._channel = channel self._kwargs = kwargs - def async_recognize(self, config, audio): + def long_running_recognize(self, config, audio): from google.gapic.longrunning.operations_client import OperationsClient from google.gax import _OperationFuture from google.longrunning.operations_pb2 import Operation - from google.cloud.proto.speech.v1beta1.cloud_speech_pb2 import ( - AsyncRecognizeResponse) + from google.cloud.proto.speech.v1.cloud_speech_pb2 import ( + LongRunningRecognizeResponse) self.config = config self.audio = audio operations_client = mock.Mock(spec=OperationsClient) operation_future = _OperationFuture( - Operation(), operations_client, AsyncRecognizeResponse, {}) + Operation(), operations_client, LongRunningRecognizeResponse, {}) return operation_future - def sync_recognize(self, config, audio): + def recognize(self, config, audio): self.config = config self.audio = audio diff --git a/speech/tests/unit/test_operation.py b/speech/tests/unit/test_operation.py index 
a5ed1d30e780..d41aaaacc7df 100644 --- a/speech/tests/unit/test_operation.py +++ b/speech/tests/unit/test_operation.py @@ -44,7 +44,7 @@ def test_constructor(self): @staticmethod def _make_result(transcript, confidence): - from google.cloud.proto.speech.v1beta1 import cloud_speech_pb2 + from google.cloud.proto.speech.v1 import cloud_speech_pb2 return cloud_speech_pb2.SpeechRecognitionResult( alternatives=[ @@ -56,13 +56,13 @@ def _make_result(transcript, confidence): ) def _make_operation_pb(self, *results): - from google.cloud.proto.speech.v1beta1 import cloud_speech_pb2 + from google.cloud.proto.speech.v1 import cloud_speech_pb2 from google.longrunning import operations_pb2 from google.protobuf.any_pb2 import Any any_pb = None if results: - result_pb = cloud_speech_pb2.AsyncRecognizeResponse( + result_pb = cloud_speech_pb2.LongRunningRecognizeResponse( results=results, ) type_url = 'type.googleapis.com/%s' % ( @@ -108,13 +108,13 @@ def test__update_state_with_response(self): self.assertIsInstance(result.alternatives[0], Alternative) def test__update_state_with_empty_response(self): - from google.cloud.proto.speech.v1beta1 import cloud_speech_pb2 + from google.cloud.proto.speech.v1 import cloud_speech_pb2 from google.longrunning import operations_pb2 from google.protobuf.any_pb2 import Any # Simulate an empty response (rather than no response yet, which # is distinct). 
- response = cloud_speech_pb2.AsyncRecognizeResponse(results=[]) + response = cloud_speech_pb2.LongRunningRecognizeResponse(results=[]) type_url = 'type.googleapis.com/%s' % response.DESCRIPTOR.full_name any_pb = Any( type_url=type_url, diff --git a/speech/tests/unit/test_result.py b/speech/tests/unit/test_result.py index 3e9ba36140d5..1e8971d6028a 100644 --- a/speech/tests/unit/test_result.py +++ b/speech/tests/unit/test_result.py @@ -30,7 +30,7 @@ def test_ctor(self): self.assertIsInstance(result, self._get_target_class()) def test_from_pb(self): - from google.cloud.proto.speech.v1beta1 import cloud_speech_pb2 + from google.cloud.proto.speech.v1 import cloud_speech_pb2 confidence = 0.625 transcript = 'this is a test transcript' diff --git a/speech/tests/unit/test_sample.py b/speech/tests/unit/test_sample.py index cbae7da80c8d..73b40dd0041c 100644 --- a/speech/tests/unit/test_sample.py +++ b/speech/tests/unit/test_sample.py @@ -33,10 +33,10 @@ def test_initialize_sample(self): sample = self._make_one( source_uri=self.AUDIO_SOURCE_URI, encoding=Encoding.FLAC, - sample_rate=self.SAMPLE_RATE) + sample_rate_hertz=self.SAMPLE_RATE) self.assertEqual(sample.source_uri, self.AUDIO_SOURCE_URI) self.assertEqual(sample.encoding, Encoding.FLAC) - self.assertEqual(sample.sample_rate, self.SAMPLE_RATE) + self.assertEqual(sample.sample_rate_hertz, self.SAMPLE_RATE) def test_content_and_source_uri(self): from io import BytesIO @@ -64,7 +64,7 @@ def test_stream_property(self): stream = BytesIO(data) sample = self._make_one( stream=stream, encoding=Encoding.FLAC, - sample_rate=self.SAMPLE_RATE) + sample_rate_hertz=self.SAMPLE_RATE) self.assertEqual(sample.stream, stream) self.assertEqual(sample.stream.read(), data) @@ -76,25 +76,27 @@ def test_bytes_converts_to_file_like_object(self): sample = Sample( content=test_bytes, encoding=speech.Encoding.FLAC, - sample_rate=self.SAMPLE_RATE) + sample_rate_hertz=self.SAMPLE_RATE) self.assertEqual(sample.content, test_bytes) 
self.assertEqual(sample.encoding, speech.Encoding.FLAC) - self.assertEqual(sample.sample_rate, self.SAMPLE_RATE) + self.assertEqual(sample.sample_rate_hertz, self.SAMPLE_RATE) def test_sample_rates(self): from google.cloud.speech.encoding import Encoding with self.assertRaises(ValueError): self._make_one( - source_uri=self.AUDIO_SOURCE_URI, sample_rate=7999) + source_uri=self.AUDIO_SOURCE_URI, sample_rate_hertz=7999) with self.assertRaises(ValueError): self._make_one( - source_uri=self.AUDIO_SOURCE_URI, sample_rate=48001) + source_uri=self.AUDIO_SOURCE_URI, sample_rate_hertz=48001) sample = self._make_one( - source_uri=self.AUDIO_SOURCE_URI, sample_rate=self.SAMPLE_RATE, - encoding=Encoding.FLAC) - self.assertEqual(sample.sample_rate, self.SAMPLE_RATE) + encoding=Encoding.FLAC, + sample_rate_hertz=self.SAMPLE_RATE, + source_uri=self.AUDIO_SOURCE_URI, + ) + self.assertEqual(sample.sample_rate_hertz, self.SAMPLE_RATE) self.assertEqual(sample.encoding, Encoding.FLAC) def test_encoding(self): @@ -102,12 +104,29 @@ def test_encoding(self): with self.assertRaises(ValueError): self._make_one( - source_uri=self.AUDIO_SOURCE_URI, sample_rate=self.SAMPLE_RATE, - encoding='OGG') + encoding='OGG', + sample_rate_hertz=self.SAMPLE_RATE, + source_uri=self.AUDIO_SOURCE_URI, + ) with self.assertRaises(ValueError): self._make_one( - source_uri=self.AUDIO_SOURCE_URI, sample_rate=self.SAMPLE_RATE) + sample_rate_hertz=self.SAMPLE_RATE, + source_uri=self.AUDIO_SOURCE_URI, + ) sample = self._make_one( - source_uri=self.AUDIO_SOURCE_URI, sample_rate=self.SAMPLE_RATE, - encoding=Encoding.FLAC) + encoding=Encoding.FLAC, + sample_rate_hertz=self.SAMPLE_RATE, + source_uri=self.AUDIO_SOURCE_URI, + ) self.assertEqual(sample.encoding, Encoding.FLAC) + + def test_async_linear16_only(self): + from google.cloud.speech.encoding import Encoding + + sample = self._make_one( + encoding=Encoding.FLAC, + sample_rate_hertz=self.SAMPLE_RATE, + source_uri=self.AUDIO_SOURCE_URI, + ) + with 
self.assertRaises(ValueError): + sample.long_running_recognize(language_code='en-US')