Speech v1 (#3266)

This updates our manual client library to the Speech v1 API. This entails several **backwards-incompatible changes**:

* The `language_code` parameter is no longer optional anywhere. It must be explicitly specified, and does _not_ default to `'en-US'`.
* The `sync_recognize` method has been renamed to `recognize` on every class where it appears.
* The `async_recognize` method has been renamed to `long_running_recognize` on every class where it appears.
* The `sample_rate` parameter and property have been renamed to `sample_rate_hertz` everywhere they appear.

Additionally, the backend API contains a backwards-incompatible change which does not require a code change in the client library, but will likely require one downstream: the `START_OF_SPEECH`, `END_OF_SPEECH`, and `END_OF_AUDIO` events have been removed.
googleapis · Apr 5, 2017 · d225480 · d225480
1 parent bc6f2ef
commit d225480
Show file tree

Hide file tree

Showing 17 changed files with 346 additions and 297 deletions.
diff --git a/packages/google-cloud-python-speech/google/cloud/speech/_gax.py b/packages/google-cloud-python-speech/google/cloud/speech/_gax.py
@@ -14,16 +14,15 @@
 
 """GAX/GAPIC module for managing Speech API requests."""
 
-
-from google.cloud.gapic.speech.v1beta1.speech_client import SpeechClient
-from google.cloud.proto.speech.v1beta1.cloud_speech_pb2 import RecognitionAudio
-from google.cloud.proto.speech.v1beta1.cloud_speech_pb2 import (
+from google.cloud.gapic.speech.v1.speech_client import SpeechClient
+from google.cloud.proto.speech.v1.cloud_speech_pb2 import RecognitionAudio
+from google.cloud.proto.speech.v1.cloud_speech_pb2 import (
     RecognitionConfig)
-from google.cloud.proto.speech.v1beta1.cloud_speech_pb2 import (
+from google.cloud.proto.speech.v1.cloud_speech_pb2 import (
     SpeechContext)
-from google.cloud.proto.speech.v1beta1.cloud_speech_pb2 import (
+from google.cloud.proto.speech.v1.cloud_speech_pb2 import (
     StreamingRecognitionConfig)
-from google.cloud.proto.speech.v1beta1.cloud_speech_pb2 import (
+from google.cloud.proto.speech.v1.cloud_speech_pb2 import (
     StreamingRecognizeRequest)
 from google.longrunning import operations_grpc
 
@@ -62,23 +61,22 @@ def __init__(self, client=None):
             OPERATIONS_API_HOST,
         )
 
-    def async_recognize(self, sample, language_code=None,
-                        max_alternatives=None, profanity_filter=None,
-                        speech_context=None):
-        """Asychronous Recognize request to Google Speech API.
+    def long_running_recognize(self, sample, language_code,
+                               max_alternatives=None, profanity_filter=None,
+                               speech_contexts=()):
+        """Long-running Recognize request to Google Speech API.
 
-        .. _async_recognize: https://cloud.google.com/speech/reference/\
-                             rest/v1beta1/speech/asyncrecognize
+        .. _long_running_recognize: https://cloud.google.com/speech/reference/\
+                                    rest/v1/speech/longrunningrecognize
 
-        See `async_recognize`_.
+        See `long_running_recognize`_.
 
         :type sample: :class:`~google.cloud.speech.sample.Sample`
         :param sample: Instance of ``Sample`` containing audio information.
 
         :type language_code: str
-        :param language_code: (Optional) The language of the supplied audio as
-                              BCP-47 language tag. Example: ``'en-GB'``.
-                              If omitted, defaults to ``'en-US'``.
+        :param language_code: The language of the supplied audio as
+                              BCP-47 language tag. Example: ``'en-US'``.
 
         :type max_alternatives: int
         :param max_alternatives: (Optional) Maximum number of recognition
@@ -94,8 +92,8 @@ def async_recognize(self, sample, language_code=None,
                                  asterisks, e.g. ``'f***'``. If False or
                                  omitted, profanities won't be filtered out.
 
-        :type speech_context: list
-        :param speech_context: A list of strings (max 50) containing words and
+        :type speech_contexts: list
+        :param speech_contexts: A list of strings (max 50) containing words and
                                phrases "hints" so that the speech recognition
                                is more likely to recognize them. This can be
                                used to improve the accuracy for specific words
@@ -106,21 +104,27 @@ def async_recognize(self, sample, language_code=None,
         :returns: Instance of ``Operation`` to poll for results.
         """
         config = RecognitionConfig(
-            encoding=sample.encoding, sample_rate=sample.sample_rate,
-            language_code=language_code, max_alternatives=max_alternatives,
+            encoding=sample.encoding,
+            language_code=language_code,
+            max_alternatives=max_alternatives,
             profanity_filter=profanity_filter,
-            speech_context=SpeechContext(phrases=speech_context))
+            sample_rate_hertz=sample.sample_rate_hertz,
+            speech_contexts=[SpeechContext(phrases=speech_contexts)],
+        )
 
         audio = RecognitionAudio(content=sample.content,
                                  uri=sample.source_uri)
         api = self._gapic_api
-        operation_future = api.async_recognize(config=config, audio=audio)
+        operation_future = api.long_running_recognize(
+            audio=audio,
+            config=config,
+        )
 
         return Operation.from_pb(operation_future.last_operation_data(), self)
 
-    def streaming_recognize(self, sample, language_code=None,
+    def streaming_recognize(self, sample, language_code,
                             max_alternatives=None, profanity_filter=None,
-                            speech_context=None, single_utterance=False,
+                            speech_contexts=(), single_utterance=False,
                             interim_results=False):
         """Streaming speech recognition.
 
@@ -136,9 +140,8 @@ def streaming_recognize(self, sample, language_code=None,
         :param sample: Instance of ``Sample`` containing audio information.
 
         :type language_code: str
-        :param language_code: (Optional) The language of the supplied audio as
-                              BCP-47 language tag. Example: ``'en-GB'``.
-                              If omitted, defaults to ``'en-US'``.
+        :param language_code: The language of the supplied audio as
+                              BCP-47 language tag. Example: ``'en-US'``.
 
         :type max_alternatives: int
         :param max_alternatives: (Optional) Maximum number of recognition
@@ -154,8 +157,8 @@ def streaming_recognize(self, sample, language_code=None,
                                  asterisks, e.g. ``'f***'``. If False or
                                  omitted, profanities won't be filtered out.
 
-        :type speech_context: list
-        :param speech_context: A list of strings (max 50) containing words and
+        :type speech_contexts: list
+        :param speech_contexts: A list of strings (max 50) containing words and
                                phrases "hints" so that the speech recognition
                                is more likely to recognize them. This can be
                                used to improve the accuracy for specific words
@@ -190,7 +193,7 @@ def streaming_recognize(self, sample, language_code=None,
         :raises: :class:`ValueError` if sample.content is not a file-like
                  object. :class:`ValueError` if stream has closed.
 
-        :rtype: :class:`~google.cloud.grpc.speech.v1beta1\
+        :rtype: :class:`~google.cloud.grpc.speech.v1\
                        .cloud_speech_pb2.StreamingRecognizeResponse`
         :returns: ``StreamingRecognizeResponse`` instances.
         """
@@ -200,29 +203,28 @@ def streaming_recognize(self, sample, language_code=None,
         requests = _stream_requests(sample, language_code=language_code,
                                     max_alternatives=max_alternatives,
                                     profanity_filter=profanity_filter,
-                                    speech_context=speech_context,
+                                    speech_contexts=speech_contexts,
                                     single_utterance=single_utterance,
                                     interim_results=interim_results)
         api = self._gapic_api
         responses = api.streaming_recognize(requests)
         return responses
 
-    def sync_recognize(self, sample, language_code=None, max_alternatives=None,
-                       profanity_filter=None, speech_context=None):
+    def recognize(self, sample, language_code, max_alternatives=None,
+                  profanity_filter=None, speech_contexts=()):
         """Synchronous Speech Recognition.
 
-        .. _sync_recognize: https://cloud.google.com/speech/reference/\
-                            rest/v1beta1/speech/syncrecognize
+        .. _recognize: https://cloud.google.com/speech/reference/\
+                       rest/v1/speech/recognize
 
-        See `sync_recognize`_.
+        See `recognize`_.
 
         :type sample: :class:`~google.cloud.speech.sample.Sample`
         :param sample: Instance of ``Sample`` containing audio information.
 
         :type language_code: str
-        :param language_code: (Optional) The language of the supplied audio as
-                              BCP-47 language tag. Example: ``'en-GB'``.
-                              If omitted, defaults to ``'en-US'``.
+        :param language_code: The language of the supplied audio as
+                              BCP-47 language tag. Example: ``'en-US'``.
 
         :type max_alternatives: int
         :param max_alternatives: (Optional) Maximum number of recognition
@@ -238,8 +240,8 @@ def sync_recognize(self, sample, language_code=None, max_alternatives=None,
                                  asterisks, e.g. ``'f***'``. If False or
                                  omitted, profanities won't be filtered out.
 
-        :type speech_context: list
-        :param speech_context: A list of strings (max 50) containing words and
+        :type speech_contexts: list
+        :param speech_contexts: A list of strings (max 50) containing words and
                                phrases "hints" so that the speech recognition
                                is more likely to recognize them. This can be
                                used to improve the accuracy for specific words
@@ -252,14 +254,17 @@ def sync_recognize(self, sample, language_code=None, max_alternatives=None,
         :raises: ValueError if there are no results.
         """
         config = RecognitionConfig(
-            encoding=sample.encoding, sample_rate=sample.sample_rate,
-            language_code=language_code, max_alternatives=max_alternatives,
+            encoding=sample.encoding,
+            language_code=language_code,
+            max_alternatives=max_alternatives,
             profanity_filter=profanity_filter,
-            speech_context=SpeechContext(phrases=speech_context))
+            sample_rate_hertz=sample.sample_rate_hertz,
+            speech_contexts=[SpeechContext(phrases=speech_contexts)],
+        )
         audio = RecognitionAudio(content=sample.content,
                                  uri=sample.source_uri)
         api = self._gapic_api
-        api_response = api.sync_recognize(config=config, audio=audio)
+        api_response = api.recognize(config=config, audio=audio)
 
         # Sanity check: If we got no results back, raise an error.
         if len(api_response.results) == 0:
@@ -269,18 +274,17 @@ def sync_recognize(self, sample, language_code=None, max_alternatives=None,
         return [Result.from_pb(result) for result in api_response.results]
 
 
-def _stream_requests(sample, language_code=None, max_alternatives=None,
-                     profanity_filter=None, speech_context=None,
+def _stream_requests(sample, language_code, max_alternatives=None,
+                     profanity_filter=None, speech_contexts=(),
                      single_utterance=None, interim_results=None):
     """Generate stream of requests from sample.
 
     :type sample: :class:`~google.cloud.speech.sample.Sample`
     :param sample: Instance of ``Sample`` containing audio information.
 
     :type language_code: str
-    :param language_code: (Optional) The language of the supplied audio as
-                          BCP-47 language tag. Example: ``'en-GB'``.
-                          If omitted, defaults to ``'en-US'``.
+    :param language_code: The language of the supplied audio as
+                          BCP-47 language tag. Example: ``'en-US'``.
 
     :type max_alternatives: int
     :param max_alternatives: (Optional) Maximum number of recognition
@@ -296,13 +300,14 @@ def _stream_requests(sample, language_code=None, max_alternatives=None,
                              asterisks, e.g. ``'f***'``. If False or
                              omitted, profanities won't be filtered out.
 
-    :type speech_context: list
-    :param speech_context: (Optional) A list of strings (max 50) containing
-                           words and phrases "hints" so that the speech
-                           recognition is more likely to recognize them.
-                           This can be used to improve the accuracy for
-                           specific words and phrases. This can also be used to
-                           add new words to the vocabulary of the recognizer.
+    :type speech_contexts: list
+    :param speech_contexts: (Optional) A list of strings (max 50) containing
+                            words and phrases "hints" so that the speech
+                            recognition is more likely to recognize them.
+                            This can be used to improve the accuracy for
+                            specific words and phrases. This can also be used
+                            to add new words to the vocabulary of the
+                            recognizer.
 
     :type single_utterance: bool
     :param single_utterance: (Optional) If false or omitted, the recognizer
@@ -333,7 +338,7 @@ def _stream_requests(sample, language_code=None, max_alternatives=None,
     config_request = _make_streaming_request(
         sample, language_code=language_code, max_alternatives=max_alternatives,
         profanity_filter=profanity_filter,
-        speech_context=SpeechContext(phrases=speech_context),
+        speech_contexts=[SpeechContext(phrases=speech_contexts)],
         single_utterance=single_utterance, interim_results=interim_results)
 
     # The config request MUST go first and not contain any audio data.
@@ -348,7 +353,7 @@ def _stream_requests(sample, language_code=None, max_alternatives=None,
 
 def _make_streaming_request(sample, language_code,
                             max_alternatives, profanity_filter,
-                            speech_context, single_utterance,
+                            speech_contexts, single_utterance,
                             interim_results):
     """Build streaming request.
 
@@ -374,8 +379,8 @@ def _make_streaming_request(sample, language_code,
                              asterisks, e.g. ``'f***'``. If False or
                              omitted, profanities won't be filtered out.
 
-    :type speech_context: list
-    :param speech_context: A list of strings (max 50) containing words and
+    :type speech_contexts: list
+    :param speech_contexts: A list of strings (max 50) containing words and
                            phrases "hints" so that the speech recognition
                            is more likely to recognize them. This can be
                            used to improve the accuracy for specific words
@@ -409,13 +414,17 @@ def _make_streaming_request(sample, language_code,
                             returned.
 
     :rtype:
-        :class:`~grpc.speech.v1beta1.cloud_speech_pb2.StreamingRecognizeRequest`
+        :class:`~grpc.speech.v1.cloud_speech_pb2.StreamingRecognizeRequest`
     :returns: Instance of ``StreamingRecognizeRequest``.
     """
     config = RecognitionConfig(
-        encoding=sample.encoding, sample_rate=sample.sample_rate,
-        language_code=language_code, max_alternatives=max_alternatives,
-        profanity_filter=profanity_filter, speech_context=speech_context)
+        encoding=sample.encoding,
+        language_code=language_code,
+        max_alternatives=max_alternatives,
+        profanity_filter=profanity_filter,
+        sample_rate_hertz=sample.sample_rate_hertz,
+        speech_contexts=speech_contexts,
+    )
 
     streaming_config = StreamingRecognitionConfig(
         config=config, single_utterance=single_utterance,