Make API surface more usable.

googleapis · Oct 13, 2016 · a49643f · a49643f
1 parent fd07cd7
commit a49643f
Show file tree

Hide file tree

Showing 9 changed files with 159 additions and 247 deletions.
diff --git a/docs/speech-usage.rst b/docs/speech-usage.rst
@@ -157,24 +157,47 @@ data to possible text alternatives on the fly.
 
     See: https://cloud.google.com/speech/limits#content
 
+.. code-block:: python
+
     >>> import io
     >>> from google.cloud import speech
     >>> from google.cloud.speech.encoding import Encoding
     >>> client = speech.Client()
     >>> with io.open('./hello.wav', 'rb') as stream:
     >>>     sample = client.sample(stream=stream, encoding=Encoding.LINEAR16,
     ...                            sample_rate=16000)
-    >>>     stream_container = client.stream_recognize(sample)
-    >>> print(stream_container)
-    <google.cloud.speech.streaming.container.StreamingResponseContainer object at 0x10538ee10>
-    >>> print(stream_container.responses)
-    {0: <google.cloud.speech.streaming.response.StreamingSpeechResponse object at 0x10f9ac9d0>}
-    >>> print(stream_container.responses[0].results[0].alternatives[0].confidence)
-    0.698092460632
-    >>> print(stream_container.is_finished)
+    >>>     for response in client.stream_recognize(sample):
+    ...         print(response.transcript)
+    ...         print(response.is_final)
+    hello
     True
-    >>> print stream_container.get_full_text()
+
+
+By setting ``interim_results`` to ``True``, interim results (tentative
+hypotheses) may be returned as they become available; these are marked with
+``is_final`` set to ``False``. If ``interim_results`` is ``False`` or omitted,
+only results with ``is_final`` set to ``True`` are returned.
+
+.. code-block:: python
+
+    >>> import io
+    >>> from google.cloud import speech
+    >>> from google.cloud.speech.encoding import Encoding
+    >>> client = speech.Client()
+    >>> with io.open('./hello.wav', 'rb') as stream:
+    >>>     sample = client.sample(stream=stream, encoding=Encoding.LINEAR16,
+    ...                            sample_rate=16000)
+    >>>     for response in client.stream_recognize(sample,
+    ...                                             interim_results=True):
+    ...         print(response.transcript)
+    ...         print(response.is_final)
+    hell
+    False
     hello
+    True
+
 
 By default the recognizer will perform continuous recognition
 (continuing to process audio even if the user pauses speaking) until the client
@@ -195,32 +218,6 @@ See: `Single Utterance`_
     >>> print(stream_container.get_full_text())
     hello
 
-
-If ``interim_results`` is set to ``True``, interim results
-(tentative hypotheses) may be returned as they become available.
-
-  .. code-block:: python
-
-    >>> with io.open('./hello_pause_goodbye.wav', 'rb') as stream:
-    >>>     sample = client.sample(stream=stream, encoding=Encoding.LINEAR16,
-    ...                            sample_rate=16000)
-    >>>     stream_container = client.stream_recognize(sample,
-    ...                                                interim_results=True)
-    >>> print(stream_container.get_full_text())
-    hello
-
-    >>> sample = client.sample(source_uri='gs://my-bucket/recording.flac',
-    ...                        encoding=Encoding.FLAC,
-    ...                        sample_rate=44100)
-    >>> results = client.stream_recognize(sample, interim_results=True)
-    >>> print(stream_container.responses[0].results[0].alternatives[0].transcript)
-    how
-    print(stream_container.responses[1].results[0].alternatives[0].transcript)
-    hello
-    >>> print(stream_container.responses[1].results[2].is_final)
-    True
-
-
 .. _Single Utterance: https://cloud.google.com/speech/reference/rpc/google.cloud.speech.v1beta1#streamingrecognitionconfig
 .. _sync_recognize: https://cloud.google.com/speech/reference/rest/v1beta1/speech/syncrecognize
 .. _Speech Asynchronous Recognize: https://cloud.google.com/speech/reference/rest/v1beta1/speech/asyncrecognize
diff --git a/speech/google/cloud/speech/client.py b/speech/google/cloud/speech/client.py
@@ -26,7 +26,7 @@
 from google.cloud.speech.operation import Operation
 from google.cloud.speech.streaming.request import _make_request_stream
 from google.cloud.speech.sample import Sample
-from google.cloud.speech.streaming.container import StreamingResponseContainer
+from google.cloud.speech.streaming.response import StreamingSpeechResponse
 
 try:
     from google.cloud.gapic.speech.v1beta1.speech_api import SpeechApi
@@ -302,12 +302,9 @@ def stream_recognize(self, sample, language_code=None,
                                         single_utterance=single_utterance,
                                         interim_results=interim_results)
 
-        responses = StreamingResponseContainer()
         for response in self.speech_api.streaming_recognize(requests):
-            if response:
-                responses.add_response(response)
-
-        return responses
+            if hasattr(response, 'results') or interim_results:
+                yield StreamingSpeechResponse.from_pb(response)
 
     @property
     def speech_api(self):

diff --git a/speech/google/cloud/speech/streaming/container.py b/speech/google/cloud/speech/streaming/container.py
diff --git a/speech/google/cloud/speech/streaming/endpointer_type.py b/speech/google/cloud/speech/streaming/endpointer_type.py
@@ -0,0 +1,14 @@
+class EndpointerType(object):
+    """Endpointer event codes used by streaming speech responses.
+
+    Each class attribute is the integer code of one endpointer event, and
+    ``reverse_map`` translates a code back to the name of its constant.
+    """
+    ENDPOINTER_EVENT_UNSPECIFIED = 0
+    START_OF_SPEECH = 1
+    END_OF_SPEECH = 2
+    END_OF_AUDIO = 3
+    END_OF_UTTERANCE = 4
+
+    # Integer code -> constant name; the tuple is listed in code order so
+    # that ``enumerate`` assigns each name its own code.
+    reverse_map = dict(enumerate((
+        'ENDPOINTER_EVENT_UNSPECIFIED',
+        'START_OF_SPEECH',
+        'END_OF_SPEECH',
+        'END_OF_AUDIO',
+        'END_OF_UTTERANCE',
+    )))
diff --git a/speech/google/cloud/speech/streaming/response.py b/speech/google/cloud/speech/streaming/response.py
@@ -14,6 +14,7 @@
 
 """Representation of a GAPIC Speech API response."""
 
+from google.cloud.speech.streaming.endpointer_type import EndpointerType
 from google.cloud.speech.streaming.result import StreamingSpeechResult
 
 
@@ -34,9 +35,12 @@ class StreamingSpeechResponse(object):
     :param result_index: Index for specific result set. Used for updating with
                          ``interim_results``.
     """
-    def __init__(self, error, endpointer_type, results, result_index):
+    def __init__(self, error=None, endpointer_type=None, results=None,
+                 result_index=None):
+        results = results or []
         self._error = error
-        self._endpointer_type = endpointer_type  # Should be enum.
+        self._endpointer_type = EndpointerType.reverse_map.get(
+            endpointer_type, None)
         self._result_index = result_index
         self._results = [StreamingSpeechResult.from_pb(result)
                          for result in results]
@@ -56,7 +60,41 @@ def from_pb(cls, pb_response):
         endpointer_type = pb_response.endpointer_type
         results = pb_response.results
         result_index = pb_response.result_index
-        return cls(error, endpointer_type, results, result_index)
+        return cls(error=error, endpointer_type=endpointer_type,
+                   results=results, result_index=result_index)
+
+    @property
+    def confidence(self):
+        """Confidence of the top alternative of the first result.
+
+        :rtype: float
+        :returns: Confidence score of recognized speech [0.0-1.0], or
+                  ``0.0`` when the response has no results or the first
+                  result has no alternatives.
+        """
+        if not self.results:
+            return 0.0
+        alternatives = self.results[0].alternatives
+        if not alternatives:
+            return 0.0
+        return alternatives[0].confidence
+
+    @property
+    def endpointer_type(self):
+        """Endpointer indicating the state of the speech detection.
+
+        :rtype: str or ``NoneType``
+        :returns: Name of the matching
+                  :class:`~endpointer_type.EndpointerType` constant, or
+                  ``None`` when the value given to the constructor is not
+                  a key of ``EndpointerType.reverse_map``.
+        """
+        return self._endpointer_type
+
+    @property
+    def is_final(self):
+        """Whether the first result in this response is final.
+
+        :rtype: bool
+        :returns: ``True`` if the first result has completed its
+                  processing; ``False`` if it is an interim result that
+                  may still change, or if the response has no results.
+        """
+        # A response without results cannot be final.
+        if not self.results:
+            return False
+        return self.results[0].is_final
 
     @property
     def result_index(self):
@@ -75,3 +113,15 @@ def results(self):
         :returns: List of ``StreamingSpeechResult`` in this response.
         """
         return self._results
+
+    @property
+    def transcript(self):
+        """Transcript of the top alternative of the first result.
+
+        :rtype: str
+        :returns: Transcript text from the response, or an empty string
+                  when the response has no results or the first result has
+                  no alternatives.
+        """
+        if not self.results:
+            return ''
+        alternatives = self.results[0].alternatives
+        if not alternatives:
+            return ''
+        return alternatives[0].transcript
diff --git a/speech/google/cloud/speech/streaming/result.py b/speech/google/cloud/speech/streaming/result.py
@@ -70,4 +70,4 @@ def is_final(self):
         :rtype: bool
         :returns: True if the result has completed it's processing.
         """
-        return self._is_final
+        return bool(self._is_final)