Skip to content

Commit

Permalink
Speech v1 (#3266)
Browse files Browse the repository at this point in the history
This updates our manual client library to the Speech v1 API.

This entails several **backwards-incompatible changes**:

  * The `language_code` parameter is no longer optional anywhere. It must be explicitly specified, and does _not_ default to `'en-US'`.
  * The `sync_recognize` method has been renamed to `recognize` on every class where it appears.
  * The `async_recognize` method has been renamed to `long_running_recognize` on every class where it appears.
  * The `sample_rate` parameter and property have been renamed to `sample_rate_hertz` everywhere they appear.

Additionally, the backend API contains a backwards-incompatible change that does not require a code change in the client library but will likely require one downstream: the `START_OF_SPEECH`, `END_OF_SPEECH`, and `END_OF_AUDIO` events have been removed.
  • Loading branch information
lukesneeringer authored Apr 5, 2017
1 parent bc6f2ef commit d225480
Show file tree
Hide file tree
Showing 17 changed files with 346 additions and 297 deletions.
141 changes: 75 additions & 66 deletions packages/google-cloud-python-speech/google/cloud/speech/_gax.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,15 @@

"""GAX/GAPIC module for managing Speech API requests."""


from google.cloud.gapic.speech.v1beta1.speech_client import SpeechClient
from google.cloud.proto.speech.v1beta1.cloud_speech_pb2 import RecognitionAudio
from google.cloud.proto.speech.v1beta1.cloud_speech_pb2 import (
from google.cloud.gapic.speech.v1.speech_client import SpeechClient
from google.cloud.proto.speech.v1.cloud_speech_pb2 import RecognitionAudio
from google.cloud.proto.speech.v1.cloud_speech_pb2 import (
RecognitionConfig)
from google.cloud.proto.speech.v1beta1.cloud_speech_pb2 import (
from google.cloud.proto.speech.v1.cloud_speech_pb2 import (
SpeechContext)
from google.cloud.proto.speech.v1beta1.cloud_speech_pb2 import (
from google.cloud.proto.speech.v1.cloud_speech_pb2 import (
StreamingRecognitionConfig)
from google.cloud.proto.speech.v1beta1.cloud_speech_pb2 import (
from google.cloud.proto.speech.v1.cloud_speech_pb2 import (
StreamingRecognizeRequest)
from google.longrunning import operations_grpc

Expand Down Expand Up @@ -62,23 +61,22 @@ def __init__(self, client=None):
OPERATIONS_API_HOST,
)

def async_recognize(self, sample, language_code=None,
max_alternatives=None, profanity_filter=None,
speech_context=None):
"""Asychronous Recognize request to Google Speech API.
def long_running_recognize(self, sample, language_code,
max_alternatives=None, profanity_filter=None,
speech_contexts=()):
"""Long-running Recognize request to Google Speech API.
.. _async_recognize: https://cloud.google.com/speech/reference/\
rest/v1beta1/speech/asyncrecognize
.. _long_running_recognize: https://cloud.google.com/speech/reference/\
rest/v1/speech/longrunningrecognize
See `async_recognize`_.
See `long_running_recognize`_.
:type sample: :class:`~google.cloud.speech.sample.Sample`
:param sample: Instance of ``Sample`` containing audio information.
:type language_code: str
:param language_code: (Optional) The language of the supplied audio as
BCP-47 language tag. Example: ``'en-GB'``.
If omitted, defaults to ``'en-US'``.
:param language_code: The language of the supplied audio as
BCP-47 language tag. Example: ``'en-US'``.
:type max_alternatives: int
:param max_alternatives: (Optional) Maximum number of recognition
Expand All @@ -94,8 +92,8 @@ def async_recognize(self, sample, language_code=None,
asterisks, e.g. ``'f***'``. If False or
omitted, profanities won't be filtered out.
:type speech_context: list
:param speech_context: A list of strings (max 50) containing words and
:type speech_contexts: list
:param speech_contexts: A list of strings (max 50) containing words and
phrases "hints" so that the speech recognition
is more likely to recognize them. This can be
used to improve the accuracy for specific words
Expand All @@ -106,21 +104,27 @@ def async_recognize(self, sample, language_code=None,
:returns: Instance of ``Operation`` to poll for results.
"""
config = RecognitionConfig(
encoding=sample.encoding, sample_rate=sample.sample_rate,
language_code=language_code, max_alternatives=max_alternatives,
encoding=sample.encoding,
language_code=language_code,
max_alternatives=max_alternatives,
profanity_filter=profanity_filter,
speech_context=SpeechContext(phrases=speech_context))
sample_rate_hertz=sample.sample_rate_hertz,
speech_contexts=[SpeechContext(phrases=speech_contexts)],
)

audio = RecognitionAudio(content=sample.content,
uri=sample.source_uri)
api = self._gapic_api
operation_future = api.async_recognize(config=config, audio=audio)
operation_future = api.long_running_recognize(
audio=audio,
config=config,
)

return Operation.from_pb(operation_future.last_operation_data(), self)

def streaming_recognize(self, sample, language_code=None,
def streaming_recognize(self, sample, language_code,
max_alternatives=None, profanity_filter=None,
speech_context=None, single_utterance=False,
speech_contexts=(), single_utterance=False,
interim_results=False):
"""Streaming speech recognition.
Expand All @@ -136,9 +140,8 @@ def streaming_recognize(self, sample, language_code=None,
:param sample: Instance of ``Sample`` containing audio information.
:type language_code: str
:param language_code: (Optional) The language of the supplied audio as
BCP-47 language tag. Example: ``'en-GB'``.
If omitted, defaults to ``'en-US'``.
:param language_code: The language of the supplied audio as
BCP-47 language tag. Example: ``'en-US'``.
:type max_alternatives: int
:param max_alternatives: (Optional) Maximum number of recognition
Expand All @@ -154,8 +157,8 @@ def streaming_recognize(self, sample, language_code=None,
asterisks, e.g. ``'f***'``. If False or
omitted, profanities won't be filtered out.
:type speech_context: list
:param speech_context: A list of strings (max 50) containing words and
:type speech_contexts: list
:param speech_contexts: A list of strings (max 50) containing words and
phrases "hints" so that the speech recognition
is more likely to recognize them. This can be
used to improve the accuracy for specific words
Expand Down Expand Up @@ -190,7 +193,7 @@ def streaming_recognize(self, sample, language_code=None,
:raises: :class:`ValueError` if sample.content is not a file-like
object. :class:`ValueError` if stream has closed.
:rtype: :class:`~google.cloud.grpc.speech.v1beta1\
:rtype: :class:`~google.cloud.grpc.speech.v1\
.cloud_speech_pb2.StreamingRecognizeResponse`
:returns: ``StreamingRecognizeResponse`` instances.
"""
Expand All @@ -200,29 +203,28 @@ def streaming_recognize(self, sample, language_code=None,
requests = _stream_requests(sample, language_code=language_code,
max_alternatives=max_alternatives,
profanity_filter=profanity_filter,
speech_context=speech_context,
speech_contexts=speech_contexts,
single_utterance=single_utterance,
interim_results=interim_results)
api = self._gapic_api
responses = api.streaming_recognize(requests)
return responses

def sync_recognize(self, sample, language_code=None, max_alternatives=None,
profanity_filter=None, speech_context=None):
def recognize(self, sample, language_code, max_alternatives=None,
profanity_filter=None, speech_contexts=()):
"""Synchronous Speech Recognition.
.. _sync_recognize: https://cloud.google.com/speech/reference/\
rest/v1beta1/speech/syncrecognize
.. _recognize: https://cloud.google.com/speech/reference/\
rest/v1/speech/recognize
See `sync_recognize`_.
See `recognize`_.
:type sample: :class:`~google.cloud.speech.sample.Sample`
:param sample: Instance of ``Sample`` containing audio information.
:type language_code: str
:param language_code: (Optional) The language of the supplied audio as
BCP-47 language tag. Example: ``'en-GB'``.
If omitted, defaults to ``'en-US'``.
:param language_code: The language of the supplied audio as
BCP-47 language tag. Example: ``'en-US'``.
:type max_alternatives: int
:param max_alternatives: (Optional) Maximum number of recognition
Expand All @@ -238,8 +240,8 @@ def sync_recognize(self, sample, language_code=None, max_alternatives=None,
asterisks, e.g. ``'f***'``. If False or
omitted, profanities won't be filtered out.
:type speech_context: list
:param speech_context: A list of strings (max 50) containing words and
:type speech_contexts: list
:param speech_contexts: A list of strings (max 50) containing words and
phrases "hints" so that the speech recognition
is more likely to recognize them. This can be
used to improve the accuracy for specific words
Expand All @@ -252,14 +254,17 @@ def sync_recognize(self, sample, language_code=None, max_alternatives=None,
:raises: ValueError if there are no results.
"""
config = RecognitionConfig(
encoding=sample.encoding, sample_rate=sample.sample_rate,
language_code=language_code, max_alternatives=max_alternatives,
encoding=sample.encoding,
language_code=language_code,
max_alternatives=max_alternatives,
profanity_filter=profanity_filter,
speech_context=SpeechContext(phrases=speech_context))
sample_rate_hertz=sample.sample_rate_hertz,
speech_contexts=[SpeechContext(phrases=speech_contexts)],
)
audio = RecognitionAudio(content=sample.content,
uri=sample.source_uri)
api = self._gapic_api
api_response = api.sync_recognize(config=config, audio=audio)
api_response = api.recognize(config=config, audio=audio)

# Sanity check: If we got no results back, raise an error.
if len(api_response.results) == 0:
Expand All @@ -269,18 +274,17 @@ def sync_recognize(self, sample, language_code=None, max_alternatives=None,
return [Result.from_pb(result) for result in api_response.results]


def _stream_requests(sample, language_code=None, max_alternatives=None,
profanity_filter=None, speech_context=None,
def _stream_requests(sample, language_code, max_alternatives=None,
profanity_filter=None, speech_contexts=(),
single_utterance=None, interim_results=None):
"""Generate stream of requests from sample.
:type sample: :class:`~google.cloud.speech.sample.Sample`
:param sample: Instance of ``Sample`` containing audio information.
:type language_code: str
:param language_code: (Optional) The language of the supplied audio as
BCP-47 language tag. Example: ``'en-GB'``.
If omitted, defaults to ``'en-US'``.
:param language_code: The language of the supplied audio as
BCP-47 language tag. Example: ``'en-US'``.
:type max_alternatives: int
:param max_alternatives: (Optional) Maximum number of recognition
Expand All @@ -296,13 +300,14 @@ def _stream_requests(sample, language_code=None, max_alternatives=None,
asterisks, e.g. ``'f***'``. If False or
omitted, profanities won't be filtered out.
:type speech_context: list
:param speech_context: (Optional) A list of strings (max 50) containing
words and phrases "hints" so that the speech
recognition is more likely to recognize them.
This can be used to improve the accuracy for
specific words and phrases. This can also be used to
add new words to the vocabulary of the recognizer.
:type speech_contexts: list
:param speech_contexts: (Optional) A list of strings (max 50) containing
words and phrases "hints" so that the speech
recognition is more likely to recognize them.
This can be used to improve the accuracy for
specific words and phrases. This can also be used
to add new words to the vocabulary of the
recognizer.
:type single_utterance: bool
:param single_utterance: (Optional) If false or omitted, the recognizer
Expand Down Expand Up @@ -333,7 +338,7 @@ def _stream_requests(sample, language_code=None, max_alternatives=None,
config_request = _make_streaming_request(
sample, language_code=language_code, max_alternatives=max_alternatives,
profanity_filter=profanity_filter,
speech_context=SpeechContext(phrases=speech_context),
speech_contexts=[SpeechContext(phrases=speech_contexts)],
single_utterance=single_utterance, interim_results=interim_results)

# The config request MUST go first and not contain any audio data.
Expand All @@ -348,7 +353,7 @@ def _stream_requests(sample, language_code=None, max_alternatives=None,

def _make_streaming_request(sample, language_code,
max_alternatives, profanity_filter,
speech_context, single_utterance,
speech_contexts, single_utterance,
interim_results):
"""Build streaming request.
Expand All @@ -374,8 +379,8 @@ def _make_streaming_request(sample, language_code,
asterisks, e.g. ``'f***'``. If False or
omitted, profanities won't be filtered out.
:type speech_context: list
:param speech_context: A list of strings (max 50) containing words and
:type speech_contexts: list
:param speech_contexts: A list of strings (max 50) containing words and
phrases "hints" so that the speech recognition
is more likely to recognize them. This can be
used to improve the accuracy for specific words
Expand Down Expand Up @@ -409,13 +414,17 @@ def _make_streaming_request(sample, language_code,
returned.
:rtype:
:class:`~grpc.speech.v1beta1.cloud_speech_pb2.StreamingRecognizeRequest`
:class:`~grpc.speech.v1.cloud_speech_pb2.StreamingRecognizeRequest`
:returns: Instance of ``StreamingRecognizeRequest``.
"""
config = RecognitionConfig(
encoding=sample.encoding, sample_rate=sample.sample_rate,
language_code=language_code, max_alternatives=max_alternatives,
profanity_filter=profanity_filter, speech_context=speech_context)
encoding=sample.encoding,
language_code=language_code,
max_alternatives=max_alternatives,
profanity_filter=profanity_filter,
sample_rate_hertz=sample.sample_rate_hertz,
speech_contexts=speech_contexts,
)

streaming_config = StreamingRecognitionConfig(
config=config, single_utterance=single_utterance,
Expand Down
Loading

0 comments on commit d225480

Please sign in to comment.