Skip to content
This repository has been archived by the owner on Apr 20, 2024. It is now read-only.

Commit

Permalink
docs(samples): Update code samples for adaptation and VAD (#462)
Browse files Browse the repository at this point in the history
Co-authored-by: AJ Morozoff <amorozoff@google.com>
Co-authored-by: Anthonios Partheniou <partheniou@google.com>
  • Loading branch information
3 people authored Oct 20, 2022
1 parent 22afd43 commit f785ac7
Show file tree
Hide file tree
Showing 15 changed files with 390 additions and 17 deletions.
6 changes: 1 addition & 5 deletions samples/snippets/adaptation_v2_custom_class_reference.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def adaptation_v2_custom_class_reference(project_id, recognizer_id, phrase_set_i
request = cloud_speech.CreateCustomClassRequest(
parent=f"projects/{project_id}/locations/global",
custom_class_id=custom_class_id,
custom_class=cloud_speech.CustomClass(items=[{"value": "Keem"}]))
custom_class=cloud_speech.CustomClass(items=[{"value": "fare"}]))

operation = client.create_custom_class(request=request)
custom_class = operation.result()
Expand All @@ -70,10 +70,6 @@ def adaptation_v2_custom_class_reference(project_id, recognizer_id, phrase_set_i
auto_decoding_config={}, adaptation=adaptation
)

print(custom_class)
print(phrase_set)
print(config)

request = cloud_speech.RecognizeRequest(
recognizer=recognizer.name, config=config, content=content
)
Expand Down
4 changes: 2 additions & 2 deletions samples/snippets/adaptation_v2_custom_class_reference_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,11 +48,11 @@ def test_adaptation_v2_custom_class_reference(capsys):
phrase_set_id = "phrase-set-" + str(uuid4())
custom_class_id = "custom-class-" + str(uuid4())
response = adaptation_v2_custom_class_reference.adaptation_v2_custom_class_reference(
project_id, recognizer_id, phrase_set_id, custom_class_id, os.path.join(RESOURCES, "baby_keem.wav")
project_id, recognizer_id, phrase_set_id, custom_class_id, os.path.join(RESOURCES, "fair.wav")
)

assert re.search(
r"play Baby Keem",
r"the word is fare",
response.results[0].alternatives[0].transcript,
re.DOTALL | re.I,
)
Expand Down
4 changes: 2 additions & 2 deletions samples/snippets/adaptation_v2_inline_custom_class.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,8 @@ def adaptation_v2_inline_custom_class(project_id, recognizer_id, audio_file):
content = f.read()

# Build inline phrase set to produce a more accurate transcript
phrase_set = cloud_speech.PhraseSet(phrases=[{"value": "${keem}", "boost": 20}])
custom_class = cloud_speech.CustomClass(name="keem", items=[{"value": "Keem"}])
phrase_set = cloud_speech.PhraseSet(phrases=[{"value": "${fare}", "boost": 20}])
custom_class = cloud_speech.CustomClass(name="fare", items=[{"value": "fare"}])
adaptation = cloud_speech.SpeechAdaptation(
phrase_sets=[
cloud_speech.SpeechAdaptation.AdaptationPhraseSet(
Expand Down
4 changes: 2 additions & 2 deletions samples/snippets/adaptation_v2_inline_custom_class_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,11 @@ def test_adaptation_v2_inline_custom_class(capsys):

recognizer_id = "recognizer-" + str(uuid4())
response = adaptation_v2_inline_custom_class.adaptation_v2_inline_custom_class(
project_id, recognizer_id, os.path.join(RESOURCES, "baby_keem.wav")
project_id, recognizer_id, os.path.join(RESOURCES, "fair.wav")
)

assert re.search(
r"play Baby Keem",
r"the word is fare",
response.results[0].alternatives[0].transcript,
re.DOTALL | re.I,
)
Expand Down
2 changes: 1 addition & 1 deletion samples/snippets/adaptation_v2_inline_phrase_set.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def adaptation_v2_inline_phrase_set(project_id, recognizer_id, audio_file):
content = f.read()

# Build inline phrase set to produce a more accurate transcript
phrase_set = cloud_speech.PhraseSet(phrases=[{"value": "Keem", "boost": 10}])
phrase_set = cloud_speech.PhraseSet(phrases=[{"value": "fare", "boost": 10}])
adaptation = cloud_speech.SpeechAdaptation(
phrase_sets=[
cloud_speech.SpeechAdaptation.AdaptationPhraseSet(
Expand Down
4 changes: 2 additions & 2 deletions samples/snippets/adaptation_v2_inline_phrase_set_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,11 @@ def test_adaptation_v2_inline_phrase_set(capsys):

recognizer_id = "recognizer-" + str(uuid4())
response = adaptation_v2_inline_phrase_set.adaptation_v2_inline_phrase_set(
project_id, recognizer_id, os.path.join(RESOURCES, "baby_keem.wav")
project_id, recognizer_id, os.path.join(RESOURCES, "fair.wav")
)

assert re.search(
r"play Baby Keem",
r"the word is fare",
response.results[0].alternatives[0].transcript,
re.DOTALL | re.I,
)
Expand Down
2 changes: 1 addition & 1 deletion samples/snippets/adaptation_v2_phrase_set_reference.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def adaptation_v2_phrase_set_reference(project_id, recognizer_id, phrase_set_id,
request = cloud_speech.CreatePhraseSetRequest(
parent=f"projects/{project_id}/locations/global",
phrase_set_id=phrase_set_id,
phrase_set=cloud_speech.PhraseSet(phrases=[{"value": "Keem", "boost": 10}]))
phrase_set=cloud_speech.PhraseSet(phrases=[{"value": "fare", "boost": 10}]))

operation = client.create_phrase_set(request=request)
phrase_set = operation.result()
Expand Down
4 changes: 2 additions & 2 deletions samples/snippets/adaptation_v2_phrase_set_reference_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,11 @@ def test_adaptation_v2_phrase_set_reference(capsys):
recognizer_id = "recognizer-" + str(uuid4())
phrase_set_id = "phrase-set-" + str(uuid4())
response = adaptation_v2_phrase_set_reference.adaptation_v2_phrase_set_reference(
project_id, recognizer_id, phrase_set_id, os.path.join(RESOURCES, "baby_keem.wav")
project_id, recognizer_id, phrase_set_id, os.path.join(RESOURCES, "fair.wav")
)

assert re.search(
r"play Baby Keem",
r"the word is fare",
response.results[0].alternatives[0].transcript,
re.DOTALL | re.I,
)
Expand Down
Binary file not shown.
Binary file removed samples/snippets/resources/baby_keem.wav
Binary file not shown.
Binary file added samples/snippets/resources/fair.wav
Binary file not shown.
108 changes: 108 additions & 0 deletions samples/snippets/transcribe_streaming_voice_activity_events.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
# Copyright 2022 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


import argparse

# [START speech_transcribe_streaming_voice_activity_events]
import io

from google.cloud.speech_v2 import SpeechClient
from google.cloud.speech_v2.types import cloud_speech


def transcribe_streaming_voice_activity_events(project_id, recognizer_id, audio_file):
    """Stream an audio file for recognition with voice activity events enabled.

    Creates a recognizer (``en-US``, ``latest_long`` model) under
    ``projects/{project_id}/locations/global``, sends the file's bytes to it
    in chunks of one fifth of the file's length, and prints a line for every
    speech-begin event, speech-end event and transcript received.

    This function does not delete the recognizer it creates.

    Args:
        project_id: Project in which the recognizer is created.
        recognizer_id: ID to give the newly created recognizer.
        audio_file: Path of the audio file to stream.

    Returns:
        A list of every streaming response received, in arrival order.
    """
    # Instantiates a client
    client = SpeechClient()

    request = cloud_speech.CreateRecognizerRequest(
        parent=f"projects/{project_id}/locations/global",
        recognizer_id=recognizer_id,
        recognizer=cloud_speech.Recognizer(
            language_codes=["en-US"], model="latest_long"
        ),
    )

    # Creates a Recognizer
    operation = client.create_recognizer(request=request)
    recognizer = operation.result()

    # Reads a file as bytes
    with io.open(audio_file, "rb") as f:
        content = f.read()

    # In practice, stream should be a generator yielding chunks of audio data.
    # The chunk length is clamped to at least one byte: for a file shorter
    # than five bytes the integer division yields 0, and range() raises
    # ValueError when given a zero step.
    chunk_length = max(1, len(content) // 5)
    stream = [
        content[start : start + chunk_length]
        for start in range(0, len(content), chunk_length)
    ]
    audio_requests = (
        cloud_speech.StreamingRecognizeRequest(audio=audio) for audio in stream
    )

    recognition_config = cloud_speech.RecognitionConfig(auto_decoding_config={})

    # Sets the flag to enable voice activity events
    streaming_features = cloud_speech.StreamingRecognitionFeatures(
        enable_voice_activity_events=True
    )
    streaming_config = cloud_speech.StreamingRecognitionConfig(
        config=recognition_config, streaming_features=streaming_features
    )

    config_request = cloud_speech.StreamingRecognizeRequest(
        recognizer=recognizer.name, streaming_config=streaming_config
    )

    def requests(config, audio):
        # The configuration request goes first, followed by the audio chunks.
        yield config
        yield from audio

    # Transcribes the audio into text
    responses_iterator = client.streaming_recognize(
        requests=requests(config_request, audio_requests)
    )

    event_types = cloud_speech.StreamingRecognizeResponse.SpeechEventType
    responses = []
    for response in responses_iterator:
        responses.append(response)
        # A response carries a single event type, so the checks are exclusive.
        if response.speech_event_type == event_types.SPEECH_ACTIVITY_BEGIN:
            print("Speech started.")
        elif response.speech_event_type == event_types.SPEECH_ACTIVITY_END:
            print("Speech ended.")
        for result in response.results:
            print("Transcript: {}".format(result.alternatives[0].transcript))

    return responses
# [END speech_transcribe_streaming_voice_activity_events]


if __name__ == "__main__":
    # Command-line entry point: three positional arguments are parsed and
    # handed straight to the sample function.
    cli = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    positional_arguments = (
        ("project_id", "project to create recognizer in"),
        ("recognizer_id", "name of recognizer to create"),
        ("audio_file", "audio file to stream"),
    )
    for argument_name, argument_help in positional_arguments:
        cli.add_argument(argument_name, help=argument_help)
    parsed = cli.parse_args()
    transcribe_streaming_voice_activity_events(
        parsed.project_id, parsed.recognizer_id, parsed.audio_file
    )
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# Copyright 2022, Google, Inc.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import re
from uuid import uuid4

from google.cloud.speech_v2 import SpeechClient
from google.cloud.speech_v2.types import cloud_speech

import transcribe_streaming_voice_activity_events

RESOURCES = os.path.join(os.path.dirname(__file__), "resources")


def delete_recognizer(name):
    """Issue a delete request for the recognizer with resource name ``name``.

    A fresh ``SpeechClient`` is created for the call; nothing is returned.
    """
    SpeechClient().delete_recognizer(
        request=cloud_speech.DeleteRecognizerRequest(name=name)
    )


def test_transcribe_streaming_voice_activity_events(capsys):
    """Run the voice-activity streaming sample and check what it returns.

    Streams ``resources/audio.wav`` through a uniquely named recognizer in the
    project given by ``GOOGLE_CLOUD_PROJECT``, then checks that the first
    response is a speech-begin event and that the combined transcript contains
    the expected phrase. The recognizer is deleted afterwards whether or not
    the assertions pass.
    """
    project_id = os.getenv("GOOGLE_CLOUD_PROJECT")

    recognizer_id = "recognizer-" + str(uuid4())
    responses = transcribe_streaming_voice_activity_events.transcribe_streaming_voice_activity_events(
        project_id, recognizer_id, os.path.join(RESOURCES, "audio.wav")
    )

    # The sample has returned, so the recognizer exists. From here on, delete
    # it even if an assertion fails, so failed runs do not leak recognizers.
    try:
        transcript = "".join(
            result.alternatives[0].transcript
            for response in responses
            for result in response.results
        )

        assert (
            responses[0].speech_event_type
            == cloud_speech.StreamingRecognizeResponse.SpeechEventType.SPEECH_ACTIVITY_BEGIN
        )

        assert re.search(
            r"how old is the Brooklyn Bridge",
            transcript,
            re.DOTALL | re.I,
        )
    finally:
        delete_recognizer(
            f"projects/{project_id}/locations/global/recognizers/{recognizer_id}"
        )
Loading

0 comments on commit f785ac7

Please sign in to comment.