Skip to content

Commit

Permalink
Simplified 5 speech samples (set-1) (#12110)
Browse files (browse the repository at this point in the history)
* Simplified 5 samples
  • Loading branch information
Thoughtseize1 authored Aug 8, 2024
1 parent ec870b1 commit 8db931d
Show file tree
Hide file tree
Showing 10 changed files with 86 additions and 133 deletions.
36 changes: 15 additions & 21 deletions speech/snippets/transcribe_batch_dynamic_batching_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,25 +13,24 @@
# limitations under the License.


import argparse

# [START speech_transcribe_batch_dynamic_batching_v2]
import os

from google.cloud.speech_v2 import SpeechClient
from google.cloud.speech_v2.types import cloud_speech

PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT")


def transcribe_batch_dynamic_batching_v2(
project_id: str,
gcs_uri: str,
audio_uri: str,
) -> cloud_speech.BatchRecognizeResults:
"""Transcribes audio from a Google Cloud Storage URI.
"""Transcribes audio from a Google Cloud Storage URI using dynamic batching.
Args:
project_id: The Google Cloud project ID.
gcs_uri: The Google Cloud Storage URI.
audio_uri (str): The Cloud Storage URI of the input audio.
E.g., gs://[BUCKET]/[FILE]
Returns:
The RecognizeResponse.
cloud_speech.BatchRecognizeResults: The response containing the transcription results.
"""
# Instantiates a client
client = SpeechClient()
Expand All @@ -42,10 +41,10 @@ def transcribe_batch_dynamic_batching_v2(
model="long",
)

file_metadata = cloud_speech.BatchRecognizeFileMetadata(uri=gcs_uri)
file_metadata = cloud_speech.BatchRecognizeFileMetadata(uri=audio_uri)

request = cloud_speech.BatchRecognizeRequest(
recognizer=f"projects/{project_id}/locations/global/recognizers/_",
recognizer=f"projects/{PROJECT_ID}/locations/global/recognizers/_",
config=config,
files=[file_metadata],
recognition_output_config=cloud_speech.RecognitionOutputConfig(
Expand All @@ -60,20 +59,15 @@ def transcribe_batch_dynamic_batching_v2(
print("Waiting for operation to complete...")
response = operation.result(timeout=120)

for result in response.results[gcs_uri].transcript.results:
for result in response.results[audio_uri].transcript.results:
print(f"Transcript: {result.alternatives[0].transcript}")

return response.results[gcs_uri].transcript
return response.results[audio_uri].transcript


# [END speech_transcribe_batch_dynamic_batching_v2]


if __name__ == "__main__":
parser = argparse.ArgumentParser(
description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
)
parser.add_argument("project_id", help="GCP Project ID")
parser.add_argument("gcs_uri", help="URI to GCS file")
args = parser.parse_args()
transcribe_batch_dynamic_batching_v2(args.project_id, args.gcs_uri)
audio_uri = "gs://cloud-samples-data/speech/audio.flac"
transcribe_batch_dynamic_batching_v2(audio_uri)
5 changes: 1 addition & 4 deletions speech/snippets/transcribe_batch_dynamic_batching_v2_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import re

from flaky import flaky
Expand All @@ -29,11 +28,9 @@
def test_transcribe_batch_dynamic_batching_v2(
capsys: pytest.CaptureFixture,
) -> None:
project_id = os.getenv("GOOGLE_CLOUD_PROJECT")

response = (
transcribe_batch_dynamic_batching_v2.transcribe_batch_dynamic_batching_v2(
project_id, _TEST_AUDIO_FILE_PATH
_TEST_AUDIO_FILE_PATH
)
)

Expand Down
45 changes: 18 additions & 27 deletions speech/snippets/transcribe_batch_gcs_input_gcs_output_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,31 +12,31 @@
# See the License for the specific language governing permissions and
# limitations under the License.


import argparse

# [START speech_transcribe_batch_gcs_input_gcs_output_v2]
import os

import re

from google.cloud import storage
from google.cloud.speech_v2 import SpeechClient
from google.cloud.speech_v2.types import cloud_speech

PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT")


def transcribe_batch_gcs_input_gcs_output_v2(
project_id: str,
gcs_uri: str,
audio_uri: str,
gcs_output_path: str,
) -> cloud_speech.BatchRecognizeResults:
"""Transcribes audio from a Google Cloud Storage URI.
"""Transcribes audio from a Google Cloud Storage URI using the Google Cloud Speech-to-Text API.
The transcription results are stored in another Google Cloud Storage bucket.
Args:
project_id: The Google Cloud project ID.
gcs_uri: The Google Cloud Storage URI.
gcs_output_path: The Cloud Storage URI to which to write the transcript.
audio_uri (str): The Google Cloud Storage URI of the input audio file.
E.g., gs://[BUCKET]/[FILE]
gcs_output_path (str): The Google Cloud Storage bucket URI where the output transcript will be stored.
E.g., gs://[BUCKET]
Returns:
The BatchRecognizeResults message.
cloud_speech.BatchRecognizeResults: The response containing the URI of the transcription results.
"""
# Instantiates a client
client = SpeechClient()
Expand All @@ -47,10 +47,10 @@ def transcribe_batch_gcs_input_gcs_output_v2(
model="long",
)

file_metadata = cloud_speech.BatchRecognizeFileMetadata(uri=gcs_uri)
file_metadata = cloud_speech.BatchRecognizeFileMetadata(uri=audio_uri)

request = cloud_speech.BatchRecognizeRequest(
recognizer=f"projects/{project_id}/locations/global/recognizers/_",
recognizer=f"projects/{PROJECT_ID}/locations/global/recognizers/_",
config=config,
files=[file_metadata],
recognition_output_config=cloud_speech.RecognitionOutputConfig(
Expand All @@ -66,7 +66,7 @@ def transcribe_batch_gcs_input_gcs_output_v2(
print("Waiting for operation to complete...")
response = operation.result(timeout=120)

file_results = response.results[gcs_uri]
file_results = response.results[audio_uri]

print(f"Operation finished. Fetching results from {file_results.uri}...")
output_bucket, output_object = re.match(
Expand Down Expand Up @@ -94,15 +94,6 @@ def transcribe_batch_gcs_input_gcs_output_v2(


if __name__ == "__main__":
parser = argparse.ArgumentParser(
description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
)
parser.add_argument("project_id", help="GCP Project ID")
parser.add_argument("gcs_uri", help="URI to GCS file")
parser.add_argument(
"gcs_output_path", help="GCS URI to which to write the transcript"
)
args = parser.parse_args()
transcribe_batch_gcs_input_gcs_output_v2(
args.project_id, args.gcs_uri, args.gcs_output_path
)
audio_uri = "gs://cloud-samples-data/speech/audio.flac"
output_bucket_name = "gs://your-bucket-unique-name"
transcribe_batch_gcs_input_gcs_output_v2(audio_uri, output_bucket_name)
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import re
from uuid import uuid4

Expand Down Expand Up @@ -43,10 +42,8 @@ def test_transcribe_batch_gcs_input_gcs_output_v2(
gcs_bucket: pytest.CaptureFixture,
capsys: pytest.CaptureFixture,
) -> None:
project_id = os.getenv("GOOGLE_CLOUD_PROJECT")

response = transcribe_batch_gcs_input_gcs_output_v2.transcribe_batch_gcs_input_gcs_output_v2(
project_id, _TEST_AUDIO_FILE_PATH, f"gs://{gcs_bucket}"
_TEST_AUDIO_FILE_PATH, f"gs://{gcs_bucket}"
)

assert re.search(
Expand Down
38 changes: 16 additions & 22 deletions speech/snippets/transcribe_batch_gcs_input_inline_output_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,26 +12,25 @@
# See the License for the specific language governing permissions and
# limitations under the License.


import argparse

# [START speech_transcribe_batch_gcs_input_inline_output_v2]
import os

from google.cloud.speech_v2 import SpeechClient
from google.cloud.speech_v2.types import cloud_speech

PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT")


def transcribe_batch_gcs_input_inline_output_v2(
project_id: str,
gcs_uri: str,
audio_uri: str,
) -> cloud_speech.BatchRecognizeResults:
"""Transcribes audio from a Google Cloud Storage URI.
"""Transcribes audio from a Google Cloud Storage URI using the Google Cloud Speech-to-Text API.
The transcription results are returned inline in the response.
Args:
project_id: The Google Cloud project ID.
gcs_uri: The Google Cloud Storage URI.
audio_uri (str): The Google Cloud Storage URI of the input audio file.
E.g., gs://[BUCKET]/[FILE]
Returns:
The RecognizeResponse.
cloud_speech.BatchRecognizeResults: The response containing the transcription results.
"""
# Instantiates a client
client = SpeechClient()
Expand All @@ -42,10 +41,10 @@ def transcribe_batch_gcs_input_inline_output_v2(
model="long",
)

file_metadata = cloud_speech.BatchRecognizeFileMetadata(uri=gcs_uri)
file_metadata = cloud_speech.BatchRecognizeFileMetadata(uri=audio_uri)

request = cloud_speech.BatchRecognizeRequest(
recognizer=f"projects/{project_id}/locations/global/recognizers/_",
recognizer=f"projects/{PROJECT_ID}/locations/global/recognizers/_",
config=config,
files=[file_metadata],
recognition_output_config=cloud_speech.RecognitionOutputConfig(
Expand All @@ -59,20 +58,15 @@ def transcribe_batch_gcs_input_inline_output_v2(
print("Waiting for operation to complete...")
response = operation.result(timeout=120)

for result in response.results[gcs_uri].transcript.results:
for result in response.results[audio_uri].transcript.results:
print(f"Transcript: {result.alternatives[0].transcript}")

return response.results[gcs_uri].transcript
return response.results[audio_uri].transcript


# [END speech_transcribe_batch_gcs_input_inline_output_v2]


if __name__ == "__main__":
parser = argparse.ArgumentParser(
description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
)
parser.add_argument("project_id", help="GCP Project ID")
parser.add_argument("gcs_uri", help="URI to GCS file")
args = parser.parse_args()
transcribe_batch_gcs_input_inline_output_v2(args.project_id, args.gcs_uri)
audio_uri = "gs://cloud-samples-data/speech/audio.flac"
transcribe_batch_gcs_input_inline_output_v2(audio_uri)
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import re

from flaky import flaky
Expand All @@ -21,18 +20,15 @@

import transcribe_batch_gcs_input_inline_output_v2


_TEST_AUDIO_FILE_PATH = "gs://cloud-samples-data/speech/audio.flac"


@flaky(max_runs=10, min_passes=1)
def test_transcribe_batch_gcs_input_inline_output_v2(
capsys: pytest.CaptureFixture,
) -> None:
project_id = os.getenv("GOOGLE_CLOUD_PROJECT")

response = transcribe_batch_gcs_input_inline_output_v2.transcribe_batch_gcs_input_inline_output_v2(
project_id, _TEST_AUDIO_FILE_PATH
_TEST_AUDIO_FILE_PATH
)

assert re.search(
Expand Down
46 changes: 19 additions & 27 deletions speech/snippets/transcribe_batch_multiple_files_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,32 +12,31 @@
# See the License for the specific language governing permissions and
# limitations under the License.


import argparse

# [START speech_transcribe_batch_multiple_files_v2]
import os
import re
from typing import List

from google.cloud import storage
from google.cloud.speech_v2 import SpeechClient
from google.cloud.speech_v2.types import cloud_speech

PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT")


def transcribe_batch_multiple_files_v2(
project_id: str,
gcs_uris: List[str],
audio_uris: List[str],
gcs_output_path: str,
) -> cloud_speech.BatchRecognizeResponse:
"""Transcribes audio from a Google Cloud Storage URI.
"""Transcribes audio from multiple Google Cloud Storage URIs using the Google Cloud Speech-to-Text API.
The transcription results are stored in another Google Cloud Storage bucket.
Args:
project_id: The Google Cloud project ID.
gcs_uris: The Google Cloud Storage URIs to transcribe.
gcs_output_path: The Cloud Storage URI to which to write the transcript.
audio_uris (List[str]): The list of Google Cloud Storage URIs of the input audio files.
E.g., ["gs://[BUCKET]/[FILE]", "gs://[BUCKET]/[FILE]"]
gcs_output_path (str): The Google Cloud Storage bucket URI where the output transcript will be stored.
E.g., gs://[BUCKET]
Returns:
The BatchRecognizeResponse message.
cloud_speech.BatchRecognizeResponse: The response containing the URIs of the transcription results.
"""
# Instantiates a client
client = SpeechClient()
Expand All @@ -48,10 +47,10 @@ def transcribe_batch_multiple_files_v2(
model="long",
)

files = [cloud_speech.BatchRecognizeFileMetadata(uri=uri) for uri in gcs_uris]
files = [cloud_speech.BatchRecognizeFileMetadata(uri=uri) for uri in audio_uris]

request = cloud_speech.BatchRecognizeRequest(
recognizer=f"projects/{project_id}/locations/global/recognizers/_",
recognizer=f"projects/{PROJECT_ID}/locations/global/recognizers/_",
config=config,
files=files,
recognition_output_config=cloud_speech.RecognitionOutputConfig(
Expand All @@ -68,7 +67,7 @@ def transcribe_batch_multiple_files_v2(
response = operation.result(timeout=120)

print("Operation finished. Fetching results from:")
for uri in gcs_uris:
for uri in audio_uris:
file_results = response.results[uri]
print(f" {file_results.uri}...")
output_bucket, output_object = re.match(
Expand Down Expand Up @@ -96,15 +95,8 @@ def transcribe_batch_multiple_files_v2(


if __name__ == "__main__":
parser = argparse.ArgumentParser(
description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
)
parser.add_argument("project_id", help="GCP Project ID")
parser.add_argument("gcs_uri", nargs="+", help="URI to GCS file")
parser.add_argument(
"gcs_output_path", help="GCS URI to which to write the transcript"
)
args = parser.parse_args()
transcribe_batch_multiple_files_v2(
args.project_id, args.gcs_uri, args.gcs_output_path
)
audio1 = "gs://cloud-samples-data/speech/audio.flac"
audio2 = "gs://cloud-samples-data/speech/corbeau_renard.flac"
uris_list = [audio1, audio2]
output_bucket_name = "gs://your-bucket-name"
transcribe_batch_multiple_files_v2(uris_list, output_bucket_name)
Loading

0 comments on commit 8db931d

Please sign in to comment.