Simplified 5 speech samples (set-1) (#12110)

* Simplified 5 samples
GoogleCloudPlatform · Aug 8, 2024 · 8db931d · 8db931d
1 parent ec870b1
commit 8db931d
Show file tree

Hide file tree

Showing 10 changed files with 86 additions and 133 deletions.
diff --git a/speech/snippets/transcribe_batch_dynamic_batching_v2.py b/speech/snippets/transcribe_batch_dynamic_batching_v2.py
@@ -13,25 +13,24 @@
 # limitations under the License.
 
 
-import argparse
-
 # [START speech_transcribe_batch_dynamic_batching_v2]
+import os
+
 from google.cloud.speech_v2 import SpeechClient
 from google.cloud.speech_v2.types import cloud_speech
 
+PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT")
+
 
 def transcribe_batch_dynamic_batching_v2(
-    project_id: str,
-    gcs_uri: str,
+    audio_uri: str,
 ) -> cloud_speech.BatchRecognizeResults:
-    """Transcribes audio from a Google Cloud Storage URI.
-
+    """Transcribes audio from a Google Cloud Storage URI using dynamic batching.
     Args:
-        project_id: The Google Cloud project ID.
-        gcs_uri: The Google Cloud Storage URI.
-
+        audio_uri (str): The Cloud Storage URI of the input audio.
+            E.g., gs://[BUCKET]/[FILE]
     Returns:
-        The RecognizeResponse.
+        cloud_speech.BatchRecognizeResults: The response containing the transcription results.
     """
     # Instantiates a client
     client = SpeechClient()
@@ -42,10 +41,10 @@ def transcribe_batch_dynamic_batching_v2(
         model="long",
     )
 
-    file_metadata = cloud_speech.BatchRecognizeFileMetadata(uri=gcs_uri)
+    file_metadata = cloud_speech.BatchRecognizeFileMetadata(uri=audio_uri)
 
     request = cloud_speech.BatchRecognizeRequest(
-        recognizer=f"projects/{project_id}/locations/global/recognizers/_",
+        recognizer=f"projects/{PROJECT_ID}/locations/global/recognizers/_",
         config=config,
         files=[file_metadata],
         recognition_output_config=cloud_speech.RecognitionOutputConfig(
@@ -60,20 +59,15 @@ def transcribe_batch_dynamic_batching_v2(
     print("Waiting for operation to complete...")
     response = operation.result(timeout=120)
 
-    for result in response.results[gcs_uri].transcript.results:
+    for result in response.results[audio_uri].transcript.results:
         print(f"Transcript: {result.alternatives[0].transcript}")
 
-    return response.results[gcs_uri].transcript
+    return response.results[audio_uri].transcript
 
 
 # [END speech_transcribe_batch_dynamic_batching_v2]
 
 
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser(
-        description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
-    )
-    parser.add_argument("project_id", help="GCP Project ID")
-    parser.add_argument("gcs_uri", help="URI to GCS file")
-    args = parser.parse_args()
-    transcribe_batch_dynamic_batching_v2(args.project_id, args.gcs_uri)
+    audio_uri = "gs://cloud-samples-data/speech/audio.flac"
+    transcribe_batch_dynamic_batching_v2(audio_uri)
diff --git a/speech/snippets/transcribe_batch_dynamic_batching_v2_test.py b/speech/snippets/transcribe_batch_dynamic_batching_v2_test.py
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import os
 import re
 
 from flaky import flaky
@@ -29,11 +28,9 @@
 def test_transcribe_batch_dynamic_batching_v2(
     capsys: pytest.CaptureFixture,
 ) -> None:
-    project_id = os.getenv("GOOGLE_CLOUD_PROJECT")
-
     response = (
         transcribe_batch_dynamic_batching_v2.transcribe_batch_dynamic_batching_v2(
-            project_id, _TEST_AUDIO_FILE_PATH
+            _TEST_AUDIO_FILE_PATH
         )
     )
 

diff --git a/speech/snippets/transcribe_batch_gcs_input_gcs_output_v2.py b/speech/snippets/transcribe_batch_gcs_input_gcs_output_v2.py
@@ -12,31 +12,31 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-
-import argparse
-
 # [START speech_transcribe_batch_gcs_input_gcs_output_v2]
+import os
+
 import re
 
 from google.cloud import storage
 from google.cloud.speech_v2 import SpeechClient
 from google.cloud.speech_v2.types import cloud_speech
 
+PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT")
+
 
 def transcribe_batch_gcs_input_gcs_output_v2(
-    project_id: str,
-    gcs_uri: str,
+    audio_uri: str,
     gcs_output_path: str,
 ) -> cloud_speech.BatchRecognizeResults:
-    """Transcribes audio from a Google Cloud Storage URI.
-
+    """Transcribes audio from a Google Cloud Storage URI using the Google Cloud Speech-to-Text API.
+    The transcription results are stored in another Google Cloud Storage bucket.
     Args:
-        project_id: The Google Cloud project ID.
-        gcs_uri: The Google Cloud Storage URI.
-        gcs_output_path: The Cloud Storage URI to which to write the transcript.
-
+        audio_uri (str): The Google Cloud Storage URI of the input audio file.
+            E.g., gs://[BUCKET]/[FILE]
+        gcs_output_path (str): The Google Cloud Storage bucket URI where the output transcript will be stored.
+            E.g., gs://[BUCKET]
     Returns:
-        The BatchRecognizeResults message.
+        cloud_speech.BatchRecognizeResults: The response containing the URI of the transcription results.
     """
     # Instantiates a client
     client = SpeechClient()
@@ -47,10 +47,10 @@ def transcribe_batch_gcs_input_gcs_output_v2(
         model="long",
     )
 
-    file_metadata = cloud_speech.BatchRecognizeFileMetadata(uri=gcs_uri)
+    file_metadata = cloud_speech.BatchRecognizeFileMetadata(uri=audio_uri)
 
     request = cloud_speech.BatchRecognizeRequest(
-        recognizer=f"projects/{project_id}/locations/global/recognizers/_",
+        recognizer=f"projects/{PROJECT_ID}/locations/global/recognizers/_",
         config=config,
         files=[file_metadata],
         recognition_output_config=cloud_speech.RecognitionOutputConfig(
@@ -66,7 +66,7 @@ def transcribe_batch_gcs_input_gcs_output_v2(
     print("Waiting for operation to complete...")
     response = operation.result(timeout=120)
 
-    file_results = response.results[gcs_uri]
+    file_results = response.results[audio_uri]
 
     print(f"Operation finished. Fetching results from {file_results.uri}...")
     output_bucket, output_object = re.match(
@@ -94,15 +94,6 @@ def transcribe_batch_gcs_input_gcs_output_v2(
 
 
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser(
-        description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
-    )
-    parser.add_argument("project_id", help="GCP Project ID")
-    parser.add_argument("gcs_uri", help="URI to GCS file")
-    parser.add_argument(
-        "gcs_output_path", help="GCS URI to which to write the transcript"
-    )
-    args = parser.parse_args()
-    transcribe_batch_gcs_input_gcs_output_v2(
-        args.project_id, args.gcs_uri, args.gcs_output_path
-    )
+    audio_uri = "gs://cloud-samples-data/speech/audio.flac"
+    output_bucket_name = "gs://your-bucket-unique-name"
+    transcribe_batch_gcs_input_gcs_output_v2(audio_uri, output_bucket_name)
diff --git a/speech/snippets/transcribe_batch_gcs_input_gcs_output_v2_test.py b/speech/snippets/transcribe_batch_gcs_input_gcs_output_v2_test.py
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import os
 import re
 from uuid import uuid4
 
@@ -43,10 +42,8 @@ def test_transcribe_batch_gcs_input_gcs_output_v2(
     gcs_bucket: pytest.CaptureFixture,
     capsys: pytest.CaptureFixture,
 ) -> None:
-    project_id = os.getenv("GOOGLE_CLOUD_PROJECT")
-
     response = transcribe_batch_gcs_input_gcs_output_v2.transcribe_batch_gcs_input_gcs_output_v2(
-        project_id, _TEST_AUDIO_FILE_PATH, f"gs://{gcs_bucket}"
+        _TEST_AUDIO_FILE_PATH, f"gs://{gcs_bucket}"
     )
 
     assert re.search(

diff --git a/speech/snippets/transcribe_batch_gcs_input_inline_output_v2.py b/speech/snippets/transcribe_batch_gcs_input_inline_output_v2.py
@@ -12,26 +12,25 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-
-import argparse
-
 # [START speech_transcribe_batch_gcs_input_inline_output_v2]
+import os
+
 from google.cloud.speech_v2 import SpeechClient
 from google.cloud.speech_v2.types import cloud_speech
 
+PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT")
+
 
 def transcribe_batch_gcs_input_inline_output_v2(
-    project_id: str,
-    gcs_uri: str,
+    audio_uri: str,
 ) -> cloud_speech.BatchRecognizeResults:
-    """Transcribes audio from a Google Cloud Storage URI.
-
+    """Transcribes audio from a Google Cloud Storage URI using the Google Cloud Speech-to-Text API.
+    The transcription results are returned inline in the response.
     Args:
-        project_id: The Google Cloud project ID.
-        gcs_uri: The Google Cloud Storage URI.
-
+        audio_uri (str): The Google Cloud Storage URI of the input audio file.
+            E.g., gs://[BUCKET]/[FILE]
     Returns:
-        The RecognizeResponse.
+        cloud_speech.BatchRecognizeResults: The response containing the transcription results.
     """
     # Instantiates a client
     client = SpeechClient()
@@ -42,10 +41,10 @@ def transcribe_batch_gcs_input_inline_output_v2(
         model="long",
     )
 
-    file_metadata = cloud_speech.BatchRecognizeFileMetadata(uri=gcs_uri)
+    file_metadata = cloud_speech.BatchRecognizeFileMetadata(uri=audio_uri)
 
     request = cloud_speech.BatchRecognizeRequest(
-        recognizer=f"projects/{project_id}/locations/global/recognizers/_",
+        recognizer=f"projects/{PROJECT_ID}/locations/global/recognizers/_",
         config=config,
         files=[file_metadata],
         recognition_output_config=cloud_speech.RecognitionOutputConfig(
@@ -59,20 +58,15 @@ def transcribe_batch_gcs_input_inline_output_v2(
     print("Waiting for operation to complete...")
     response = operation.result(timeout=120)
 
-    for result in response.results[gcs_uri].transcript.results:
+    for result in response.results[audio_uri].transcript.results:
         print(f"Transcript: {result.alternatives[0].transcript}")
 
-    return response.results[gcs_uri].transcript
+    return response.results[audio_uri].transcript
 
 
 # [END speech_transcribe_batch_gcs_input_inline_output_v2]
 
 
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser(
-        description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
-    )
-    parser.add_argument("project_id", help="GCP Project ID")
-    parser.add_argument("gcs_uri", help="URI to GCS file")
-    args = parser.parse_args()
-    transcribe_batch_gcs_input_inline_output_v2(args.project_id, args.gcs_uri)
+    audio_uri = "gs://cloud-samples-data/speech/audio.flac"
+    transcribe_batch_gcs_input_inline_output_v2(audio_uri)
diff --git a/speech/snippets/transcribe_batch_gcs_input_inline_output_v2_test.py b/speech/snippets/transcribe_batch_gcs_input_inline_output_v2_test.py
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import os
 import re
 
 from flaky import flaky
@@ -21,18 +20,15 @@
 
 import transcribe_batch_gcs_input_inline_output_v2
 
-
 _TEST_AUDIO_FILE_PATH = "gs://cloud-samples-data/speech/audio.flac"
 
 
 @flaky(max_runs=10, min_passes=1)
 def test_transcribe_batch_gcs_input_inline_output_v2(
     capsys: pytest.CaptureFixture,
 ) -> None:
-    project_id = os.getenv("GOOGLE_CLOUD_PROJECT")
-
     response = transcribe_batch_gcs_input_inline_output_v2.transcribe_batch_gcs_input_inline_output_v2(
-        project_id, _TEST_AUDIO_FILE_PATH
+        _TEST_AUDIO_FILE_PATH
     )
 
     assert re.search(

diff --git a/speech/snippets/transcribe_batch_multiple_files_v2.py b/speech/snippets/transcribe_batch_multiple_files_v2.py
@@ -12,32 +12,31 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-
-import argparse
-
 # [START speech_transcribe_batch_multiple_files_v2]
+import os
 import re
 from typing import List
 
 from google.cloud import storage
 from google.cloud.speech_v2 import SpeechClient
 from google.cloud.speech_v2.types import cloud_speech
 
+PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT")
+
 
 def transcribe_batch_multiple_files_v2(
-    project_id: str,
-    gcs_uris: List[str],
+    audio_uris: List[str],
     gcs_output_path: str,
 ) -> cloud_speech.BatchRecognizeResponse:
-    """Transcribes audio from a Google Cloud Storage URI.
-
+    """Transcribes audio from multiple Google Cloud Storage URIs using the Google Cloud Speech-to-Text API.
+    The transcription results are stored in another Google Cloud Storage bucket.
     Args:
-        project_id: The Google Cloud project ID.
-        gcs_uris: The Google Cloud Storage URIs to transcribe.
-        gcs_output_path: The Cloud Storage URI to which to write the transcript.
-
+        audio_uris (List[str]): The list of Google Cloud Storage URIs of the input audio files.
+            E.g., ["gs://[BUCKET]/[FILE]", "gs://[BUCKET]/[FILE]"]
+        gcs_output_path (str): The Google Cloud Storage bucket URI where the output transcript will be stored.
+            E.g., gs://[BUCKET]
     Returns:
-        The BatchRecognizeResponse message.
+        cloud_speech.BatchRecognizeResponse: The response containing the URIs of the transcription results.
     """
     # Instantiates a client
     client = SpeechClient()
@@ -48,10 +47,10 @@ def transcribe_batch_multiple_files_v2(
         model="long",
     )
 
-    files = [cloud_speech.BatchRecognizeFileMetadata(uri=uri) for uri in gcs_uris]
+    files = [cloud_speech.BatchRecognizeFileMetadata(uri=uri) for uri in audio_uris]
 
     request = cloud_speech.BatchRecognizeRequest(
-        recognizer=f"projects/{project_id}/locations/global/recognizers/_",
+        recognizer=f"projects/{PROJECT_ID}/locations/global/recognizers/_",
         config=config,
         files=files,
         recognition_output_config=cloud_speech.RecognitionOutputConfig(
@@ -68,7 +67,7 @@ def transcribe_batch_multiple_files_v2(
     response = operation.result(timeout=120)
 
     print("Operation finished. Fetching results from:")
-    for uri in gcs_uris:
+    for uri in audio_uris:
         file_results = response.results[uri]
         print(f"  {file_results.uri}...")
         output_bucket, output_object = re.match(
@@ -96,15 +95,8 @@ def transcribe_batch_multiple_files_v2(
 
 
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser(
-        description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
-    )
-    parser.add_argument("project_id", help="GCP Project ID")
-    parser.add_argument("gcs_uri", nargs="+", help="URI to GCS file")
-    parser.add_argument(
-        "gcs_output_path", help="GCS URI to which to write the transcript"
-    )
-    args = parser.parse_args()
-    transcribe_batch_multiple_files_v2(
-        args.project_id, args.gcs_uri, args.gcs_output_path
-    )
+    audio1 = "gs://cloud-samples-data/speech/audio.flac"
+    audio2 = "gs://cloud-samples-data/speech/corbeau_renard.flac"
+    uris_list = [audio1, audio2]
+    output_bucket_name = "gs://your-bucket-name"
+    transcribe_batch_multiple_files_v2(uris_list, output_bucket_name)