samples: adds speech export to gcs sample (#176)

googleapis · Jun 15, 2021 · 3dd6e6d · 3dd6e6d
1 parent ee17e3a
commit 3dd6e6d
Show file tree

Hide file tree

Showing 6 changed files with 166 additions and 38 deletions.
diff --git a/google-cloud-speech/samples/microphone/noxfile.py b/google-cloud-speech/samples/microphone/noxfile.py
@@ -38,31 +38,29 @@
 
 TEST_CONFIG = {
     # You can opt out from the test for specific Python versions.
-    'ignored_versions': ["2.7"],
-
+    "ignored_versions": ["2.7"],
     # Old samples are opted out of enforcing Python type hints
     # All new samples should feature them
-    'enforce_type_hints': False,
-
+    "enforce_type_hints": False,
     # An envvar key for determining the project id to use. Change it
     # to 'BUILD_SPECIFIC_GCLOUD_PROJECT' if you want to opt in using a
     # build specific Cloud project. You can also use your own string
     # to use your own Cloud project.
-    'gcloud_project_env': 'GOOGLE_CLOUD_PROJECT',
+    "gcloud_project_env": "GOOGLE_CLOUD_PROJECT",
     # 'gcloud_project_env': 'BUILD_SPECIFIC_GCLOUD_PROJECT',
     # If you need to use a specific version of pip,
     # change pip_version_override to the string representation
     # of the version number, for example, "20.2.4"
     "pip_version_override": None,
     # A dictionary you want to inject into your test. Don't put any
     # secrets here. These values will override predefined values.
-    'envs': {},
+    "envs": {},
 }
 
 
 try:
     # Ensure we can import noxfile_config in the project's directory.
-    sys.path.append('.')
+    sys.path.append(".")
     from noxfile_config import TEST_CONFIG_OVERRIDE
 except ImportError as e:
     print("No user noxfile_config found: detail: {}".format(e))
@@ -77,12 +75,12 @@ def get_pytest_env_vars() -> Dict[str, str]:
     ret = {}
 
     # Override the GCLOUD_PROJECT and the alias.
-    env_key = TEST_CONFIG['gcloud_project_env']
+    env_key = TEST_CONFIG["gcloud_project_env"]
     # This should error out if not set.
-    ret['GOOGLE_CLOUD_PROJECT'] = os.environ[env_key]
+    ret["GOOGLE_CLOUD_PROJECT"] = os.environ[env_key]
 
     # Apply user supplied envs.
-    ret.update(TEST_CONFIG['envs'])
+    ret.update(TEST_CONFIG["envs"])
     return ret
 
 
@@ -91,7 +89,7 @@ def get_pytest_env_vars() -> Dict[str, str]:
 ALL_VERSIONS = ["2.7", "3.6", "3.7", "3.8", "3.9"]
 
 # Any default versions that should be ignored.
-IGNORED_VERSIONS = TEST_CONFIG['ignored_versions']
+IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"]
 
 TESTED_VERSIONS = sorted([v for v in ALL_VERSIONS if v not in IGNORED_VERSIONS])
 
@@ -140,7 +138,7 @@ def _determine_local_import_names(start_dir: str) -> List[str]:
 
 @nox.session
 def lint(session: nox.sessions.Session) -> None:
-    if not TEST_CONFIG['enforce_type_hints']:
+    if not TEST_CONFIG["enforce_type_hints"]:
         session.install("flake8", "flake8-import-order")
     else:
         session.install("flake8", "flake8-import-order", "flake8-annotations")
@@ -149,9 +147,11 @@ def lint(session: nox.sessions.Session) -> None:
     args = FLAKE8_COMMON_ARGS + [
         "--application-import-names",
         ",".join(local_names),
-        "."
+        ".",
     ]
     session.run("flake8", *args)
+
+
 #
 # Black
 #
@@ -164,6 +164,7 @@ def blacken(session: nox.sessions.Session) -> None:
 
     session.run("black", *python_files)
 
+
 #
 # Sample Tests
 #
@@ -212,9 +213,9 @@ def py(session: nox.sessions.Session) -> None:
     if session.python in TESTED_VERSIONS:
         _session_tests(session)
     else:
-        session.skip("SKIPPED: {} tests are disabled for this sample.".format(
-            session.python
-        ))
+        session.skip(
+            "SKIPPED: {} tests are disabled for this sample.".format(session.python)
+        )
 
 
 #

diff --git a/google-cloud-speech/samples/snippets/noxfile.py b/google-cloud-speech/samples/snippets/noxfile.py
@@ -38,31 +38,29 @@
 
 TEST_CONFIG = {
     # You can opt out from the test for specific Python versions.
-    'ignored_versions': ["2.7"],
-
+    "ignored_versions": ["2.7"],
     # Old samples are opted out of enforcing Python type hints
     # All new samples should feature them
-    'enforce_type_hints': False,
-
+    "enforce_type_hints": False,
     # An envvar key for determining the project id to use. Change it
     # to 'BUILD_SPECIFIC_GCLOUD_PROJECT' if you want to opt in using a
     # build specific Cloud project. You can also use your own string
     # to use your own Cloud project.
-    'gcloud_project_env': 'GOOGLE_CLOUD_PROJECT',
+    "gcloud_project_env": "GOOGLE_CLOUD_PROJECT",
     # 'gcloud_project_env': 'BUILD_SPECIFIC_GCLOUD_PROJECT',
     # If you need to use a specific version of pip,
     # change pip_version_override to the string representation
     # of the version number, for example, "20.2.4"
     "pip_version_override": None,
     # A dictionary you want to inject into your test. Don't put any
     # secrets here. These values will override predefined values.
-    'envs': {},
+    "envs": {},
 }
 
 
 try:
     # Ensure we can import noxfile_config in the project's directory.
-    sys.path.append('.')
+    sys.path.append(".")
     from noxfile_config import TEST_CONFIG_OVERRIDE
 except ImportError as e:
     print("No user noxfile_config found: detail: {}".format(e))
@@ -77,12 +75,12 @@ def get_pytest_env_vars() -> Dict[str, str]:
     ret = {}
 
     # Override the GCLOUD_PROJECT and the alias.
-    env_key = TEST_CONFIG['gcloud_project_env']
+    env_key = TEST_CONFIG["gcloud_project_env"]
     # This should error out if not set.
-    ret['GOOGLE_CLOUD_PROJECT'] = os.environ[env_key]
+    ret["GOOGLE_CLOUD_PROJECT"] = os.environ[env_key]
 
     # Apply user supplied envs.
-    ret.update(TEST_CONFIG['envs'])
+    ret.update(TEST_CONFIG["envs"])
     return ret
 
 
@@ -91,7 +89,7 @@ def get_pytest_env_vars() -> Dict[str, str]:
 ALL_VERSIONS = ["2.7", "3.6", "3.7", "3.8", "3.9"]
 
 # Any default versions that should be ignored.
-IGNORED_VERSIONS = TEST_CONFIG['ignored_versions']
+IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"]
 
 TESTED_VERSIONS = sorted([v for v in ALL_VERSIONS if v not in IGNORED_VERSIONS])
 
@@ -140,7 +138,7 @@ def _determine_local_import_names(start_dir: str) -> List[str]:
 
 @nox.session
 def lint(session: nox.sessions.Session) -> None:
-    if not TEST_CONFIG['enforce_type_hints']:
+    if not TEST_CONFIG["enforce_type_hints"]:
         session.install("flake8", "flake8-import-order")
     else:
         session.install("flake8", "flake8-import-order", "flake8-annotations")
@@ -149,9 +147,11 @@ def lint(session: nox.sessions.Session) -> None:
     args = FLAKE8_COMMON_ARGS + [
         "--application-import-names",
         ",".join(local_names),
-        "."
+        ".",
     ]
     session.run("flake8", *args)
+
+
 #
 # Black
 #
@@ -164,6 +164,7 @@ def blacken(session: nox.sessions.Session) -> None:
 
     session.run("black", *python_files)
 
+
 #
 # Sample Tests
 #
@@ -172,7 +173,9 @@ def blacken(session: nox.sessions.Session) -> None:
 PYTEST_COMMON_ARGS = ["--junitxml=sponge_log.xml"]
 
 
-def _session_tests(session: nox.sessions.Session, post_install: Callable = None) -> None:
+def _session_tests(
+    session: nox.sessions.Session, post_install: Callable = None
+) -> None:
     if TEST_CONFIG["pip_version_override"]:
         pip_version = TEST_CONFIG["pip_version_override"]
         session.install(f"pip=={pip_version}")
@@ -202,7 +205,7 @@ def _session_tests(session: nox.sessions.Session, post_install: Callable = None)
         # on travis where slow and flaky tests are excluded.
         # See http://doc.pytest.org/en/latest/_modules/_pytest/main.html
         success_codes=[0, 5],
-        env=get_pytest_env_vars()
+        env=get_pytest_env_vars(),
     )
 
 
@@ -212,9 +215,9 @@ def py(session: nox.sessions.Session) -> None:
     if session.python in TESTED_VERSIONS:
         _session_tests(session)
     else:
-        session.skip("SKIPPED: {} tests are disabled for this sample.".format(
-            session.python
-        ))
+        session.skip(
+            "SKIPPED: {} tests are disabled for this sample.".format(session.python)
+        )
 
 
 #
@@ -223,7 +226,7 @@ def py(session: nox.sessions.Session) -> None:
 
 
 def _get_repo_root() -> Optional[str]:
-    """ Returns the root folder of the project. """
+    """Returns the root folder of the project."""
     # Get root of this repository. Assume we don't have directories nested deeper than 10 items.
     p = Path(os.getcwd())
     for i in range(10):

diff --git a/google-cloud-speech/samples/snippets/requirements.txt b/google-cloud-speech/samples/snippets/requirements.txt
@@ -1 +1,2 @@
 google-cloud-speech==2.4.0
+google-cloud-storage==1.38.0
diff --git a/google-cloud-speech/samples/snippets/speech_model_adaptation_beta.py b/google-cloud-speech/samples/snippets/speech_model_adaptation_beta.py
@@ -64,9 +64,7 @@ def transcribe_with_model_adaptation(
     # class and phrase set to send a transcription request with speech adaptation
 
     # Speech adaptation configuration
-    speech_adaptation = speech.SpeechAdaptation(
-        phrase_set_references=[phrase_set_name]
-    )
+    speech_adaptation = speech.SpeechAdaptation(phrase_set_references=[phrase_set_name])
 
     # speech configuration object
     config = speech.RecognitionConfig(

diff --git a/google-cloud-speech/samples/snippets/speech_to_storage_beta.py b/google-cloud-speech/samples/snippets/speech_to_storage_beta.py
@@ -0,0 +1,59 @@
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# [START speech_transcribe_with_speech_to_storage_beta]
+
+from google.cloud import speech_v1p1beta1 as speech
+
+
+def export_transcript_to_storage_beta(
+    input_storage_uri, output_storage_uri, encoding, sample_rate_hertz, language_code
+):
+    """Transcribe audio held in Cloud Storage and export the transcript to a bucket.
+
+    Args:
+        input_storage_uri: URI of the audio file in Cloud Storage,
+            e.g. gs://[BUCKET]/[FILE].
+        output_storage_uri: Cloud Storage URI that will hold the transcription.
+        encoding: Audio encoding forwarded to ``RecognitionConfig``.
+        sample_rate_hertz: Sample rate forwarded to ``RecognitionConfig``.
+        language_code: Language code forwarded to ``RecognitionConfig``.
+
+    Returns:
+        The transcript of the first alternative of the first result.
+    """
+    # Long-running request: where the audio lives, how to decode it, and where
+    # the service should store the transcription.
+    request = speech.LongRunningRecognizeRequest(
+        audio=speech.RecognitionAudio(uri=input_storage_uri),
+        config=speech.RecognitionConfig(
+            encoding=encoding,
+            sample_rate_hertz=sample_rate_hertz,
+            language_code=language_code,
+        ),
+        output_config=speech.TranscriptOutputConfig(gcs_uri=output_storage_uri),
+    )
+
+    # Create the speech client and start the operation.
+    client = speech.SpeechClient()
+    operation = client.long_running_recognize(request=request)
+
+    print("Waiting for operation to complete...")
+    response = operation.result(timeout=90)
+
+    # Results cover consecutive portions of the audio; report each in turn.
+    for portion in response.results:
+        # The first alternative is the most likely one for this portion.
+        best = portion.alternatives[0]
+        print("Transcript: {}".format(best.transcript))
+        print("Confidence: {}".format(best.confidence))
+
+    # [END speech_transcribe_with_speech_to_storage_beta]
+    # NOTE: indexing results[0] fails if the response contains no results.
+    return response.results[0].alternatives[0].transcript
diff --git a/google-cloud-speech/samples/snippets/speech_to_storage_beta_test.py b/google-cloud-speech/samples/snippets/speech_to_storage_beta_test.py
@@ -0,0 +1,66 @@
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import uuid
+
+from google.cloud import speech_v1p1beta1 as speech
+from google.cloud import storage
+import pytest
+
+import speech_to_storage_beta
+
+# NOTE(review): STORAGE_URI is not referenced anywhere else in this file.
+STORAGE_URI = "gs://cloud-samples-data/speech/brooklyn_bridge.raw"
+
+
+# Cloud Storage client created at import time; the bucket fixture uses it.
+storage_client = storage.Client()
+
+# First 8 characters of a random UUID, so each run targets its own bucket name.
+BUCKET_UUID = str(uuid.uuid4())[:8]
+BUCKET_NAME = f"speech-{BUCKET_UUID}"
+# Object-name prefix used for the output URI and for cleanup in the fixture.
+BUCKET_PREFIX = "export-transcript-output-test"
+# NOTE(review): DELIMETER (sic; "DELIMITER") is not referenced anywhere else
+# in this file.
+DELIMETER = None
+
+# Audio to transcribe, and the gs:// destination handed to the sample.
+INPUT_STORAGE_URI = "gs://cloud-samples-data/speech/commercial_mono.wav"
+OUTPUT_STORAGE_URI = f"gs://{BUCKET_NAME}/{BUCKET_PREFIX}"
+# Recognition settings passed to the sample; presumably these match
+# commercial_mono.wav - TODO confirm.
+encoding = speech.RecognitionConfig.AudioEncoding.LINEAR16
+sample_rate_hertz = 8000
+language_code = "en-US"
+
+
+def test_export_transcript_to_storage_beta(bucket, capsys):
+    """Run the export sample and check the returned transcript text.
+
+    The ``bucket`` fixture is requested only for its setup/teardown: it creates
+    the bucket that OUTPUT_STORAGE_URI points into and removes it afterwards.
+    """
+    result = speech_to_storage_beta.export_transcript_to_storage_beta(
+        input_storage_uri=INPUT_STORAGE_URI,
+        output_storage_uri=OUTPUT_STORAGE_URI,
+        encoding=encoding,
+        sample_rate_hertz=sample_rate_hertz,
+        language_code=language_code,
+    )
+    assert "I'm here" in result
+
+
+@pytest.fixture
+def bucket():
+    """Create the output bucket, yield it, then delete it and its contents.
+
+    Yields:
+        The newly created bucket named BUCKET_NAME.
+
+    Raises:
+        RuntimeError: If a bucket named BUCKET_NAME already exists.
+    """
+    bucket = storage_client.bucket(BUCKET_NAME)
+    # BUCKET_NAME is fixed at import time and baked into OUTPUT_STORAGE_URI, so
+    # re-checking the same name can never start succeeding. Fail fast instead
+    # of looping on exists() forever.
+    if bucket.exists():
+        raise RuntimeError(
+            f"Bucket {BUCKET_NAME} already exists; "
+            "re-run the tests to generate a fresh bucket name."
+        )
+    bucket.storage_class = "COLDLINE"
+    storage_client.create_bucket(bucket, location="us")
+    yield bucket
+
+    # Teardown: remove the exported transcript objects, then the bucket itself.
+    blobs = storage_client.list_blobs(BUCKET_NAME, prefix=BUCKET_PREFIX)
+
+    for blob in blobs:
+        blob.delete()
+
+    bucket.delete(force=True)