Skip to content

Commit

Permalink
samples: adds speech export to gcs sample (#176)
Browse files Browse the repository at this point in the history
  • Loading branch information
b-loved-dreamer authored Jun 15, 2021
1 parent ee17e3a commit 3dd6e6d
Show file tree
Hide file tree
Showing 6 changed files with 166 additions and 38 deletions.
33 changes: 17 additions & 16 deletions google-cloud-speech/samples/microphone/noxfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,31 +38,29 @@

TEST_CONFIG = {
# You can opt out from the test for specific Python versions.
'ignored_versions': ["2.7"],

"ignored_versions": ["2.7"],
# Old samples are opted out of enforcing Python type hints
# All new samples should feature them
'enforce_type_hints': False,

"enforce_type_hints": False,
# An envvar key for determining the project id to use. Change it
# to 'BUILD_SPECIFIC_GCLOUD_PROJECT' if you want to opt in using a
# build specific Cloud project. You can also use your own string
# to use your own Cloud project.
'gcloud_project_env': 'GOOGLE_CLOUD_PROJECT',
"gcloud_project_env": "GOOGLE_CLOUD_PROJECT",
# 'gcloud_project_env': 'BUILD_SPECIFIC_GCLOUD_PROJECT',
# If you need to use a specific version of pip,
# change pip_version_override to the string representation
# of the version number, for example, "20.2.4"
"pip_version_override": None,
# A dictionary you want to inject into your test. Don't put any
# secrets here. These values will override predefined values.
'envs': {},
"envs": {},
}


try:
# Ensure we can import noxfile_config in the project's directory.
sys.path.append('.')
sys.path.append(".")
from noxfile_config import TEST_CONFIG_OVERRIDE
except ImportError as e:
print("No user noxfile_config found: detail: {}".format(e))
Expand All @@ -77,12 +75,12 @@ def get_pytest_env_vars() -> Dict[str, str]:
ret = {}

# Override the GCLOUD_PROJECT and the alias.
env_key = TEST_CONFIG['gcloud_project_env']
env_key = TEST_CONFIG["gcloud_project_env"]
# This should error out if not set.
ret['GOOGLE_CLOUD_PROJECT'] = os.environ[env_key]
ret["GOOGLE_CLOUD_PROJECT"] = os.environ[env_key]

# Apply user supplied envs.
ret.update(TEST_CONFIG['envs'])
ret.update(TEST_CONFIG["envs"])
return ret


Expand All @@ -91,7 +89,7 @@ def get_pytest_env_vars() -> Dict[str, str]:
ALL_VERSIONS = ["2.7", "3.6", "3.7", "3.8", "3.9"]

# Any default versions that should be ignored.
IGNORED_VERSIONS = TEST_CONFIG['ignored_versions']
IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"]

TESTED_VERSIONS = sorted([v for v in ALL_VERSIONS if v not in IGNORED_VERSIONS])

Expand Down Expand Up @@ -140,7 +138,7 @@ def _determine_local_import_names(start_dir: str) -> List[str]:

@nox.session
def lint(session: nox.sessions.Session) -> None:
if not TEST_CONFIG['enforce_type_hints']:
if not TEST_CONFIG["enforce_type_hints"]:
session.install("flake8", "flake8-import-order")
else:
session.install("flake8", "flake8-import-order", "flake8-annotations")
Expand All @@ -149,9 +147,11 @@ def lint(session: nox.sessions.Session) -> None:
args = FLAKE8_COMMON_ARGS + [
"--application-import-names",
",".join(local_names),
"."
".",
]
session.run("flake8", *args)


#
# Black
#
Expand All @@ -164,6 +164,7 @@ def blacken(session: nox.sessions.Session) -> None:

session.run("black", *python_files)


#
# Sample Tests
#
Expand Down Expand Up @@ -212,9 +213,9 @@ def py(session: nox.sessions.Session) -> None:
if session.python in TESTED_VERSIONS:
_session_tests(session)
else:
session.skip("SKIPPED: {} tests are disabled for this sample.".format(
session.python
))
session.skip(
"SKIPPED: {} tests are disabled for this sample.".format(session.python)
)


#
Expand Down
41 changes: 22 additions & 19 deletions google-cloud-speech/samples/snippets/noxfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,31 +38,29 @@

TEST_CONFIG = {
# You can opt out from the test for specific Python versions.
'ignored_versions': ["2.7"],

"ignored_versions": ["2.7"],
# Old samples are opted out of enforcing Python type hints
# All new samples should feature them
'enforce_type_hints': False,

"enforce_type_hints": False,
# An envvar key for determining the project id to use. Change it
# to 'BUILD_SPECIFIC_GCLOUD_PROJECT' if you want to opt in using a
# build specific Cloud project. You can also use your own string
# to use your own Cloud project.
'gcloud_project_env': 'GOOGLE_CLOUD_PROJECT',
"gcloud_project_env": "GOOGLE_CLOUD_PROJECT",
# 'gcloud_project_env': 'BUILD_SPECIFIC_GCLOUD_PROJECT',
# If you need to use a specific version of pip,
# change pip_version_override to the string representation
# of the version number, for example, "20.2.4"
"pip_version_override": None,
# A dictionary you want to inject into your test. Don't put any
# secrets here. These values will override predefined values.
'envs': {},
"envs": {},
}


try:
# Ensure we can import noxfile_config in the project's directory.
sys.path.append('.')
sys.path.append(".")
from noxfile_config import TEST_CONFIG_OVERRIDE
except ImportError as e:
print("No user noxfile_config found: detail: {}".format(e))
Expand All @@ -77,12 +75,12 @@ def get_pytest_env_vars() -> Dict[str, str]:
ret = {}

# Override the GCLOUD_PROJECT and the alias.
env_key = TEST_CONFIG['gcloud_project_env']
env_key = TEST_CONFIG["gcloud_project_env"]
# This should error out if not set.
ret['GOOGLE_CLOUD_PROJECT'] = os.environ[env_key]
ret["GOOGLE_CLOUD_PROJECT"] = os.environ[env_key]

# Apply user supplied envs.
ret.update(TEST_CONFIG['envs'])
ret.update(TEST_CONFIG["envs"])
return ret


Expand All @@ -91,7 +89,7 @@ def get_pytest_env_vars() -> Dict[str, str]:
ALL_VERSIONS = ["2.7", "3.6", "3.7", "3.8", "3.9"]

# Any default versions that should be ignored.
IGNORED_VERSIONS = TEST_CONFIG['ignored_versions']
IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"]

TESTED_VERSIONS = sorted([v for v in ALL_VERSIONS if v not in IGNORED_VERSIONS])

Expand Down Expand Up @@ -140,7 +138,7 @@ def _determine_local_import_names(start_dir: str) -> List[str]:

@nox.session
def lint(session: nox.sessions.Session) -> None:
if not TEST_CONFIG['enforce_type_hints']:
if not TEST_CONFIG["enforce_type_hints"]:
session.install("flake8", "flake8-import-order")
else:
session.install("flake8", "flake8-import-order", "flake8-annotations")
Expand All @@ -149,9 +147,11 @@ def lint(session: nox.sessions.Session) -> None:
args = FLAKE8_COMMON_ARGS + [
"--application-import-names",
",".join(local_names),
"."
".",
]
session.run("flake8", *args)


#
# Black
#
Expand All @@ -164,6 +164,7 @@ def blacken(session: nox.sessions.Session) -> None:

session.run("black", *python_files)


#
# Sample Tests
#
Expand All @@ -172,7 +173,9 @@ def blacken(session: nox.sessions.Session) -> None:
PYTEST_COMMON_ARGS = ["--junitxml=sponge_log.xml"]


def _session_tests(session: nox.sessions.Session, post_install: Callable = None) -> None:
def _session_tests(
session: nox.sessions.Session, post_install: Callable = None
) -> None:
if TEST_CONFIG["pip_version_override"]:
pip_version = TEST_CONFIG["pip_version_override"]
session.install(f"pip=={pip_version}")
Expand Down Expand Up @@ -202,7 +205,7 @@ def _session_tests(session: nox.sessions.Session, post_install: Callable = None)
# on travis where slow and flaky tests are excluded.
# See http://doc.pytest.org/en/latest/_modules/_pytest/main.html
success_codes=[0, 5],
env=get_pytest_env_vars()
env=get_pytest_env_vars(),
)


Expand All @@ -212,9 +215,9 @@ def py(session: nox.sessions.Session) -> None:
if session.python in TESTED_VERSIONS:
_session_tests(session)
else:
session.skip("SKIPPED: {} tests are disabled for this sample.".format(
session.python
))
session.skip(
"SKIPPED: {} tests are disabled for this sample.".format(session.python)
)


#
Expand All @@ -223,7 +226,7 @@ def py(session: nox.sessions.Session) -> None:


def _get_repo_root() -> Optional[str]:
""" Returns the root folder of the project. """
"""Returns the root folder of the project."""
# Get root of this repository. Assume we don't have directories nested deeper than 10 items.
p = Path(os.getcwd())
for i in range(10):
Expand Down
1 change: 1 addition & 0 deletions google-cloud-speech/samples/snippets/requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
google-cloud-speech==2.4.0
google-cloud-storage==1.38.0
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,7 @@ def transcribe_with_model_adaptation(
# class and phrase set to send a transcription request with speech adaptation

# Speech adaptation configuration
speech_adaptation = speech.SpeechAdaptation(
phrase_set_references=[phrase_set_name]
)
speech_adaptation = speech.SpeechAdaptation(phrase_set_references=[phrase_set_name])

# speech configuration object
config = speech.RecognitionConfig(
Expand Down
59 changes: 59 additions & 0 deletions google-cloud-speech/samples/snippets/speech_to_storage_beta.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


# [START speech_transcribe_with_speech_to_storage_beta]

from google.cloud import speech_v1p1beta1 as speech


def export_transcript_to_storage_beta(
    input_storage_uri,
    output_storage_uri,
    encoding,
    sample_rate_hertz,
    language_code,
    timeout=90,
):
    """Transcribe an audio file and export the transcript to Cloud Storage.

    Builds a long-running recognition request whose output configuration
    points at ``output_storage_uri``, waits for the operation to finish and
    prints the transcript and confidence of every returned result.

    Args:
        input_storage_uri: URI of the audio file in Cloud Storage,
            e.g. gs://[BUCKET]/[FILE].
        output_storage_uri: Cloud Storage URI that will hold the
            transcription.
        encoding: Audio encoding passed to ``speech.RecognitionConfig``.
        sample_rate_hertz: Sample rate of the audio, in hertz.
        language_code: Language of the audio, e.g. "en-US".
        timeout: Seconds to wait for the long-running operation to
            complete. Defaults to 90, the value previously hard-coded.

    Returns:
        The transcript of the first alternative of the first result, or an
        empty string when the response contains no results.
    """
    # URI for the audio file in Cloud Storage, e.g. gs://[BUCKET]/[FILE]
    audio = speech.RecognitionAudio(uri=input_storage_uri)

    # Pass in the URI of the Cloud Storage bucket to hold the transcription
    output_config = speech.TranscriptOutputConfig(gcs_uri=output_storage_uri)

    # Speech configuration object
    config = speech.RecognitionConfig(
        encoding=encoding,
        sample_rate_hertz=sample_rate_hertz,
        language_code=language_code,
    )

    # Compose the long-running request
    request = speech.LongRunningRecognizeRequest(
        audio=audio, config=config, output_config=output_config
    )

    # Create the speech client
    speech_client = speech.SpeechClient()

    operation = speech_client.long_running_recognize(request=request)

    print("Waiting for operation to complete...")
    response = operation.result(timeout=timeout)

    # Each result is for a consecutive portion of the audio. Iterate through
    # them to get the transcripts for the entire audio file.
    for result in response.results:
        # The first alternative is the most likely one for this portion.
        best = result.alternatives[0]
        print("Transcript: {}".format(best.transcript))
        print("Confidence: {}".format(best.confidence))

    # [END speech_transcribe_with_speech_to_storage_beta]
    # An empty result list would make the indexing below raise IndexError;
    # report "no transcript" explicitly instead.
    if not response.results:
        return ""
    return response.results[0].alternatives[0].transcript
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import uuid

from google.cloud import speech_v1p1beta1 as speech
from google.cloud import storage
import pytest

import speech_to_storage_beta

# NOTE(review): STORAGE_URI is not referenced by any code visible in this
# file -- confirm whether it can be removed.
STORAGE_URI = "gs://cloud-samples-data/speech/brooklyn_bridge.raw"


# Cloud Storage client shared by the fixture below; created at import time.
storage_client = storage.Client()

# The bucket name carries the first 8 characters of a random UUID4, generated
# once per import of this module.
BUCKET_UUID = str(uuid.uuid4())[:8]
BUCKET_NAME = f"speech-{BUCKET_UUID}"
# Object-name prefix used both in the output URI and when listing blobs for
# cleanup.
BUCKET_PREFIX = "export-transcript-output-test"
# NOTE(review): DELIMETER (sic, "delimiter") is not used in the visible code.
DELIMETER = None

# Arguments handed to the sample under test.
INPUT_STORAGE_URI = "gs://cloud-samples-data/speech/commercial_mono.wav"
OUTPUT_STORAGE_URI = f"gs://{BUCKET_NAME}/{BUCKET_PREFIX}"
encoding = speech.RecognitionConfig.AudioEncoding.LINEAR16
sample_rate_hertz = 8000
language_code = "en-US"


def test_export_transcript_to_storage_beta(bucket, capsys):
    """Run the export sample against the temporary bucket and check the text.

    The ``bucket`` fixture provides the output bucket that
    OUTPUT_STORAGE_URI points into.
    """
    sample_kwargs = dict(
        input_storage_uri=INPUT_STORAGE_URI,
        output_storage_uri=OUTPUT_STORAGE_URI,
        encoding=encoding,
        sample_rate_hertz=sample_rate_hertz,
        language_code=language_code,
    )
    result_text = speech_to_storage_beta.export_transcript_to_storage_beta(
        **sample_kwargs
    )
    assert "I'm here" in result_text


@pytest.fixture
def bucket():
    """Yields a bucket that is deleted after the test completes.

    The bucket is named BUCKET_NAME and created with the COLDLINE storage
    class in the "us" location. After the test, every blob under
    BUCKET_PREFIX is deleted and then the bucket itself is removed.
    """
    # BUCKET_NAME is fixed for the whole module, so retrying while the bucket
    # exists can never pick a different name and would loop forever. Create it
    # once and let create_bucket report a name collision as an error instead.
    bucket = storage_client.bucket(BUCKET_NAME)
    bucket.storage_class = "COLDLINE"
    storage_client.create_bucket(bucket, location="us")
    yield bucket

    # Remove the exported transcript objects before dropping the bucket.
    blobs = storage_client.list_blobs(BUCKET_NAME, prefix=BUCKET_PREFIX)

    for blob in blobs:
        blob.delete()

    bucket.delete(force=True)

0 comments on commit 3dd6e6d

Please sign in to comment.