-
Notifications
You must be signed in to change notification settings - Fork 6.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Speech GCS #784
Speech GCS #784
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,7 +17,9 @@ | |
"""Google Cloud Speech API sample application using the REST API for batch | ||
processing. | ||
|
||
Example usage: python transcribe.py resources/audio.raw | ||
Example usage: | ||
python transcribe.py resources/audio.raw | ||
python transcribe.py gs://cloud-samples-tests/speech/brooklyn.flac | ||
""" | ||
|
||
# [START import_libraries] | ||
|
@@ -26,12 +28,8 @@ | |
# [END import_libraries] | ||
|
||
|
||
def main(speech_file): | ||
"""Transcribe the given audio file. | ||
|
||
Args: | ||
speech_file: the name of the audio file. | ||
""" | ||
def transcribe_file(speech_file): | ||
"""Transcribe the given audio file.""" | ||
# [START authenticating] | ||
# Application default credentials provided by env variable | ||
# GOOGLE_APPLICATION_CREDENTIALS | ||
|
@@ -57,13 +55,48 @@ def main(speech_file): | |
# [END send_request] | ||
|
||
|
||
def transcribe_gcs(gcs_uri): | ||
"""Transcribes the audio file specified by the gcs_uri.""" | ||
# [START authenticating_gcs] | ||
# Application default credentials provided by env variable | ||
# GOOGLE_APPLICATION_CREDENTIALS | ||
from google.cloud import speech | ||
speech_client = speech.Client() | ||
# [END authenticating] | ||
|
||
# [START construct_request_gcs] | ||
audio_sample = speech_client.sample( | ||
None, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. What is `None`? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. content, as in there's either content or a gcs_uri |
||
source_uri=gcs_uri, | ||
encoding='FLAC', | ||
sample_rate=16000) | ||
# [END construct_request_gcs] | ||
|
||
# [START send_request_gcs] | ||
alternatives = speech_client.speech_api.sync_recognize(audio_sample) | ||
for alternative in alternatives: | ||
print('Transcript: {}'.format(alternative.transcript)) | ||
# [END send_request_gcs] | ||
|
||
|
||
def main(path): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We generally don't put main functions in our samples; just place this logic below or do separate subparsers for local path and gcs path. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Done, moved logic to right after the args are parsed. |
||
"""Transcribe the given audio file. | ||
Args: | ||
path: the name of the audio file. | ||
""" | ||
if path.startswith('gs://'): | ||
transcribe_gcs(path) | ||
else: | ||
transcribe_file(path) | ||
|
||
|
||
# [START run_application] | ||
if __name__ == '__main__': | ||
parser = argparse.ArgumentParser( | ||
description=__doc__, | ||
formatter_class=argparse.RawDescriptionHelpFormatter) | ||
parser.add_argument( | ||
'speech_file', help='Full path of audio file to be recognized') | ||
'path', help='File or GCS path for audio file to be recognized') | ||
args = parser.parse_args() | ||
main(args.speech_file) | ||
main(args.path) | ||
# [END run_application] |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,7 +17,9 @@ | |
"""Google Cloud Speech API sample application using the REST API for async | ||
batch processing. | ||
|
||
Example usage: python transcribe_async.py resources/audio.raw | ||
Example usage: | ||
python transcribe_async.py resources/audio.raw | ||
python transcribe_async.py gs://cloud-samples-tests/speech/brooklyn.flac | ||
""" | ||
|
||
# [START import_libraries] | ||
|
@@ -27,12 +29,8 @@ | |
# [END import_libraries] | ||
|
||
|
||
def main(speech_file): | ||
"""Transcribe the given audio file asynchronously. | ||
|
||
Args: | ||
speech_file: the name of the audio file. | ||
""" | ||
def transcribe_file(speech_file): | ||
"""Transcribe the given audio file asynchronously.""" | ||
# [START authenticating] | ||
# Application default credentials provided by env variable | ||
# GOOGLE_APPLICATION_CREDENTIALS | ||
|
@@ -71,13 +69,62 @@ def main(speech_file): | |
# [END send_request] | ||
|
||
|
||
def transcribe_gcs(gcs_uri): | ||
"""Asynchronously transcribes the audio file specified by the gcs_uri.""" | ||
# [START authenticating_gcs] | ||
# Application default credentials provided by env variable | ||
# GOOGLE_APPLICATION_CREDENTIALS | ||
from google.cloud import speech | ||
speech_client = speech.Client() | ||
# [END authenticating_gcs] | ||
|
||
# [START construct_request_gcs] | ||
# Loads the audio into memory | ||
audio_sample = speech_client.sample( | ||
content=None, | ||
source_uri=gcs_uri, | ||
encoding='FLAC', | ||
sample_rate=16000) | ||
# [END construct_request_gcs] | ||
|
||
# [START send_request_gcs] | ||
operation = speech_client.speech_api.async_recognize(audio_sample) | ||
|
||
retry_count = 100 | ||
while retry_count > 0 and not operation.complete: | ||
retry_count -= 1 | ||
time.sleep(2) | ||
operation.poll() | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I thought they were going to change this interface soon.... hmm. @dhermes? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
/cc @lukesneeringer There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Alright, cool, we'll catch it when @dpebot bumps our dependencies. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'll skip this for now. |
||
|
||
if not operation.complete: | ||
print("Operation not complete and retry limit reached.") | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Single quotes everywhere. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Done, I should add a module to my pavlok for such transgressions. |
||
return | ||
|
||
alternatives = operation.results | ||
for alternative in alternatives: | ||
print('Transcript: {}'.format(alternative.transcript)) | ||
print('Confidence: {}'.format(alternative.confidence)) | ||
# [END send_request_gcs] | ||
|
||
|
||
def main(path): | ||
"""Transcribe the given audio file. | ||
Args: | ||
path: the name of the audio file. | ||
""" | ||
if path.startswith('gs://'): | ||
transcribe_gcs(path) | ||
else: | ||
transcribe_file(path) | ||
|
||
|
||
# [START run_application] | ||
if __name__ == '__main__': | ||
parser = argparse.ArgumentParser( | ||
description=__doc__, | ||
formatter_class=argparse.RawDescriptionHelpFormatter) | ||
parser.add_argument( | ||
'speech_file', help='Full path of audio file to be recognized') | ||
'path', help='File or GCS path for audio file to be recognized') | ||
args = parser.parse_args() | ||
main(args.speech_file) | ||
main(args.path) | ||
# [END run_application] |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,8 +16,15 @@ | |
from transcribe_async import main | ||
|
||
|
||
def test_main(resource, capsys): | ||
def test_transcribe(resource, capsys): | ||
main(resource('audio.raw')) | ||
out, err = capsys.readouterr() | ||
|
||
assert re.search(r'how old is the Brooklyn Bridge', out, re.DOTALL | re.I) | ||
|
||
|
||
def test_transcribe_gcs(resource, capsys): | ||
main('gs://cloud-samples-tests/speech/brooklyn.flac') | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I prefer these to be in the python-docs-samples-tests bucket, provided by There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Done. |
||
out, err = capsys.readouterr() | ||
|
||
assert re.search(r'how old is the Brooklyn Bridge', out, re.DOTALL | re.I) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Meta: seems unnecessary to chop this sample up into so many regions.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
+1, these aren't even referenced anywhere in the docs yet; I just stuck to the inherited convention.