diff --git a/speech/cloud-client/README.md b/speech/cloud-client/README.md index 963e2294fc0..c5adea63b89 100644 --- a/speech/cloud-client/README.md +++ b/speech/cloud-client/README.md @@ -12,51 +12,70 @@ using the [Google Cloud Client Library for Java][google-cloud-java]. [speech]: https://cloud.google.com/speech/docs/ [google-cloud-java]: https://github.com/GoogleCloudPlatform/google-cloud-java -## Quickstart +## Setup Install [Maven](http://maven.apache.org/). Build your project with: - mvn clean compile assembly:single - -### Transcribe a local audio file (using the quickstart sample) +``` +mvn clean compile assembly:single +``` - java -cp target/speech-google-cloud-samples-1.0.0-jar-with-dependencies.jar \ - com.example.speech.QuickstartSample +## Quickstart +Transcribe a local audio file +``` +java -cp target/speech-google-cloud-samples-1.0.0-jar-with-dependencies.jar \ +com.example.speech.QuickstartSample +``` -### Transcribe a local audio file (using the recognize sample) +## Transcribe an audio file +Transcribe a local audio file ``` java -cp target/speech-google-cloud-samples-1.0.0-jar-with-dependencies.jar \ com.example.speech.Recognize syncrecognize ./resources/audio.raw ``` -### Asynchronously transcribe a local audio file (using the recognize sample) +Asynchronously transcribe a local audio file ``` java -cp target/speech-google-cloud-samples-1.0.0-jar-with-dependencies.jar \ com.example.speech.Recognize asyncrecognize ./resources/audio.raw ``` -### Transcribe a remote audio file (using the recognize sample) +Transcribe a remote audio file ``` java -cp target/speech-google-cloud-samples-1.0.0-jar-with-dependencies.jar \ com.example.speech.Recognize syncrecognize gs://cloud-samples-tests/speech/brooklyn.flac ``` -### Asynchronously transcribe a remote audio file (using the recognize sample) +Asynchronously transcribe a remote audio file ``` java -cp target/speech-google-cloud-samples-1.0.0-jar-with-dependencies.jar \ com.example.speech.Recognize 
asyncrecognize gs://cloud-samples-tests/speech/vr.flac ``` -### Synchronously transcribe an audio file and print word offsets +## Transcribe an audio file and print word offsets +Synchronously transcribe an audio file and print word offsets ``` java -cp target/speech-google-cloud-samples-1.0.0-jar-with-dependencies.jar \ com.example.speech.Recognize wordoffsets ./resources/audio.raw ``` -### Asynchronously transcribe a remote audio file and print word offsets +Asynchronously transcribe a remote audio file and print word offsets ``` java -cp target/speech-google-cloud-samples-1.0.0-jar-with-dependencies.jar \ com.example.speech.Recognize wordoffsets gs://cloud-samples-tests/speech/vr.flac ``` + +## Transcribe a video file +Synchronously transcribe a video file +``` + java -cp target/speech-google-cloud-samples-1.0.0-jar-with-dependencies.jar \ + com.example.speech.Recognize video ./resources/Google_Gnome.wav +``` + +Asynchronously transcribe a video file hosted on GCS +``` + java -cp target/speech-google-cloud-samples-1.0.0-jar-with-dependencies.jar \ + com.example.speech.Recognize video gs://cloud-samples-tests/speech/Google_Gnome.wav +``` diff --git a/speech/cloud-client/pom.xml b/speech/cloud-client/pom.xml index 8c984ccd3c5..2fdecf10f72 100644 --- a/speech/cloud-client/pom.xml +++ b/speech/cloud-client/pom.xml @@ -1,5 +1,5 @@ diff --git a/speech/cloud-client/resources/Google_Gnome.wav b/speech/cloud-client/resources/Google_Gnome.wav new file mode 100644 index 00000000000..2f497b7fbe7 Binary files /dev/null and b/speech/cloud-client/resources/Google_Gnome.wav differ diff --git a/speech/cloud-client/src/main/java/com/example/speech/QuickstartSample.java b/speech/cloud-client/src/main/java/com/example/speech/QuickstartSample.java index e9f5cef94f1..f47497e2c1f 100644 --- a/speech/cloud-client/src/main/java/com/example/speech/QuickstartSample.java +++ b/speech/cloud-client/src/main/java/com/example/speech/QuickstartSample.java @@ -1,5 +1,5 @@ /* - * Copyright 2017 
Google Inc. + * Copyright 2018 Google Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,14 +18,13 @@ // [START speech_quickstart] // Imports the Google Cloud client library - -import com.google.cloud.speech.v1.RecognitionAudio; -import com.google.cloud.speech.v1.RecognitionConfig; -import com.google.cloud.speech.v1.RecognitionConfig.AudioEncoding; -import com.google.cloud.speech.v1.RecognizeResponse; -import com.google.cloud.speech.v1.SpeechClient; -import com.google.cloud.speech.v1.SpeechRecognitionAlternative; -import com.google.cloud.speech.v1.SpeechRecognitionResult; +import com.google.cloud.speech.v1p1beta1.RecognitionAudio; +import com.google.cloud.speech.v1p1beta1.RecognitionConfig; +import com.google.cloud.speech.v1p1beta1.RecognitionConfig.AudioEncoding; +import com.google.cloud.speech.v1p1beta1.RecognizeResponse; +import com.google.cloud.speech.v1p1beta1.SpeechClient; +import com.google.cloud.speech.v1p1beta1.SpeechRecognitionAlternative; +import com.google.cloud.speech.v1p1beta1.SpeechRecognitionResult; import com.google.protobuf.ByteString; import java.nio.file.Files; import java.nio.file.Path; @@ -33,39 +32,43 @@ import java.util.List; public class QuickstartSample { + + /** + * Demonstrates using the Speech API to transcribe an audio file. + */ public static void main(String... 
args) throws Exception { // Instantiates a client - SpeechClient speech = SpeechClient.create(); + try (SpeechClient speechClient = SpeechClient.create()) { - // The path to the audio file to transcribe - String fileName = "./resources/audio.raw"; + // The path to the audio file to transcribe + String fileName = "./resources/audio.raw"; - // Reads the audio file into memory - Path path = Paths.get(fileName); - byte[] data = Files.readAllBytes(path); - ByteString audioBytes = ByteString.copyFrom(data); + // Reads the audio file into memory + Path path = Paths.get(fileName); + byte[] data = Files.readAllBytes(path); + ByteString audioBytes = ByteString.copyFrom(data); - // Builds the sync recognize request - RecognitionConfig config = RecognitionConfig.newBuilder() - .setEncoding(AudioEncoding.LINEAR16) - .setSampleRateHertz(16000) - .setLanguageCode("en-US") - .build(); - RecognitionAudio audio = RecognitionAudio.newBuilder() - .setContent(audioBytes) - .build(); + // Builds the sync recognize request + RecognitionConfig config = RecognitionConfig.newBuilder() + .setEncoding(AudioEncoding.LINEAR16) + .setSampleRateHertz(16000) + .setLanguageCode("en-US") + .build(); + RecognitionAudio audio = RecognitionAudio.newBuilder() + .setContent(audioBytes) + .build(); - // Performs speech recognition on the audio file - RecognizeResponse response = speech.recognize(config, audio); - List results = response.getResultsList(); + // Performs speech recognition on the audio file + RecognizeResponse response = speechClient.recognize(config, audio); + List results = response.getResultsList(); - for (SpeechRecognitionResult result: results) { - // There can be several alternative transcripts for a given chunk of speech. Just use the - // first (most likely) one here. 
- SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0); - System.out.printf("Transcription: %s%n", alternative.getTranscript()); + for (SpeechRecognitionResult result : results) { + // There can be several alternative transcripts for a given chunk of speech. Just use the + // first (most likely) one here. + SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0); + System.out.printf("Transcription: %s%n", alternative.getTranscript()); + } } - speech.close(); } } // [END speech_quickstart] diff --git a/speech/cloud-client/src/main/java/com/example/speech/Recognize.java b/speech/cloud-client/src/main/java/com/example/speech/Recognize.java index bffa00c770b..1976be3c98f 100644 --- a/speech/cloud-client/src/main/java/com/example/speech/Recognize.java +++ b/speech/cloud-client/src/main/java/com/example/speech/Recognize.java @@ -1,5 +1,5 @@ /* - * Copyright 2017 Google Inc. + * Copyright 2018 Google Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -19,20 +19,20 @@ import com.google.api.gax.longrunning.OperationFuture; import com.google.api.gax.rpc.ApiStreamObserver; import com.google.api.gax.rpc.BidiStreamingCallable; -import com.google.cloud.speech.v1.LongRunningRecognizeMetadata; -import com.google.cloud.speech.v1.LongRunningRecognizeResponse; -import com.google.cloud.speech.v1.RecognitionAudio; -import com.google.cloud.speech.v1.RecognitionConfig; -import com.google.cloud.speech.v1.RecognitionConfig.AudioEncoding; -import com.google.cloud.speech.v1.RecognizeResponse; -import com.google.cloud.speech.v1.SpeechClient; -import com.google.cloud.speech.v1.SpeechRecognitionAlternative; -import com.google.cloud.speech.v1.SpeechRecognitionResult; -import com.google.cloud.speech.v1.StreamingRecognitionConfig; -import com.google.cloud.speech.v1.StreamingRecognitionResult; -import com.google.cloud.speech.v1.StreamingRecognizeRequest; -import com.google.cloud.speech.v1.StreamingRecognizeResponse; -import com.google.cloud.speech.v1.WordInfo; +import com.google.cloud.speech.v1p1beta1.LongRunningRecognizeMetadata; +import com.google.cloud.speech.v1p1beta1.LongRunningRecognizeResponse; +import com.google.cloud.speech.v1p1beta1.RecognitionAudio; +import com.google.cloud.speech.v1p1beta1.RecognitionConfig; +import com.google.cloud.speech.v1p1beta1.RecognitionConfig.AudioEncoding; +import com.google.cloud.speech.v1p1beta1.RecognizeResponse; +import com.google.cloud.speech.v1p1beta1.SpeechClient; +import com.google.cloud.speech.v1p1beta1.SpeechRecognitionAlternative; +import com.google.cloud.speech.v1p1beta1.SpeechRecognitionResult; +import com.google.cloud.speech.v1p1beta1.StreamingRecognitionConfig; +import com.google.cloud.speech.v1p1beta1.StreamingRecognitionResult; +import com.google.cloud.speech.v1p1beta1.StreamingRecognizeRequest; +import com.google.cloud.speech.v1p1beta1.StreamingRecognizeResponse; +import com.google.cloud.speech.v1p1beta1.WordInfo; import com.google.common.util.concurrent.SettableFuture; import 
com.google.protobuf.ByteString; import java.io.IOException; @@ -42,6 +42,10 @@ import java.util.List; public class Recognize { + + /** + * Run speech recognition tasks. + */ public static void main(String... args) throws Exception { if (args.length < 1) { System.out.println("Usage:"); @@ -78,8 +82,13 @@ public static void main(String... args) throws Exception { } } else if (command.equals("streamrecognize")) { streamingRecognizeFile(path); + } else if (command.equals("video")) { + if (path.startsWith("gs://")) { + transcribeGcsVideoFile(path); + } else { + transcribeVideoFile(path); + } } - } /** @@ -87,34 +96,33 @@ public static void main(String... args) throws Exception { * * @param fileName the path to a PCM audio file to transcribe. */ - public static void syncRecognizeFile(String fileName) throws Exception, IOException { - SpeechClient speech = SpeechClient.create(); - - Path path = Paths.get(fileName); - byte[] data = Files.readAllBytes(path); - ByteString audioBytes = ByteString.copyFrom(data); - - // Configure request with local raw PCM audio - RecognitionConfig config = RecognitionConfig.newBuilder() - .setEncoding(AudioEncoding.LINEAR16) - .setLanguageCode("en-US") - .setSampleRateHertz(16000) - .build(); - RecognitionAudio audio = RecognitionAudio.newBuilder() - .setContent(audioBytes) - .build(); - - // Use blocking call to get audio transcript - RecognizeResponse response = speech.recognize(config, audio); - List results = response.getResultsList(); - - for (SpeechRecognitionResult result: results) { - // There can be several alternative transcripts for a given chunk of speech. Just use the - // first (most likely) one here. 
- SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0); - System.out.printf("Transcription: %s%n", alternative.getTranscript()); + public static void syncRecognizeFile(String fileName) throws Exception { + try (SpeechClient speech = SpeechClient.create()) { + Path path = Paths.get(fileName); + byte[] data = Files.readAllBytes(path); + ByteString audioBytes = ByteString.copyFrom(data); + + // Configure request with local raw PCM audio + RecognitionConfig config = RecognitionConfig.newBuilder() + .setEncoding(AudioEncoding.LINEAR16) + .setLanguageCode("en-US") + .setSampleRateHertz(16000) + .build(); + RecognitionAudio audio = RecognitionAudio.newBuilder() + .setContent(audioBytes) + .build(); + + // Use blocking call to get audio transcript + RecognizeResponse response = speech.recognize(config, audio); + List results = response.getResultsList(); + + for (SpeechRecognitionResult result : results) { + // There can be several alternative transcripts for a given chunk of speech. Just use the + // first (most likely) one here. + SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0); + System.out.printf("Transcription: %s%n", alternative.getTranscript()); + } } - speech.close(); } /** @@ -122,121 +130,118 @@ public static void syncRecognizeFile(String fileName) throws Exception, IOExcept * * @param fileName the path to a PCM audio file to transcribe get offsets on. 
*/ - public static void syncRecognizeWords(String fileName) throws Exception, IOException { - SpeechClient speech = SpeechClient.create(); - - Path path = Paths.get(fileName); - byte[] data = Files.readAllBytes(path); - ByteString audioBytes = ByteString.copyFrom(data); - - // Configure request with local raw PCM audio - RecognitionConfig config = RecognitionConfig.newBuilder() - .setEncoding(AudioEncoding.LINEAR16) - .setLanguageCode("en-US") - .setSampleRateHertz(16000) - .setEnableWordTimeOffsets(true) - .build(); - RecognitionAudio audio = RecognitionAudio.newBuilder() - .setContent(audioBytes) - .build(); - - // Use blocking call to get audio transcript - RecognizeResponse response = speech.recognize(config, audio); - List results = response.getResultsList(); - - for (SpeechRecognitionResult result: results) { - // There can be several alternative transcripts for a given chunk of speech. Just use the - // first (most likely) one here. - SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0); - System.out.printf("Transcription: %s%n", alternative.getTranscript()); - for (WordInfo wordInfo: alternative.getWordsList()) { - System.out.println(wordInfo.getWord()); - System.out.printf("\t%s.%s sec - %s.%s sec\n", - wordInfo.getStartTime().getSeconds(), - wordInfo.getStartTime().getNanos() / 100000000, - wordInfo.getEndTime().getSeconds(), - wordInfo.getEndTime().getNanos() / 100000000); + public static void syncRecognizeWords(String fileName) throws Exception { + try (SpeechClient speech = SpeechClient.create()) { + Path path = Paths.get(fileName); + byte[] data = Files.readAllBytes(path); + ByteString audioBytes = ByteString.copyFrom(data); + + // Configure request with local raw PCM audio + RecognitionConfig config = RecognitionConfig.newBuilder() + .setEncoding(AudioEncoding.LINEAR16) + .setLanguageCode("en-US") + .setSampleRateHertz(16000) + .setEnableWordTimeOffsets(true) + .build(); + RecognitionAudio audio = 
RecognitionAudio.newBuilder() + .setContent(audioBytes) + .build(); + + // Use blocking call to get audio transcript + RecognizeResponse response = speech.recognize(config, audio); + List results = response.getResultsList(); + + for (SpeechRecognitionResult result : results) { + // There can be several alternative transcripts for a given chunk of speech. Just use the + // first (most likely) one here. + SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0); + System.out.printf("Transcription: %s%n", alternative.getTranscript()); + for (WordInfo wordInfo : alternative.getWordsList()) { + System.out.println(wordInfo.getWord()); + System.out.printf("\t%s.%s sec - %s.%s sec\n", + wordInfo.getStartTime().getSeconds(), + wordInfo.getStartTime().getNanos() / 100000000, + wordInfo.getEndTime().getSeconds(), + wordInfo.getEndTime().getNanos() / 100000000); + } } } - speech.close(); } - /** * Performs speech recognition on remote FLAC file and prints the transcription. * * @param gcsUri the path to the remote FLAC audio file to transcribe. */ - public static void syncRecognizeGcs(String gcsUri) throws Exception, IOException { + public static void syncRecognizeGcs(String gcsUri) throws Exception { // Instantiates a client with GOOGLE_APPLICATION_CREDENTIALS - SpeechClient speech = SpeechClient.create(); - - // Builds the request for remote FLAC file - RecognitionConfig config = RecognitionConfig.newBuilder() - .setEncoding(AudioEncoding.FLAC) - .setLanguageCode("en-US") - .setSampleRateHertz(16000) - .build(); - RecognitionAudio audio = RecognitionAudio.newBuilder() - .setUri(gcsUri) - .build(); - - // Use blocking call for getting audio transcript - RecognizeResponse response = speech.recognize(config, audio); - List results = response.getResultsList(); - - for (SpeechRecognitionResult result: results) { - // There can be several alternative transcripts for a given chunk of speech. Just use the - // first (most likely) one here. 
- SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0); - System.out.printf("Transcription: %s%n", alternative.getTranscript()); + try (SpeechClient speech = SpeechClient.create()) { + // Builds the request for remote FLAC file + RecognitionConfig config = RecognitionConfig.newBuilder() + .setEncoding(AudioEncoding.FLAC) + .setLanguageCode("en-US") + .setSampleRateHertz(16000) + .build(); + RecognitionAudio audio = RecognitionAudio.newBuilder() + .setUri(gcsUri) + .build(); + + // Use blocking call for getting audio transcript + RecognizeResponse response = speech.recognize(config, audio); + List results = response.getResultsList(); + + for (SpeechRecognitionResult result : results) { + // There can be several alternative transcripts for a given chunk of speech. Just use the + // first (most likely) one here. + SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0); + System.out.printf("Transcription: %s%n", alternative.getTranscript()); + } } - speech.close(); } - /* + /** * Performs non-blocking speech recognition on raw PCM audio and prints * the transcription. Note that transcription is limited to 60 seconds audio. * * @param fileName the path to a PCM audio file to transcribe. 
*/ - public static void asyncRecognizeFile(String fileName) throws Exception, IOException { + public static void asyncRecognizeFile(String fileName) throws Exception { // Instantiates a client with GOOGLE_APPLICATION_CREDENTIALS - SpeechClient speech = SpeechClient.create(); - - Path path = Paths.get(fileName); - byte[] data = Files.readAllBytes(path); - ByteString audioBytes = ByteString.copyFrom(data); - - // Configure request with local raw PCM audio - RecognitionConfig config = RecognitionConfig.newBuilder() - .setEncoding(AudioEncoding.LINEAR16) - .setLanguageCode("en-US") - .setSampleRateHertz(16000) - .build(); - RecognitionAudio audio = RecognitionAudio.newBuilder() - .setContent(audioBytes) - .build(); - - // Use non-blocking call for getting file transcription - OperationFuture response = - speech.longRunningRecognizeAsync(config, audio); - - while (!response.isDone()) { - System.out.println("Waiting for response..."); - Thread.sleep(10000); - } + try (SpeechClient speech = SpeechClient.create()) { + + Path path = Paths.get(fileName); + byte[] data = Files.readAllBytes(path); + ByteString audioBytes = ByteString.copyFrom(data); + + // Configure request with local raw PCM audio + RecognitionConfig config = RecognitionConfig.newBuilder() + .setEncoding(AudioEncoding.LINEAR16) + .setLanguageCode("en-US") + .setSampleRateHertz(16000) + .build(); + RecognitionAudio audio = RecognitionAudio.newBuilder() + .setContent(audioBytes) + .build(); + + // Use non-blocking call for getting file transcription + OperationFuture response = + speech.longRunningRecognizeAsync(config, audio); + + while (!response.isDone()) { + System.out.println("Waiting for response..."); + Thread.sleep(10000); + } - List results = response.get().getResultsList(); + List results = response.get().getResultsList(); - for (SpeechRecognitionResult result: results) { - // There can be several alternative transcripts for a given chunk of speech. Just use the - // first (most likely) one here. 
- SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0); - System.out.printf("Transcription: %s%n", alternative.getTranscript()); + for (SpeechRecognitionResult result : results) { + // There can be several alternative transcripts for a given chunk of speech. Just use the + // first (most likely) one here. + SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0); + System.out.printf("Transcription: %s%n", alternative.getTranscript()); + } } - speech.close(); } /** @@ -245,46 +250,46 @@ public static void asyncRecognizeFile(String fileName) throws Exception, IOExcep * * @param gcsUri the path to the remote LINEAR16 audio file to transcribe. */ - public static void asyncRecognizeWords(String gcsUri) throws Exception, IOException { + public static void asyncRecognizeWords(String gcsUri) throws Exception { // Instantiates a client with GOOGLE_APPLICATION_CREDENTIALS - SpeechClient speech = SpeechClient.create(); - - // Configure remote file request for Linear16 - RecognitionConfig config = RecognitionConfig.newBuilder() - .setEncoding(AudioEncoding.FLAC) - .setLanguageCode("en-US") - .setSampleRateHertz(16000) - .setEnableWordTimeOffsets(true) - .build(); - RecognitionAudio audio = RecognitionAudio.newBuilder() - .setUri(gcsUri) - .build(); - - // Use non-blocking call for getting file transcription - OperationFuture response = - speech.longRunningRecognizeAsync(config, audio); - while (!response.isDone()) { - System.out.println("Waiting for response..."); - Thread.sleep(10000); - } - - List results = response.get().getResultsList(); + try (SpeechClient speech = SpeechClient.create()) { + + // Configure remote file request for Linear16 + RecognitionConfig config = RecognitionConfig.newBuilder() + .setEncoding(AudioEncoding.FLAC) + .setLanguageCode("en-US") + .setSampleRateHertz(16000) + .setEnableWordTimeOffsets(true) + .build(); + RecognitionAudio audio = RecognitionAudio.newBuilder() + .setUri(gcsUri) + .build(); + + 
// Use non-blocking call for getting file transcription + OperationFuture response = + speech.longRunningRecognizeAsync(config, audio); + while (!response.isDone()) { + System.out.println("Waiting for response..."); + Thread.sleep(10000); + } - for (SpeechRecognitionResult result: results) { - // There can be several alternative transcripts for a given chunk of speech. Just use the - // first (most likely) one here. - SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0); - System.out.printf("Transcription: %s\n",alternative.getTranscript()); - for (WordInfo wordInfo: alternative.getWordsList()) { - System.out.println(wordInfo.getWord()); - System.out.printf("\t%s.%s sec - %s.%s sec\n", - wordInfo.getStartTime().getSeconds(), - wordInfo.getStartTime().getNanos() / 100000000, - wordInfo.getEndTime().getSeconds(), - wordInfo.getEndTime().getNanos() / 100000000); + List results = response.get().getResultsList(); + + for (SpeechRecognitionResult result : results) { + // There can be several alternative transcripts for a given chunk of speech. Just use the + // first (most likely) one here. + SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0); + System.out.printf("Transcription: %s\n", alternative.getTranscript()); + for (WordInfo wordInfo : alternative.getWordsList()) { + System.out.println(wordInfo.getWord()); + System.out.printf("\t%s.%s sec - %s.%s sec\n", + wordInfo.getStartTime().getSeconds(), + wordInfo.getStartTime().getNanos() / 100000000, + wordInfo.getEndTime().getSeconds(), + wordInfo.getEndTime().getNanos() / 100000000); + } } } - speech.close(); } /** @@ -293,37 +298,37 @@ public static void asyncRecognizeWords(String gcsUri) throws Exception, IOExcept * * @param gcsUri the path to the remote LINEAR16 audio file to transcribe. 
*/ - public static void asyncRecognizeGcs(String gcsUri) throws Exception, IOException { + public static void asyncRecognizeGcs(String gcsUri) throws Exception { // Instantiates a client with GOOGLE_APPLICATION_CREDENTIALS - SpeechClient speech = SpeechClient.create(); - - // Configure remote file request for Linear16 - RecognitionConfig config = RecognitionConfig.newBuilder() - .setEncoding(AudioEncoding.FLAC) - .setLanguageCode("en-US") - .setSampleRateHertz(16000) - .build(); - RecognitionAudio audio = RecognitionAudio.newBuilder() - .setUri(gcsUri) - .build(); - - // Use non-blocking call for getting file transcription - OperationFuture response = - speech.longRunningRecognizeAsync(config, audio); - while (!response.isDone()) { - System.out.println("Waiting for response..."); - Thread.sleep(10000); - } + try (SpeechClient speech = SpeechClient.create()) { + + // Configure remote file request for Linear16 + RecognitionConfig config = RecognitionConfig.newBuilder() + .setEncoding(AudioEncoding.FLAC) + .setLanguageCode("en-US") + .setSampleRateHertz(16000) + .build(); + RecognitionAudio audio = RecognitionAudio.newBuilder() + .setUri(gcsUri) + .build(); + + // Use non-blocking call for getting file transcription + OperationFuture response = + speech.longRunningRecognizeAsync(config, audio); + while (!response.isDone()) { + System.out.println("Waiting for response..."); + Thread.sleep(10000); + } - List results = response.get().getResultsList(); + List results = response.get().getResultsList(); - for (SpeechRecognitionResult result: results) { - // There can be several alternative transcripts for a given chunk of speech. Just use the - // first (most likely) one here. - SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0); - System.out.printf("Transcription: %s\n",alternative.getTranscript()); + for (SpeechRecognitionResult result : results) { + // There can be several alternative transcripts for a given chunk of speech. 
Just use the + // first (most likely) one here. + SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0); + System.out.printf("Transcription: %s\n", alternative.getTranscript()); + } } - speech.close(); } @@ -337,78 +342,158 @@ public static void streamingRecognizeFile(String fileName) throws Exception, IOE byte[] data = Files.readAllBytes(path); // Instantiates a client with GOOGLE_APPLICATION_CREDENTIALS - SpeechClient speech = SpeechClient.create(); - - // Configure request with local raw PCM audio - RecognitionConfig recConfig = RecognitionConfig.newBuilder() - .setEncoding(AudioEncoding.LINEAR16) - .setLanguageCode("en-US") - .setSampleRateHertz(16000) - .build(); - StreamingRecognitionConfig config = StreamingRecognitionConfig.newBuilder() - .setConfig(recConfig) - .build(); - - class ResponseApiStreamingObserver implements ApiStreamObserver { - private final SettableFuture> future = SettableFuture.create(); - private final List messages = new java.util.ArrayList(); - - @Override - public void onNext(T message) { - messages.add(message); + try (SpeechClient speech = SpeechClient.create()) { + + // Configure request with local raw PCM audio + RecognitionConfig recConfig = RecognitionConfig.newBuilder() + .setEncoding(AudioEncoding.LINEAR16) + .setLanguageCode("en-US") + .setSampleRateHertz(16000) + .build(); + StreamingRecognitionConfig config = StreamingRecognitionConfig.newBuilder() + .setConfig(recConfig) + .build(); + + class ResponseApiStreamingObserver implements ApiStreamObserver { + private final SettableFuture> future = SettableFuture.create(); + private final List messages = new java.util.ArrayList(); + + @Override + public void onNext(T message) { + messages.add(message); + } + + @Override + public void onError(Throwable t) { + future.setException(t); + } + + @Override + public void onCompleted() { + future.set(messages); + } + + // Returns the SettableFuture object to get received messages / exceptions. 
+ public SettableFuture> future() { + return future; + } } - @Override - public void onError(Throwable t) { - future.setException(t); - } + ResponseApiStreamingObserver responseObserver = + new ResponseApiStreamingObserver<>(); - @Override - public void onCompleted() { - future.set(messages); - } + BidiStreamingCallable callable = + speech.streamingRecognizeCallable(); + + ApiStreamObserver requestObserver = + callable.bidiStreamingCall(responseObserver); + + // The first request must **only** contain the audio configuration: + requestObserver.onNext(StreamingRecognizeRequest.newBuilder() + .setStreamingConfig(config) + .build()); + + // Subsequent requests must **only** contain the audio data. + requestObserver.onNext(StreamingRecognizeRequest.newBuilder() + .setAudioContent(ByteString.copyFrom(data)) + .build()); - // Returns the SettableFuture object to get received messages / exceptions. - public SettableFuture> future() { - return future; + // Mark transmission as completed after sending the data. + requestObserver.onCompleted(); + + List responses = responseObserver.future().get(); + + for (StreamingRecognizeResponse response : responses) { + // For streaming recognize, the results list has one is_final result (if available) followed + // by a number of in-progress results (if interim_results is true) for subsequent utterances. + // Just print the first result here. + StreamingRecognitionResult result = response.getResultsList().get(0); + // There can be several alternative transcripts for a given chunk of speech. Just use the + // first (most likely) one here. 
+ SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0); + System.out.printf("Transcript : %s\n", alternative.getTranscript()); } } + } - ResponseApiStreamingObserver responseObserver = - new ResponseApiStreamingObserver(); + // [START speech_transcribe_model_selection] + /** + * Performs transcription of the given audio file synchronously with + * video as the original media type. + * @param fileName the path to a video file to transcribe + */ + public static void transcribeVideoFile(String fileName) throws Exception { + Path path = Paths.get(fileName); + byte[] content = Files.readAllBytes(path); - BidiStreamingCallable callable = - speech.streamingRecognizeCallable(); + try (SpeechClient speech = SpeechClient.create()) { + // Configure request with video media type + RecognitionConfig recConfig = RecognitionConfig.newBuilder() + // encoding may either be omitted or must match the value in the file header + .setEncoding(AudioEncoding.LINEAR16) + .setLanguageCode("en-US") + // sample rate hertz may either be omitted or must match the value in the file header + .setSampleRateHertz(16000) + .setModel("video") + .build(); - ApiStreamObserver requestObserver = - callable.bidiStreamingCall(responseObserver); + RecognitionAudio recognitionAudio = RecognitionAudio.newBuilder() + .setContent(ByteString.copyFrom(content)) + .build(); - // The first request must **only** contain the audio configuration: - requestObserver.onNext(StreamingRecognizeRequest.newBuilder() - .setStreamingConfig(config) - .build()); - // Subsequent requests must **only** contain the audio data. - requestObserver.onNext(StreamingRecognizeRequest.newBuilder() - .setAudioContent(ByteString.copyFrom(data)) - .build()); + RecognizeResponse recognizeResponse = speech.recognize(recConfig, recognitionAudio); + // Just print the first result here. 
+ SpeechRecognitionResult result = recognizeResponse.getResultsList().get(0); + // There can be several alternative transcripts for a given chunk of speech. Just use the + // first (most likely) one here. + SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0); + System.out.printf("Transcript : %s\n", alternative.getTranscript()); + } + // [END speech_transcribe_model_selection] + } - // Mark transmission as completed after sending the data. - requestObserver.onCompleted(); + // [START speech_transcribe_model_selection_gcs] + /** + * Performs transcription on remote video file and prints the transcription. + * + * @param gcsUri the path to the remote video file to transcribe. + */ + public static void transcribeGcsVideoFile(String gcsUri) throws Exception { + try (SpeechClient speech = SpeechClient.create()) { + + // Configure request with video media type + RecognitionConfig config = RecognitionConfig.newBuilder() + // encoding may either be omitted or must match the value in the file header + .setEncoding(AudioEncoding.LINEAR16) + .setLanguageCode("en-US") + // sample rate hertz may either be omitted or must match the value in the file header + .setSampleRateHertz(16000) + .setModel("video") + .build(); + + RecognitionAudio audio = RecognitionAudio.newBuilder() + .setUri(gcsUri) + .build(); + + // Use non-blocking call for getting file transcription + OperationFuture response = + speech.longRunningRecognizeAsync(config, audio); + + while (!response.isDone()) { + System.out.println("Waiting for response..."); + Thread.sleep(10000); + } - List responses = responseObserver.future().get(); + List results = response.get().getResultsList(); - for (StreamingRecognizeResponse response: responses) { - // For streaming recognize, the results list has one is_final result (if available) followed - // by a number of in-progress results (if iterim_results is true) for subsequent utterances. // Just print the first result here. 
- StreamingRecognitionResult result = response.getResultsList().get(0); + SpeechRecognitionResult result = results.get(0); // There can be several alternative transcripts for a given chunk of speech. Just use the // first (most likely) one here. SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0); - System.out.println(alternative.getTranscript()); + System.out.printf("Transcript : %s\n", alternative.getTranscript()); } - speech.close(); + // [END speech_transcribe_model_selection_gcs] } - } diff --git a/speech/cloud-client/src/test/java/com/example/speech/QuickstartSampleIT.java b/speech/cloud-client/src/test/java/com/example/speech/QuickstartSampleIT.java index 4b6fead1e81..22be282b8e1 100644 --- a/speech/cloud-client/src/test/java/com/example/speech/QuickstartSampleIT.java +++ b/speech/cloud-client/src/test/java/com/example/speech/QuickstartSampleIT.java @@ -1,5 +1,5 @@ /* - * Copyright 2017 Google Inc. + * Copyright 2018 Google Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/speech/cloud-client/src/test/java/com/example/speech/RecognizeIT.java b/speech/cloud-client/src/test/java/com/example/speech/RecognizeIT.java index ea4cddb0b5f..2970647a010 100644 --- a/speech/cloud-client/src/test/java/com/example/speech/RecognizeIT.java +++ b/speech/cloud-client/src/test/java/com/example/speech/RecognizeIT.java @@ -1,5 +1,5 @@ /* - * Copyright 2017 Google Inc. + * Copyright 2018 Google Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -39,8 +39,12 @@ public class RecognizeIT { private PrintStream out; // The path to the audio file to transcribe - private String fileName = "./resources/audio.raw"; - private String gcsPath = "gs://" + BUCKET + "/speech/brooklyn.flac"; + private String audioFileName = "./resources/audio.raw"; + private String gcsAudioPath = "gs://" + BUCKET + "/speech/brooklyn.flac"; + + // The path to the video file to transcribe + private String videoFileName = "./resources/Google_Gnome.wav"; + private String gcsVideoPath = "gs://" + BUCKET + "/speech/Google_Gnome.wav"; @Before public void setUp() { @@ -56,14 +60,14 @@ public void tearDown() { @Test public void testRecognizeFile() throws Exception { - Recognize.syncRecognizeFile(fileName); + Recognize.syncRecognizeFile(audioFileName); String got = bout.toString(); assertThat(got).contains("how old is the Brooklyn Bridge"); } @Test public void testRecognizeWordoffset() throws Exception { - Recognize.syncRecognizeWords(fileName); + Recognize.syncRecognizeWords(audioFileName); String got = bout.toString(); assertThat(got).contains("how old is the Brooklyn Bridge"); assertThat(got).contains("\t0.0 sec -"); @@ -71,28 +75,28 @@ public void testRecognizeWordoffset() throws Exception { @Test public void testRecognizeGcs() throws Exception { - Recognize.syncRecognizeGcs(gcsPath); + Recognize.syncRecognizeGcs(gcsAudioPath); String got = bout.toString(); assertThat(got).contains("how old is the Brooklyn Bridge"); } @Test public void testAsyncRecognizeFile() throws Exception { - Recognize.asyncRecognizeFile(fileName); + Recognize.asyncRecognizeFile(audioFileName); String got = bout.toString(); assertThat(got).contains("how old is the Brooklyn Bridge"); } @Test public void testAsyncRecognizeGcs() throws Exception { - Recognize.asyncRecognizeGcs(gcsPath); + Recognize.asyncRecognizeGcs(gcsAudioPath); String got = bout.toString(); assertThat(got).contains("how old is the Brooklyn Bridge"); } @Test public void testAsyncWordoffset() throws 
Exception { - Recognize.asyncRecognizeWords(gcsPath); + Recognize.asyncRecognizeWords(gcsAudioPath); String got = bout.toString(); assertThat(got).contains("how old is the Brooklyn Bridge"); assertThat(got).contains("\t0.0 sec -"); @@ -100,8 +104,24 @@ public void testAsyncWordoffset() throws Exception { @Test public void testStreamRecognize() throws Exception { - Recognize.streamingRecognizeFile(fileName); + Recognize.streamingRecognizeFile(audioFileName); String got = bout.toString(); assertThat(got).contains("how old is the Brooklyn Bridge"); } + + @Test + public void testVideoTranscription() throws Exception { + Recognize.transcribeVideoFile(videoFileName); + String got = bout.toString(); + assertThat(got).contains("OK Google"); + assertThat(got).contains("the weather outside is sunny"); + } + + @Test + public void testGcsVideoTranscription() throws Exception { + Recognize.transcribeGcsVideoFile(gcsVideoPath); + String got = bout.toString(); + assertThat(got).contains("OK Google"); + assertThat(got).contains("the weather outside is sunny"); + } }