diff --git a/speech/cloud-client/README.md b/speech/cloud-client/README.md index e6c5d7b1c9d..c0397107edb 100644 --- a/speech/cloud-client/README.md +++ b/speech/cloud-client/README.md @@ -34,5 +34,5 @@ You can then run a given `ClassName` via: ### Transcribe a remote audio file (using the recognize sample) ``` mvn exec:java -Dexec.mainClass=com.example.speech.Recognize \ - -Dexec.args="syncrecognize 'gs://java-docs-samples-tests/speech/brooklyn.flac'" + -Dexec.args="syncrecognize 'gs://cloud-samples-tests/speech/brooklyn.flac'" ``` diff --git a/speech/cloud-client/pom.xml b/speech/cloud-client/pom.xml index 218a2cfda58..1d5009b0cac 100644 --- a/speech/cloud-client/pom.xml +++ b/speech/cloud-client/pom.xml @@ -37,8 +37,8 @@ com.google.cloud - google-cloud-speech - 0.8.1-alpha + google-cloud + 0.11.2-alpha diff --git a/speech/cloud-client/src/main/java/com/example/speech/QuickstartSample.java b/speech/cloud-client/src/main/java/com/example/speech/QuickstartSample.java index 73e234836f9..955f971eb29 100644 --- a/speech/cloud-client/src/main/java/com/example/speech/QuickstartSample.java +++ b/speech/cloud-client/src/main/java/com/example/speech/QuickstartSample.java @@ -18,13 +18,13 @@ // [START speech_quickstart] // Imports the Google Cloud client library -import com.google.cloud.speech.spi.v1beta1.SpeechClient; -import com.google.cloud.speech.v1beta1.RecognitionAudio; -import com.google.cloud.speech.v1beta1.RecognitionConfig; -import com.google.cloud.speech.v1beta1.RecognitionConfig.AudioEncoding; -import com.google.cloud.speech.v1beta1.SpeechRecognitionAlternative; -import com.google.cloud.speech.v1beta1.SpeechRecognitionResult; -import com.google.cloud.speech.v1beta1.SyncRecognizeResponse; +import com.google.cloud.speech.spi.v1.SpeechClient; +import com.google.cloud.speech.v1.RecognitionAudio; +import com.google.cloud.speech.v1.RecognitionConfig; +import com.google.cloud.speech.v1.RecognitionConfig.AudioEncoding; +import 
com.google.cloud.speech.v1.RecognizeResponse; +import com.google.cloud.speech.v1.SpeechRecognitionAlternative; +import com.google.cloud.speech.v1.SpeechRecognitionResult; import com.google.protobuf.ByteString; import java.nio.file.Files; @@ -48,14 +48,15 @@ public static void main(String... args) throws Exception { // Builds the sync recognize request RecognitionConfig config = RecognitionConfig.newBuilder() .setEncoding(AudioEncoding.LINEAR16) - .setSampleRate(16000) + .setSampleRateHertz(16000) + .setLanguageCode("en-US") .build(); RecognitionAudio audio = RecognitionAudio.newBuilder() .setContent(audioBytes) .build(); // Performs speech recognition on the audio file - SyncRecognizeResponse response = speech.syncRecognize(config, audio); + RecognizeResponse response = speech.recognize(config, audio); List results = response.getResultsList(); for (SpeechRecognitionResult result: results) { diff --git a/speech/cloud-client/src/main/java/com/example/speech/Recognize.java b/speech/cloud-client/src/main/java/com/example/speech/Recognize.java index 6b6abef94d2..a6480b50f92 100644 --- a/speech/cloud-client/src/main/java/com/example/speech/Recognize.java +++ b/speech/cloud-client/src/main/java/com/example/speech/Recognize.java @@ -16,15 +16,22 @@ package com.example.speech; +import com.google.api.gax.core.ApiStreamObserver; import com.google.api.gax.grpc.OperationFuture; -import com.google.cloud.speech.spi.v1beta1.SpeechClient; -import com.google.cloud.speech.v1beta1.AsyncRecognizeResponse; -import com.google.cloud.speech.v1beta1.RecognitionAudio; -import com.google.cloud.speech.v1beta1.RecognitionConfig; -import com.google.cloud.speech.v1beta1.RecognitionConfig.AudioEncoding; -import com.google.cloud.speech.v1beta1.SpeechRecognitionAlternative; -import com.google.cloud.speech.v1beta1.SpeechRecognitionResult; -import com.google.cloud.speech.v1beta1.SyncRecognizeResponse; +import com.google.api.gax.grpc.StreamingCallable; +import 
com.google.cloud.speech.spi.v1.SpeechClient; +import com.google.cloud.speech.v1.LongRunningRecognizeResponse; +import com.google.cloud.speech.v1.RecognitionAudio; +import com.google.cloud.speech.v1.RecognitionConfig; +import com.google.cloud.speech.v1.RecognitionConfig.AudioEncoding; +import com.google.cloud.speech.v1.RecognizeResponse; +import com.google.cloud.speech.v1.SpeechRecognitionAlternative; +import com.google.cloud.speech.v1.SpeechRecognitionResult; +import com.google.cloud.speech.v1.StreamingRecognitionConfig; +import com.google.cloud.speech.v1.StreamingRecognitionResult; +import com.google.cloud.speech.v1.StreamingRecognizeRequest; +import com.google.cloud.speech.v1.StreamingRecognizeResponse; +import com.google.common.util.concurrent.SettableFuture; import com.google.protobuf.ByteString; import java.io.IOException; @@ -40,7 +47,7 @@ public static void main(String... args) throws Exception { System.out.printf( "\tjava %s \"\" \"\"\n" + "Commands:\n" - + "\tsyncrecognize | asyncrecognize\n" + + "\tsyncrecognize | asyncrecognize | streamrecognize\n" + "Path:\n\tA file path (ex: ./resources/audio.raw) or a URI " + "for a Cloud Storage resource (gs://...)\n", Recognize.class.getCanonicalName()); @@ -62,7 +69,11 @@ public static void main(String... 
args) throws Exception { } else { asyncRecognizeFile(path); } + } else if (command.equals("streamrecognize")) { + streamingRecognizeFile(path); + //streamingRecognizeEasy(path); } + } /** @@ -80,14 +91,15 @@ public static void syncRecognizeFile(String fileName) throws Exception, IOExcept // Configure request with local raw PCM audio RecognitionConfig config = RecognitionConfig.newBuilder() .setEncoding(AudioEncoding.LINEAR16) - .setSampleRate(16000) + .setLanguageCode("en-US") + .setSampleRateHertz(16000) .build(); RecognitionAudio audio = RecognitionAudio.newBuilder() .setContent(audioBytes) .build(); // Use blocking call to get audio transcript - SyncRecognizeResponse response = speech.syncRecognize(config, audio); + RecognizeResponse response = speech.recognize(config, audio); List results = response.getResultsList(); for (SpeechRecognitionResult result: results) { @@ -111,14 +123,15 @@ public static void syncRecognizeGcs(String gcsUri) throws Exception, IOException // Builds the request for remote FLAC file RecognitionConfig config = RecognitionConfig.newBuilder() .setEncoding(AudioEncoding.FLAC) - .setSampleRate(16000) + .setLanguageCode("en-US") + .setSampleRateHertz(16000) .build(); RecognitionAudio audio = RecognitionAudio.newBuilder() .setUri(gcsUri) .build(); // Use blocking call for getting audio transcript - SyncRecognizeResponse response = speech.syncRecognize(config, audio); + RecognizeResponse response = speech.recognize(config, audio); List results = response.getResultsList(); for (SpeechRecognitionResult result: results) { @@ -130,6 +143,7 @@ public static void syncRecognizeGcs(String gcsUri) throws Exception, IOException speech.close(); } + /* /** * Performs non-blocking speech recognition on raw PCM audio and prints * the transcription. 
@@ -147,14 +161,16 @@ public static void asyncRecognizeFile(String fileName) throws Exception, IOExcep // Configure request with local raw PCM audio RecognitionConfig config = RecognitionConfig.newBuilder() .setEncoding(AudioEncoding.LINEAR16) - .setSampleRate(16000) + .setLanguageCode("en-US") + .setSampleRateHertz(16000) .build(); RecognitionAudio audio = RecognitionAudio.newBuilder() .setContent(audioBytes) .build(); // Use non-blocking call for getting file transcription - OperationFuture response = speech.asyncRecognizeAsync(config, audio); + OperationFuture response = + speech.longRunningRecognizeAsync(config, audio); while (!response.isDone()) { System.out.println("Waiting for response..."); Thread.sleep(200); @@ -175,23 +191,25 @@ public static void asyncRecognizeFile(String fileName) throws Exception, IOExcep * Performs non-blocking speech recognition on remote FLAC file and prints * the transcription. * - * @param gcsUri the path to the remote FLAC audio file to transcribe. + * @param gcsUri the Cloud Storage URI of the remote FLAC audio file to transcribe. 
*/ public static void asyncRecognizeGcs(String gcsUri) throws Exception, IOException { // Instantiates a client with GOOGLE_APPLICATION_CREDENTIALS SpeechClient speech = SpeechClient.create(); - // Configure remote file request for FLAC file + // Configure remote file request for FLAC audio RecognitionConfig config = RecognitionConfig.newBuilder() .setEncoding(AudioEncoding.FLAC) - .setSampleRate(16000) + .setLanguageCode("en-US") + .setSampleRateHertz(16000) .build(); RecognitionAudio audio = RecognitionAudio.newBuilder() .setUri(gcsUri) .build(); // Use non-blocking call for getting file transcription - OperationFuture response = speech.asyncRecognizeAsync(config, audio); + OperationFuture response = + speech.longRunningRecognizeAsync(config, audio); while (!response.isDone()) { System.out.println("Waiting for response..."); Thread.sleep(200); @@ -207,4 +225,86 @@ public static void asyncRecognizeGcs(String gcsUri) throws Exception, IOExceptio } speech.close(); } + + /** + * Performs streaming speech recognition on raw PCM audio data. + * + * @param fileName the path to a PCM audio file to transcribe. 
+ */ + public static void streamingRecognizeFile(String fileName) throws Exception, IOException { + Path path = Paths.get(fileName); + byte[] data = Files.readAllBytes(path); + + // Instantiates a client with GOOGLE_APPLICATION_CREDENTIALS + SpeechClient speech = SpeechClient.create(); + + // Configure request with local raw PCM audio + RecognitionConfig recConfig = RecognitionConfig.newBuilder() + .setEncoding(AudioEncoding.LINEAR16) + .setLanguageCode("en-US") + .setSampleRateHertz(16000) + .build(); + StreamingRecognitionConfig config = StreamingRecognitionConfig.newBuilder() + .setConfig(recConfig) + .build(); + + class ResponseApiStreamingObserver<T> implements ApiStreamObserver<T> { + private final SettableFuture<List<T>> future = SettableFuture.create(); + private final List<T> messages = new java.util.ArrayList<T>(); + + @Override + public void onNext(T message) { + messages.add(message); + } + + @Override + public void onError(Throwable t) { + future.setException(t); + } + + @Override + public void onCompleted() { + future.set(messages); + } + + // Returns the SettableFuture object to get received messages / exceptions. + public SettableFuture<List<T>> future() { + return future; + } + } + + ResponseApiStreamingObserver<StreamingRecognizeResponse> responseObserver = + new ResponseApiStreamingObserver<StreamingRecognizeResponse>(); + + StreamingCallable<StreamingRecognizeRequest, StreamingRecognizeResponse> callable = + speech.streamingRecognizeCallable(); + + ApiStreamObserver<StreamingRecognizeRequest> requestObserver = + callable.bidiStreamingCall(responseObserver); + + // The first request must **only** contain the audio configuration: + requestObserver.onNext(StreamingRecognizeRequest.newBuilder() + .setStreamingConfig(config) + .build()); + + // Subsequent requests must **only** contain the audio data. + requestObserver.onNext(StreamingRecognizeRequest.newBuilder() + .setAudioContent(ByteString.copyFrom(data)) + .build()); + + // Mark transmission as completed after sending the data. 
+ requestObserver.onCompleted(); + + List<StreamingRecognizeResponse> responses = responseObserver.future().get(); + + for (StreamingRecognizeResponse response: responses) { + for (StreamingRecognitionResult result: response.getResultsList()) { + for (SpeechRecognitionAlternative alternative : result.getAlternativesList()) { + System.out.println(alternative.getTranscript()); + } + } + } + speech.close(); + } + } diff --git a/speech/cloud-client/src/test/java/com/example/speech/RecognizeIT.java b/speech/cloud-client/src/test/java/com/example/speech/RecognizeIT.java index d001c410e38..dac2f6be8ef 100644 --- a/speech/cloud-client/src/test/java/com/example/speech/RecognizeIT.java +++ b/speech/cloud-client/src/test/java/com/example/speech/RecognizeIT.java @@ -61,25 +61,32 @@ public void testRecognizeFile() throws Exception { String got = bout.toString(); assertThat(got).contains("how old is the Brooklyn Bridge"); } - + @Test public void testRecognizeGcs() throws Exception { Recognize.syncRecognizeGcs(gcsPath); String got = bout.toString(); assertThat(got).contains("how old is the Brooklyn Bridge"); } - + @Test public void testAsyncRecognizeFile() throws Exception { Recognize.asyncRecognizeFile(fileName); String got = bout.toString(); assertThat(got).contains("how old is the Brooklyn Bridge"); } - + @Test public void testAsyncRecognizeGcs() throws Exception { Recognize.asyncRecognizeGcs(gcsPath); String got = bout.toString(); assertThat(got).contains("how old is the Brooklyn Bridge"); } + + @Test + public void testStreamRecognize() throws Exception { + Recognize.streamingRecognizeFile(fileName); + String got = bout.toString(); + assertThat(got).contains("how old is the Brooklyn Bridge"); + } }