Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Speech v1 #601

Merged
merged 5 commits into from
Apr 7, 2017
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion speech/cloud-client/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,5 +34,5 @@ You can then run a given `ClassName` via:
### Transcribe a remote audio file (using the recognize sample)
```
mvn exec:java -Dexec.mainClass=com.example.speech.Recognize \
-Dexec.args="syncrecognize 'gs://java-docs-samples-tests/speech/brooklyn.flac'"
-Dexec.args="syncrecognize 'gs://cloud-samples-tests/speech/brooklyn.flac'"
```
4 changes: 2 additions & 2 deletions speech/cloud-client/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@
<!-- [START dependencies] -->
<dependency>
<groupId>com.google.cloud</groupId>
<artifactId>google-cloud-speech</artifactId>
<version>0.8.1-alpha</version>
<artifactId>google-cloud</artifactId>
<version>0.11.2-alpha</version>
</dependency>
<!-- [END dependencies] -->

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,13 @@

// [START speech_quickstart]
// Imports the Google Cloud client library
import com.google.cloud.speech.spi.v1beta1.SpeechClient;
import com.google.cloud.speech.v1beta1.RecognitionAudio;
import com.google.cloud.speech.v1beta1.RecognitionConfig;
import com.google.cloud.speech.v1beta1.RecognitionConfig.AudioEncoding;
import com.google.cloud.speech.v1beta1.SpeechRecognitionAlternative;
import com.google.cloud.speech.v1beta1.SpeechRecognitionResult;
import com.google.cloud.speech.v1beta1.SyncRecognizeResponse;
import com.google.cloud.speech.spi.v1.SpeechClient;
import com.google.cloud.speech.v1.RecognitionAudio;
import com.google.cloud.speech.v1.RecognitionConfig;
import com.google.cloud.speech.v1.RecognitionConfig.AudioEncoding;
import com.google.cloud.speech.v1.RecognizeResponse;
import com.google.cloud.speech.v1.SpeechRecognitionAlternative;
import com.google.cloud.speech.v1.SpeechRecognitionResult;
import com.google.protobuf.ByteString;

import java.nio.file.Files;
Expand All @@ -48,14 +48,15 @@ public static void main(String... args) throws Exception {
// Builds the sync recognize request
RecognitionConfig config = RecognitionConfig.newBuilder()
.setEncoding(AudioEncoding.LINEAR16)
.setSampleRate(16000)
.setSampleRateHertz(16000)
.setLanguageCode("en-US")
.build();
RecognitionAudio audio = RecognitionAudio.newBuilder()
.setContent(audioBytes)
.build();

// Performs speech recognition on the audio file
SyncRecognizeResponse response = speech.syncRecognize(config, audio);
RecognizeResponse response = speech.recognize(config, audio);
List<SpeechRecognitionResult> results = response.getResultsList();

for (SpeechRecognitionResult result: results) {
Expand Down
138 changes: 119 additions & 19 deletions speech/cloud-client/src/main/java/com/example/speech/Recognize.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,22 @@

package com.example.speech;

import com.google.api.gax.core.ApiStreamObserver;
import com.google.api.gax.grpc.OperationFuture;
import com.google.cloud.speech.spi.v1beta1.SpeechClient;
import com.google.cloud.speech.v1beta1.AsyncRecognizeResponse;
import com.google.cloud.speech.v1beta1.RecognitionAudio;
import com.google.cloud.speech.v1beta1.RecognitionConfig;
import com.google.cloud.speech.v1beta1.RecognitionConfig.AudioEncoding;
import com.google.cloud.speech.v1beta1.SpeechRecognitionAlternative;
import com.google.cloud.speech.v1beta1.SpeechRecognitionResult;
import com.google.cloud.speech.v1beta1.SyncRecognizeResponse;
import com.google.api.gax.grpc.StreamingCallable;
import com.google.cloud.speech.spi.v1.SpeechClient;
import com.google.cloud.speech.v1.LongRunningRecognizeResponse;
import com.google.cloud.speech.v1.RecognitionAudio;
import com.google.cloud.speech.v1.RecognitionConfig;
import com.google.cloud.speech.v1.RecognitionConfig.AudioEncoding;
import com.google.cloud.speech.v1.RecognizeResponse;
import com.google.cloud.speech.v1.SpeechRecognitionAlternative;
import com.google.cloud.speech.v1.SpeechRecognitionResult;
import com.google.cloud.speech.v1.StreamingRecognitionConfig;
import com.google.cloud.speech.v1.StreamingRecognitionResult;
import com.google.cloud.speech.v1.StreamingRecognizeRequest;
import com.google.cloud.speech.v1.StreamingRecognizeResponse;
import com.google.common.util.concurrent.SettableFuture;
import com.google.protobuf.ByteString;

import java.io.IOException;
Expand All @@ -40,7 +47,7 @@ public static void main(String... args) throws Exception {
System.out.printf(
"\tjava %s \"<command>\" \"<path-to-image>\"\n"
+ "Commands:\n"
+ "\tsyncrecognize | asyncrecognize\n"
+ "\tsyncrecognize | asyncrecognize | streamrecognize\n"
+ "Path:\n\tA file path (ex: ./resources/audio.raw) or a URI "
+ "for a Cloud Storage resource (gs://...)\n",
Recognize.class.getCanonicalName());
Expand All @@ -62,7 +69,11 @@ public static void main(String... args) throws Exception {
} else {
asyncRecognizeFile(path);
}
} else if (command.equals("streamrecognize")) {
streamingRecognizeFile(path);
//streamingRecognizeEasy(path);
}

}

/**
Expand All @@ -80,14 +91,15 @@ public static void syncRecognizeFile(String fileName) throws Exception, IOExcept
// Configure request with local raw PCM audio
RecognitionConfig config = RecognitionConfig.newBuilder()
.setEncoding(AudioEncoding.LINEAR16)
.setSampleRate(16000)
.setLanguageCode("en-US")
.setSampleRateHertz(16000)
.build();
RecognitionAudio audio = RecognitionAudio.newBuilder()
.setContent(audioBytes)
.build();

// Use blocking call to get audio transcript
SyncRecognizeResponse response = speech.syncRecognize(config, audio);
RecognizeResponse response = speech.recognize(config, audio);
List<SpeechRecognitionResult> results = response.getResultsList();

for (SpeechRecognitionResult result: results) {
Expand All @@ -111,14 +123,15 @@ public static void syncRecognizeGcs(String gcsUri) throws Exception, IOException
// Builds the request for remote FLAC file
RecognitionConfig config = RecognitionConfig.newBuilder()
.setEncoding(AudioEncoding.FLAC)
.setSampleRate(16000)
.setLanguageCode("en-US")
.setSampleRateHertz(16000)
.build();
RecognitionAudio audio = RecognitionAudio.newBuilder()
.setUri(gcsUri)
.build();

// Use blocking call for getting audio transcript
SyncRecognizeResponse response = speech.syncRecognize(config, audio);
RecognizeResponse response = speech.recognize(config, audio);
List<SpeechRecognitionResult> results = response.getResultsList();

for (SpeechRecognitionResult result: results) {
Expand All @@ -130,6 +143,7 @@ public static void syncRecognizeGcs(String gcsUri) throws Exception, IOException
speech.close();
}

/*
/**
* Performs non-blocking speech recognition on raw PCM audio and prints
* the transcription.
Expand All @@ -147,14 +161,16 @@ public static void asyncRecognizeFile(String fileName) throws Exception, IOExcep
// Configure request with local raw PCM audio
RecognitionConfig config = RecognitionConfig.newBuilder()
.setEncoding(AudioEncoding.LINEAR16)
.setSampleRate(16000)
.setLanguageCode("en-US")
.setSampleRateHertz(16000)
.build();
RecognitionAudio audio = RecognitionAudio.newBuilder()
.setContent(audioBytes)
.build();

// Use non-blocking call for getting file transcription
OperationFuture<AsyncRecognizeResponse> response = speech.asyncRecognizeAsync(config, audio);
OperationFuture<LongRunningRecognizeResponse> response =
speech.longRunningRecognizeAsync(config, audio);
while (!response.isDone()) {
System.out.println("Waiting for response...");
Thread.sleep(200);
Expand All @@ -175,23 +191,25 @@ public static void asyncRecognizeFile(String fileName) throws Exception, IOExcep
* Performs non-blocking speech recognition on remote FLAC file and prints
* the transcription.
*
* @param gcsUri the path to the remote FLAC audio file to transcribe.
* @param gcsUri the path to the remote FLAC audio file to transcribe.
*/
public static void asyncRecognizeGcs(String gcsUri) throws Exception, IOException {
// Instantiates a client with GOOGLE_APPLICATION_CREDENTIALS
SpeechClient speech = SpeechClient.create();

// Configure remote file request for FLAC file
// Configure remote file request for FLAC file
RecognitionConfig config = RecognitionConfig.newBuilder()
.setEncoding(AudioEncoding.FLAC)
.setSampleRate(16000)
.setLanguageCode("en-US")
.setSampleRateHertz(16000)
.build();
RecognitionAudio audio = RecognitionAudio.newBuilder()
.setUri(gcsUri)
.build();

// Use non-blocking call for getting file transcription
OperationFuture<AsyncRecognizeResponse> response = speech.asyncRecognizeAsync(config, audio);
OperationFuture<LongRunningRecognizeResponse> response =
speech.longRunningRecognizeAsync(config, audio);
while (!response.isDone()) {
System.out.println("Waiting for response...");
Thread.sleep(200);
Expand All @@ -207,4 +225,86 @@ public static void asyncRecognizeGcs(String gcsUri) throws Exception, IOExceptio
}
speech.close();
}

/**
 * Performs streaming speech recognition on raw PCM audio data and prints every
 * transcript alternative found in the responses.
 *
 * <p>The whole file is read into memory and sent as a single audio request after the
 * initial configuration request. The method then blocks until the response stream has
 * completed (or failed) before printing anything.
 *
 * @param fileName the path to a PCM audio file to transcribe.
 * @throws IOException if the audio file cannot be read.
 * @throws Exception if the client cannot be created or closed, or if the response
 *     stream terminates with an error.
 */
public static void streamingRecognizeFile(String fileName) throws Exception, IOException {
  Path path = Paths.get(fileName);
  byte[] data = Files.readAllBytes(path);

  // Collects every streamed response and exposes the outcome through a future, so the
  // caller can block until the stream completes or fails.
  class ResponseApiStreamingObserver<T> implements ApiStreamObserver<T> {
    private final SettableFuture<List<T>> future = SettableFuture.create();
    private final List<T> messages = new java.util.ArrayList<T>();

    @Override
    public void onNext(T message) {
      messages.add(message);
    }

    @Override
    public void onError(Throwable t) {
      future.setException(t);
    }

    @Override
    public void onCompleted() {
      future.set(messages);
    }

    // Returns the SettableFuture object to get received messages / exceptions.
    public SettableFuture<List<T>> future() {
      return future;
    }
  }

  // Instantiates a client with GOOGLE_APPLICATION_CREDENTIALS.
  // try-with-resources closes the client even when streaming or waiting on the
  // future throws; previously close() was only reached on the success path.
  try (SpeechClient speech = SpeechClient.create()) {
    // Configure request with local raw PCM audio
    RecognitionConfig recConfig = RecognitionConfig.newBuilder()
        .setEncoding(AudioEncoding.LINEAR16)
        .setLanguageCode("en-US")
        .setSampleRateHertz(16000)
        .build();
    StreamingRecognitionConfig config = StreamingRecognitionConfig.newBuilder()
        .setConfig(recConfig)
        .build();

    ResponseApiStreamingObserver<StreamingRecognizeResponse> responseObserver =
        new ResponseApiStreamingObserver<StreamingRecognizeResponse>();

    StreamingCallable<StreamingRecognizeRequest, StreamingRecognizeResponse> callable =
        speech.streamingRecognizeCallable();

    ApiStreamObserver<StreamingRecognizeRequest> requestObserver =
        callable.bidiStreamingCall(responseObserver);

    // The first request must **only** contain the audio configuration:
    requestObserver.onNext(StreamingRecognizeRequest.newBuilder()
        .setStreamingConfig(config)
        .build());

    // Subsequent requests must **only** contain the audio data.
    requestObserver.onNext(StreamingRecognizeRequest.newBuilder()
        .setAudioContent(ByteString.copyFrom(data))
        .build());

    // Mark transmission as completed after sending the data.
    requestObserver.onCompleted();

    // Blocks until onCompleted/onError has been delivered to the response observer.
    List<StreamingRecognizeResponse> responses = responseObserver.future().get();

    for (StreamingRecognizeResponse response : responses) {
      for (StreamingRecognitionResult result : response.getResultsList()) {
        for (SpeechRecognitionAlternative alternative : result.getAlternativesList()) {
          System.out.println(alternative.getTranscript());
        }
      }
    }
  }
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -61,25 +61,32 @@ public void testRecognizeFile() throws Exception {
String got = bout.toString();
assertThat(got).contains("how old is the Brooklyn Bridge");
}

// Runs synchronous recognition on the audio at gcsPath and checks that the expected
// transcript shows up in bout (presumably the captured stdout — confirm in test setup).
@Test
public void testRecognizeGcs() throws Exception {
  Recognize.syncRecognizeGcs(gcsPath);
  assertThat(bout.toString()).contains("how old is the Brooklyn Bridge");
}

// Runs non-blocking recognition on the local audio file and checks that the expected
// transcript shows up in bout (presumably the captured stdout — confirm in test setup).
@Test
public void testAsyncRecognizeFile() throws Exception {
  Recognize.asyncRecognizeFile(fileName);
  assertThat(bout.toString()).contains("how old is the Brooklyn Bridge");
}

// Runs non-blocking recognition on the audio at gcsPath and checks that the expected
// transcript shows up in bout (presumably the captured stdout — confirm in test setup).
@Test
public void testAsyncRecognizeGcs() throws Exception {
  Recognize.asyncRecognizeGcs(gcsPath);
  assertThat(bout.toString()).contains("how old is the Brooklyn Bridge");
}

// Runs streaming recognition on the local audio file and checks that the expected
// transcript shows up in bout (presumably the captured stdout — confirm in test setup).
@Test
public void testStreamRecognize() throws Exception {
  Recognize.streamingRecognizeFile(fileName);
  assertThat(bout.toString()).contains("how old is the Brooklyn Bridge");
}
}