<dependency>
<groupId>com.ibm.watson.developer_cloud</groupId>
<artifactId>speech-to-text</artifactId>
<version>3.5.1</version>
</dependency>
'com.ibm.watson.developer_cloud:speech-to-text:3.5.1'
Use the Speech to Text service to recognize the text from a .wav file.
SpeechToText service = new SpeechToText();
service.setUsernameAndPassword("<username>", "<password>");
File audio = new File("src/test/resources/sample1.wav");
SpeechResults transcript = service.recognize(audio, HttpMediaType.AUDIO_WAV).execute();
System.out.println(transcript);
Speech to Text supports WebSocket, the url is:
wss://stream.watsonplatform.net/speech-to-text/api/v1/recognize
SpeechToText service = new SpeechToText();
service.setUsernameAndPassword("<username>", "<password>");
File audio = new File("src/test/resources/sample1.wav");
RecognizeOptions options = new RecognizeOptions.Builder()
.continuous(true)
.interimResults(true)
.contentType(HttpMediaType.AUDIO_WAV)
.build();
service.recognizeUsingWebSocket(audio, options, new BaseRecognizeCallback() {
@Override
public void onTranscription(SpeechResults speechResults) {
System.out.println(speechResults);
}
);
// wait 20 seconds for the asynchronous response
Thread.sleep(20000);
Use your microphone to recognize audio for 30 seconds.
SpeechToText service = new SpeechToText();
service.setUsernameAndPassword("<username>", "<password>");
// Signed PCM AudioFormat with 16kHz, 16 bit sample size, mono
int sampleRate = 16000;
AudioFormat format = new AudioFormat(sampleRate, 16, 1, true, false);
DataLine.Info info = new DataLine.Info(TargetDataLine.class, format);
if (!AudioSystem.isLineSupported(info)) {
System.out.println("Line not supported");
System.exit(0);
}
TargetDataLine line = (TargetDataLine) AudioSystem.getLine(info);
line.open(format);
line.start();
AudioInputStream audio = new AudioInputStream(line);
RecognizeOptions options = new RecognizeOptions.Builder()
.continuous(true)
.interimResults(true)
//.inactivityTimeout(5) // use this to stop listening when the speaker pauses, i.e. for 5s
.contentType(HttpMediaType.AUDIO_RAW + "; rate=" + sampleRate)
.build();
service.recognizeUsingWebSocket(audio, options, new BaseRecognizeCallback() {
@Override
public void onTranscription(SpeechResults speechResults) {
System.out.println(speechResults);
}
});
System.out.println("Listening to your voice for the next 30s...");
Thread.sleep(30 * 1000);
// closing the WebSockets underlying InputStream will close the WebSocket itself.
line.stop();
line.close();
System.out.println("Fin.");