From 41ee3ad38bfc77260b29ed3525dcca5d3187a478 Mon Sep 17 00:00:00 2001
From: Eric Schmidt
Date: Tue, 16 Jun 2020 15:06:16 -0700
Subject: [PATCH] feat(samples): demonstrate real-time conversation translation (#48)

This PR improves the Media Translation microphone sample so that it tracks
the state of the streaming session. When the API returns the
SpeechEventType.END_OF_SINGLE_UTTERANCE event, the stream is closed and the
user is prompted to initiate another call to the API.

The following region tags are affected:

media_translation_translate_from_mic
---
 media-translation/translate_from_mic.js | 67 ++++++++++++++++---------
 1 file changed, 44 insertions(+), 23 deletions(-)

diff --git a/media-translation/translate_from_mic.js b/media-translation/translate_from_mic.js
index b9557c3b2e..73339c9290 100644
--- a/media-translation/translate_from_mic.js
+++ b/media-translation/translate_from_mic.js
@@ -39,6 +39,24 @@ function main(encoding, sampleRateHertz, sourceLanguage, targetLanguage) {
   // [START media_translation_translate_from_mic]
 
+  // Allow user input from terminal
+  const readline = require('readline');
+
+  const rl = readline.createInterface({
+    input: process.stdin,
+    output: process.stdout,
+  });
+
+  function doTranslationLoop() {
+    rl.question("Press any key to translate or 'q' to quit: ", answer => {
+      if (answer.toLowerCase() === 'q') {
+        rl.close();
+      } else {
+        translateFromMicrophone();
+      }
+    });
+  }
+
   // Node-Record-lpcm16
   const recorder = require('node-record-lpcm16');
 
@@ -58,6 +76,7 @@ function main(encoding, sampleRateHertz, sourceLanguage, targetLanguage) {
     //const sampleRateHertz = 16000;
     //const sourceLanguage = 'Language to translate from, as BCP-47 locale';
     //const targetLanguage = 'Language to translate to, as BCP-47 locale';
+    console.log('Begin speaking ...');
 
     const config = {
       audioConfig: {
@@ -65,6 +84,7 @@ function main(encoding, sampleRateHertz, sourceLanguage, targetLanguage) {
         sourceLanguageCode: sourceLanguage,
         targetLanguageCode: targetLanguage,
       },
+      singleUtterance: true,
     };
 
     // First request needs to have only a streaming config, no data.
@@ -73,6 +93,8 @@ function main(encoding, sampleRateHertz, sourceLanguage, targetLanguage) {
       audioContent: null,
     };
 
+    let currentTranslation = '';
+    let currentRecognition = '';
     // Create a recognize stream
     const stream = client
       .streamingTranslateSpeech()
@@ -84,31 +106,30 @@ function main(encoding, sampleRateHertz, sourceLanguage, targetLanguage) {
         }
       })
       .on('data', response => {
-        const {result} = response;
-        if (result.textTranslationResult.isFinal) {
-          console.log(
-            `\nFinal translation: ${result.textTranslationResult.translation}`
-          );
-          console.log(`Final recognition result: ${result.recognitionResult}`);
+        const {result, speechEventType} = response;
+        if (speechEventType === 'END_OF_SINGLE_UTTERANCE') {
+          console.log(`\nFinal translation: ${currentTranslation}`);
+          console.log(`Final recognition result: ${currentRecognition}`);
+
+          stream.destroy();
+          recording.stop();
         } else {
-          console.log(
-            `\nPartial translation: ${result.textTranslationResult.translation}`
-          );
-          console.log(
-            `Partial recognition result: ${result.recognitionResult}`
-          );
+          currentTranslation = result.textTranslationResult.translation;
+          currentRecognition = result.recognitionResult;
+          console.log(`\nPartial translation: ${currentTranslation}`);
+          console.log(`Partial recognition result: ${currentRecognition}`);
         }
       });
 
     let isFirst = true;
     // Start recording and send microphone input to the Media Translation API
-    recorder
-      .record({
-        sampleRateHertz: sampleRateHertz,
-        threshold: 0, //silence threshold
-        recordProgram: 'rec',
-        silence: '5.0', //seconds of silence before ending
-      })
+    const recording = recorder.record({
+      sampleRateHertz: sampleRateHertz,
+      threshold: 0, //silence threshold
+      recordProgram: 'rec',
+      silence: '5.0', //seconds of silence before ending
+    });
+    recording
      .stream()
      .on('data', chunk => {
        if (isFirst) {
@@ -120,13 +141,13 @@ function main(encoding, sampleRateHertz, sourceLanguage, targetLanguage) {
           audioContent: chunk.toString('base64'),
         };
         stream.write(request);
+      })
+      .on('close', () => {
+        doTranslationLoop();
       });
-
-    console.log('Listening, press Ctrl+C to stop.');
   }
-  translateFromMicrophone();
-
+  doTranslationLoop();
   // [END media_translation_translate_from_mic]
 }
 
 main(...process.argv.slice(2));
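
Note: the terminal session below is a sketch of how the reworked sample behaves
at runtime. The command-line arguments (encoding, sample rate, and BCP-47
source/target locales) are illustrative assumptions and not part of this patch;
the prompt and log strings are the ones emitted by the code above.

    $ node translate_from_mic.js linear16 16000 en-US es-ES
    Press any key to translate or 'q' to quit:
    Begin speaking ...

    Partial translation: ...
    Partial recognition result: ...

    Final translation: ...
    Final recognition result: ...
    Press any key to translate or 'q' to quit: q

After END_OF_SINGLE_UTTERANCE is received, the sample destroys the stream,
stops the recording, and returns to the doTranslationLoop() prompt so the user
can start another utterance or quit.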