Skip to content

Commit

Permalink
feat(samples): bump auto-punctuation sample to v1 and move diarizatio…
Browse files Browse the repository at this point in the history
…n to v1 (#463)

* bumped diarization and autocapitalization samples to v1

* Remove extra committed file

* fixed linting error
  • Loading branch information
bradmiro authored and Ace Nassri committed Nov 17, 2022
1 parent 48d06d2 commit d1721c3
Show file tree
Hide file tree
Showing 2 changed files with 75 additions and 2 deletions.
72 changes: 70 additions & 2 deletions speech/recognize.js
Original file line number Diff line number Diff line change
Expand Up @@ -544,12 +544,12 @@ async function syncRecognizeWithAutoPunctuation(
languageCode
) {
// [START speech_transcribe_auto_punctuation]
// Imports the Google Cloud client library for Beta API
// Imports the Google Cloud client library for API
/**
* TODO(developer): Update client library import to use new
* version of API when desired features become available
*/
const speech = require('@google-cloud/speech').v1p1beta1;
const speech = require('@google-cloud/speech');
const fs = require('fs');

// Creates a client
Expand Down Expand Up @@ -712,6 +712,62 @@ async function syncRecognizeWithMultiChannelGCS(gcsUri) {
// [END speech_transcribe_multichannel_gcs]
}

/**
 * Transcribes a local audio file with speaker diarization enabled, then logs
 * the transcript followed by every recognized word and its speaker tag.
 *
 * The request is sent as LINEAR16 audio at 8000 Hz, language `en-US`, using
 * the `phone_call` model and at most two speakers (see `config` below).
 *
 * @param {string} fileName Local path to the audio file to transcribe.
 * @returns {Promise<void>} Resolves once the results have been logged.
 */
async function speechTranscribeDiarization(fileName) {
  // [START speech_transcribe_diarization]
  const fs = require('fs');

  // Imports the Google Cloud client library
  const speech = require('@google-cloud/speech');

  // Creates a client
  const client = new speech.SpeechClient();

  /**
   * TODO(developer): Uncomment the following lines before running the sample.
   */
  // const fileName = 'Local path to audio file, e.g. /path/to/audio.raw';

  // Set config for Diarization
  const diarizationConfig = {
    enableSpeakerDiarization: true,
    maxSpeakerCount: 2,
  };

  const config = {
    encoding: 'LINEAR16',
    sampleRateHertz: 8000,
    languageCode: 'en-US',
    diarizationConfig,
    model: 'phone_call',
  };

  // The audio content is sent inline as a base64-encoded string.
  const audio = {
    content: fs.readFileSync(fileName).toString('base64'),
  };

  const request = {
    config,
    audio,
  };

  const [response] = await client.recognize(request);

  // The service may return no results at all (e.g. no speech was detected),
  // so fall back to an empty list instead of indexing into `undefined`.
  const results = response.results || [];

  const transcription = results
    .map(result => result.alternatives[0].transcript)
    .join('\n');
  console.log(`Transcription: ${transcription}`);
  console.log('Speaker Diarization:');

  // Note: The transcript within each result is separate and sequential per result.
  // However, the words list within an alternative includes all the words
  // from all the results thus far. Thus, to get all the words with speaker
  // tags, you only have to take the words list from the last result:
  const lastResult = results[results.length - 1];
  const wordsInfo = (lastResult && lastResult.alternatives[0].words) || [];

  if (wordsInfo.length === 0) {
    console.log(' No words were recognized.');
    return;
  }

  wordsInfo.forEach(a =>
    console.log(` word: ${a.word}, speakerTag: ${a.speakerTag}`)
  );
  // [END speech_transcribe_diarization]
}

require(`yargs`) // eslint-disable-line
.demand(1)
.command(
Expand Down Expand Up @@ -883,6 +939,12 @@ require(`yargs`) // eslint-disable-line
opts.languageCode
)
)
.command(
`Diarization`,
`Isolate distinct speakers in an audio file`,
{},
opts => speechTranscribeDiarization(opts.speechFile)
)
.options({
encoding: {
alias: 'e',
Expand All @@ -905,6 +967,12 @@ require(`yargs`) // eslint-disable-line
requiresArg: true,
type: 'string',
},
speechFile: {
alias: 'f',
global: true,
requiresArg: false,
type: 'string',
},
})
.example(`node $0 sync ./resources/audio.raw -e LINEAR16 -r 16000`)
.example(`node $0 async-gcs gs://gcs-test-data/vr.flac -e FLAC -r 16000`)
Expand Down
5 changes: 5 additions & 0 deletions speech/system-test/recognize.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -132,4 +132,9 @@ describe('Recognize', () => {
);
assert.match(output, /Channel Tag: 2/);
});

// Runs the `Diarization` CLI command against a local audio file and expects
// the output to contain at least one `speakerTag:` entry.
it('should run speech diarization on a local file', async () => {
  const diarizationCommand = `${cmd} Diarization -f ${filepath2}`;
  const stdout = execSync(diarizationCommand);
  assert.match(stdout, /speakerTag:/);
});
});

0 comments on commit d1721c3

Please sign in to comment.