Skip to content

Commit fba6d67

Browse files
nirupa-kumar
authored and
Ace Nassri
committed
Multi-channel GA sample (#295)
1 parent fefd349 commit fba6d67

File tree

2 files changed

+120
-0
lines changed

2 files changed

+120
-0
lines changed

speech/recognize.js

+105
Original file line numberDiff line numberDiff line change
@@ -635,6 +635,86 @@ async function syncRecognizeWithEnhancedModel(
635635
// [END speech_transcribe_enhanced_model]
636636
}
637637

638+
/**
 * Transcribes a local multi-channel audio file, producing a separate
 * transcript per audio channel.
 *
 * @param {string} fileName - Local path to the audio file (e.g. a stereo
 *   LINEAR16 WAV); the file is read and sent inline, base64-encoded.
 * @returns {Promise<void>} Resolves after the transcription is logged.
 */
async function syncRecognizeWithMultiChannel(fileName) {
  // [START speech_transcribe_multichannel]
  const fs = require('fs');

  // Imports the Google Cloud client library
  const speech = require('@google-cloud/speech').v1;

  // Creates a client
  const client = new speech.SpeechClient();

  /**
   * TODO(developer): Uncomment the following lines before running the sample.
   */
  // const fileName = 'Local path to audio file, e.g. /path/to/audio.raw';

  // With enableSeparateRecognitionPerChannel, each of the audioChannelCount
  // channels is recognized independently; results carry a channelTag.
  const config = {
    encoding: 'LINEAR16',
    languageCode: 'en-US',
    audioChannelCount: 2,
    enableSeparateRecognitionPerChannel: true,
  };

  // Inline audio content must be base64-encoded.
  const audio = {
    content: fs.readFileSync(fileName).toString('base64'),
  };

  const request = {
    config: config,
    audio: audio,
  };

  // Performs synchronous recognition and joins the per-channel results.
  const [response] = await client.recognize(request);
  const transcription = response.results
    .map(
      result =>
        ` Channel Tag: ${result.channelTag} ${
          result.alternatives[0].transcript
        }`
    )
    .join('\n');
  console.log(`Transcription: \n${transcription}`);
  // [END speech_transcribe_multichannel]
}
681+
682+
/**
 * Transcribes a multi-channel audio file stored in Google Cloud Storage,
 * producing a separate transcript per audio channel.
 *
 * @param {string} gcsUri - Cloud Storage URI of the audio file, e.g.
 *   gs://my-bucket/commercial_stereo.wav.
 * @returns {Promise<void>} Resolves after the transcription is logged.
 */
async function syncRecognizeWithMultiChannelGCS(gcsUri) {
  // [START speech_transcribe_multichannel_gcs]
  // Imports the Google Cloud client library
  const speech = require('@google-cloud/speech').v1;

  // Creates a client
  const client = new speech.SpeechClient();

  /**
   * TODO(developer): Uncomment the following line before running the sample.
   */
  // const gcsUri = 'gs://my-bucket/audio.raw';

  // With enableSeparateRecognitionPerChannel, each of the audioChannelCount
  // channels is recognized independently; results carry a channelTag.
  const config = {
    encoding: 'LINEAR16',
    languageCode: 'en-US',
    audioChannelCount: 2,
    enableSeparateRecognitionPerChannel: true,
  };

  // The audio is referenced by its Cloud Storage URI rather than sent inline.
  const audio = {
    uri: gcsUri,
  };

  const request = {
    config: config,
    audio: audio,
  };

  // Performs synchronous recognition and joins the per-channel results.
  const [response] = await client.recognize(request);
  const transcription = response.results
    .map(
      result =>
        ` Channel Tag: ${result.channelTag} ${
          result.alternatives[0].transcript
        }`
    )
    .join('\n');
  console.log(`Transcription: \n${transcription}`);
  // [END speech_transcribe_multichannel_gcs]
}
717+
638718
require(`yargs`) // eslint-disable-line
639719
.demand(1)
640720
.command(
@@ -782,6 +862,30 @@ require(`yargs`) // eslint-disable-line
782862
opts.languageCode
783863
)
784864
)
865+
.command(
866+
`sync-multi-channel <filename>`,
867+
`Differentiates input by audio channel in local audio file.`,
868+
{},
869+
opts =>
870+
syncRecognizeWithMultiChannel(
871+
opts.filename,
872+
opts.encoding,
873+
opts.sampleRateHertz,
874+
opts.languageCode
875+
)
876+
)
877+
.command(
878+
`sync-multi-channel-gcs <gcsUri>`,
879+
`Differentiates input by audio channel in an audio file located in a Google Cloud Storage bucket.`,
880+
{},
881+
opts =>
882+
syncRecognizeWithMultiChannelGCS(
883+
opts.gcsUri,
884+
opts.encoding,
885+
opts.sampleRateHertz,
886+
opts.languageCode
887+
)
888+
)
785889
.options({
786890
encoding: {
787891
alias: 'e',
@@ -817,6 +921,7 @@ require(`yargs`) // eslint-disable-line
817921
)
818922
.example(`node $0 sync-auto-punctuation ./resources/commercial_mono.wav`)
819923
.example(`node $0 sync-enhanced-model ./resources/commercial_mono.wav`)
924+
.example(`node $0 sync-multi-channel ./resources/commercial_stereo.wav`)
820925
.wrap(120)
821926
.recommendCommands()
822927
.epilogue(`For more information, see https://cloud.google.com/speech/docs`)

speech/system-test/recognize.test.js

+15
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,11 @@ const resourcePath = path.join(__dirname, '..', 'resources');
2929
const filename = `audio.raw`;
3030
const filename1 = `Google_Gnome.wav`;
3131
const filename2 = `commercial_mono.wav`;
32+
const filename3 = `commercial_stereo.wav`;
3233
const filepath = path.join(resourcePath, filename);
3334
const filepath1 = path.join(resourcePath, filename1);
3435
const filepath2 = path.join(resourcePath, filename2);
36+
const filepath3 = path.join(resourcePath, filename3);
3537
const text = 'how old is the Brooklyn Bridge';
3638
const text1 = 'the weather outside is sunny';
3739
const text2 = `Terrific. It's on the way.`;
@@ -43,6 +45,7 @@ describe('Recognize', () => {
4345
const [bucket] = await storage.createBucket(bucketName);
4446
await bucket.upload(filepath);
4547
await bucket.upload(filepath1);
48+
await bucket.upload(filepath3);
4649
});
4750

4851
after(async () => {
@@ -119,4 +122,16 @@ describe('Recognize', () => {
119122
const output = await exec(`${cmd} sync-enhanced-model ${filepath2}`);
120123
assert.match(output, new RegExp(text3));
121124
});
125+
126+
it('should run multi channel transcription on a local file', async () => {
127+
const output = await exec(`${cmd} sync-multi-channel ${filepath3}`);
128+
assert.match(output, /Channel Tag: 2/);
129+
});
130+
131+
it('should run multi channel transcription on GCS file', async () => {
132+
const output = await exec(
133+
`${cmd} sync-multi-channel-gcs gs://${bucketName}/${filename3}`
134+
);
135+
assert.match(output, /Channel Tag: 2/);
136+
});
122137
});

0 commit comments

Comments
 (0)