
Commit c330971

Speech upgrade (#427)
* Upgrades speech client to semi-gapic
* Adds library with streaming fixes
1 parent d401ab6 commit c330971


5 files changed: 91 additions & 78 deletions

speech/package.json

Lines changed: 5 additions & 5 deletions
@@ -18,14 +18,14 @@
     "test": "npm run system-test"
   },
   "dependencies": {
-    "@google-cloud/speech": "0.9.3",
-    "@google-cloud/storage": "1.1.1",
+    "@google-cloud/speech": "^0.10.1",
+    "@google-cloud/storage": "^1.2.0",
     "node-record-lpcm16": "0.3.0",
-    "yargs": "8.0.2"
+    "yargs": "^8.0.2"
   },
   "devDependencies": {
-    "@google-cloud/nodejs-repo-tools": "1.4.15",
-    "ava": "0.19.1",
+    "@google-cloud/nodejs-repo-tools": "^1.4.15",
+    "ava": "^0.19.1",
     "proxyquire": "1.8.0",
     "sinon": "2.3.4"
   },
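The dependency bumps also move from exact pins to caret ranges, so compatible updates install without editing package.json. For a 0.x package a caret range only floats the patch level, while for >=1.0.0 it floats the minor as well. A quick check with the npm semver package, used here purely for illustration (it is not added by this commit):

// Illustrates what the new caret ranges accept (semver is only for demonstration).
const semver = require('semver');

console.log(semver.satisfies('0.10.4', '^0.10.1')); // true  (patch updates allowed)
console.log(semver.satisfies('0.11.0', '^0.10.1')); // false (0.x carets do not cross the minor)
console.log(semver.satisfies('8.2.0', '^8.0.2'));   // true  (>=1.0.0 carets allow minor updates)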

speech/quickstart.js

Lines changed: 16 additions & 4 deletions
@@ -18,9 +18,10 @@
 // [START speech_quickstart]
 // Imports the Google Cloud client library
 const Speech = require('@google-cloud/speech');
+const fs = require('fs');
 
 // Your Google Cloud Platform project ID
-const projectId = 'YOUR_PROJECT_ID';
+const projectId = 'your-project-id';
 
 // Instantiates a client
 const speechClient = Speech({
@@ -30,17 +31,28 @@ const speechClient = Speech({
 // The name of the audio file to transcribe
 const fileName = './resources/audio.raw';
 
+// Reads a local audio file and converts it to base64
+const file = fs.readFileSync(fileName);
+const audioBytes = file.toString('base64');
+
 // The audio file's encoding, sample rate in hertz, and BCP-47 language code
-const options = {
+const audio = {
+  content: audioBytes
+};
+const config = {
   encoding: 'LINEAR16',
   sampleRateHertz: 16000,
   languageCode: 'en-US'
 };
+const request = {
+  audio: audio,
+  config: config
+};
 
 // Detects speech in the audio file
-speechClient.recognize(fileName, options)
+speechClient.recognize(request)
   .then((results) => {
-    const transcription = results[0];
+    const transcription = results[0].results[0].alternatives[0].transcript;
     console.log(`Transcription: ${transcription}`);
   })
   .catch((err) => {
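For reference, this is roughly how speech/quickstart.js reads once the patch is applied. The client constructor options and the error handler body sit outside the hunks above, so those two details are assumed:

// Sketch of speech/quickstart.js after this change.
const Speech = require('@google-cloud/speech');
const fs = require('fs');

// Your Google Cloud Platform project ID
const projectId = 'your-project-id';

// Instantiates a client (constructor options assumed; only the opening line is in the diff)
const speechClient = Speech({
  projectId: projectId
});

// The name of the audio file to transcribe
const fileName = './resources/audio.raw';

// Reads a local audio file and converts it to base64
const file = fs.readFileSync(fileName);
const audioBytes = file.toString('base64');

// The audio file's encoding, sample rate in hertz, and BCP-47 language code
const audio = {
  content: audioBytes
};
const config = {
  encoding: 'LINEAR16',
  sampleRateHertz: 16000,
  languageCode: 'en-US'
};
const request = {
  audio: audio,
  config: config
};

// Detects speech in the audio file
speechClient.recognize(request)
  .then((results) => {
    const transcription = results[0].results[0].alternatives[0].transcript;
    console.log(`Transcription: ${transcription}`);
  })
  .catch((err) => {
    // Error handler body assumed; it is outside the hunk above
    console.error('ERROR:', err);
  });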

speech/recognize.js

Lines changed: 58 additions & 20 deletions
@@ -26,6 +26,7 @@
 function syncRecognize (filename, encoding, sampleRateHertz, languageCode) {
   // [START speech_sync_recognize]
   // Imports the Google Cloud client library
+  const fs = require('fs');
   const Speech = require('@google-cloud/speech');
 
   // Instantiates a client
@@ -43,18 +44,25 @@ function syncRecognize (filename, encoding, sampleRateHertz, languageCode) {
   // The BCP-47 language code to use, e.g. 'en-US'
   // const languageCode = 'en-US';
 
-  const request = {
+  const config = {
     encoding: encoding,
     sampleRateHertz: sampleRateHertz,
     languageCode: languageCode
   };
+  const audio = {
+    content: fs.readFileSync(filename).toString('base64')
+  };
+
+  const request = {
+    config: config,
+    audio: audio
+  };
 
   // Detects speech in the audio file
-  speech.recognize(filename, request)
+  speech.recognize(request)
     .then((results) => {
-      const transcription = results[0];
-
-      console.log(`Transcription: ${transcription}`);
+      const transcription = results[0].results[0].alternatives[0].transcript;
+      console.log(`Transcription: `, transcription);
     })
     .catch((err) => {
       console.error('ERROR:', err);
@@ -82,18 +90,25 @@ function syncRecognizeGCS (gcsUri, encoding, sampleRateHertz, languageCode) {
   // The BCP-47 language code to use, e.g. 'en-US'
   // const languageCode = 'en-US';
 
-  const request = {
+  const config = {
     encoding: encoding,
     sampleRateHertz: sampleRateHertz,
     languageCode: languageCode
   };
+  const audio = {
+    uri: gcsUri
+  };
+
+  const request = {
+    config: config,
+    audio: audio
+  };
 
   // Detects speech in the audio file
-  speech.recognize(gcsUri, request)
+  speech.recognize(request)
     .then((results) => {
-      const transcription = results[0];
-
-      console.log(`Transcription: ${transcription}`);
+      const transcription = results[0].results[0].alternatives[0].transcript;
+      console.log(`Transcription: `, transcription);
     })
     .catch((err) => {
       console.error('ERROR:', err);
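Both sync variants build the same config; only the audio source differs. A minimal sketch of the two audio shapes used above, with an illustrative file name and bucket:

// The two audio sources used by these samples: inline base64 content for a
// local file, or a Cloud Storage URI for a remote one (names illustrative).
const fs = require('fs');

const localAudio = {
  content: fs.readFileSync('./resources/audio.raw').toString('base64')
};

const gcsAudio = {
  uri: 'gs://my-bucket/audio.raw'
};

// Either shape plugs into the same request:
const config = {
  encoding: 'LINEAR16',
  sampleRateHertz: 16000,
  languageCode: 'en-US'
};
const request = {
  config: config,
  audio: gcsAudio // or localAudio
};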
@@ -105,6 +120,7 @@ function asyncRecognize (filename, encoding, sampleRateHertz, languageCode) {
   // [START speech_async_recognize]
   // Imports the Google Cloud client library
   const Speech = require('@google-cloud/speech');
+  const fs = require('fs');
 
   // Instantiates a client
   const speech = Speech();
@@ -121,22 +137,30 @@ function asyncRecognize (filename, encoding, sampleRateHertz, languageCode) {
   // The BCP-47 language code to use, e.g. 'en-US'
   // const languageCode = 'en-US';
 
-  const request = {
+  const config = {
     encoding: encoding,
     sampleRateHertz: sampleRateHertz,
     languageCode: languageCode
   };
+  const audio = {
+    content: fs.readFileSync(filename).toString('base64')
+  };
+
+  const request = {
+    config: config,
+    audio: audio
+  };
 
   // Detects speech in the audio file. This creates a recognition job that you
   // can wait for now, or get its result later.
-  speech.startRecognition(filename, request)
+  speech.longRunningRecognize(request)
     .then((results) => {
       const operation = results[0];
       // Get a Promise representation of the final result of the job
       return operation.promise();
     })
     .then((results) => {
-      const transcription = results[0];
+      const transcription = results[0].results[0].alternatives[0].transcript;
       console.log(`Transcription: ${transcription}`);
     })
     .catch((err) => {
@@ -165,22 +189,31 @@ function asyncRecognizeGCS (gcsUri, encoding, sampleRateHertz, languageCode) {
   // The BCP-47 language code to use, e.g. 'en-US'
   // const languageCode = 'en-US';
 
-  const request = {
+  const config = {
     encoding: encoding,
     sampleRateHertz: sampleRateHertz,
     languageCode: languageCode
   };
 
+  const audio = {
+    uri: gcsUri
+  };
+
+  const request = {
+    config: config,
+    audio: audio
+  };
+
   // Detects speech in the audio file. This creates a recognition job that you
   // can wait for now, or get its result later.
-  speech.startRecognition(gcsUri, request)
+  speech.longRunningRecognize(request)
     .then((results) => {
       const operation = results[0];
       // Get a Promise representation of the final result of the job
       return operation.promise();
     })
     .then((results) => {
-      const transcription = results[0];
+      const transcription = results[0].results[0].alternatives[0].transcript;
       console.log(`Transcription: ${transcription}`);
     })
     .catch((err) => {
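Both async variants follow the same long-running pattern: longRunningRecognize resolves with an operation handle, and operation.promise() resolves with the final recognition results. A condensed sketch of that flow, assembled from the two hunks above with an illustrative GCS URI:

// Condensed long-running flow used by both async samples (URI illustrative).
const Speech = require('@google-cloud/speech');
const speech = Speech();

const request = {
  config: {
    encoding: 'LINEAR16',
    sampleRateHertz: 16000,
    languageCode: 'en-US'
  },
  audio: {
    uri: 'gs://my-bucket/audio.raw'
  }
};

speech.longRunningRecognize(request)
  .then((results) => {
    // results[0] is the operation handle for the recognition job
    const operation = results[0];
    // Wait for the job to finish and get its final result
    return operation.promise();
  })
  .then((results) => {
    const transcription = results[0].results[0].alternatives[0].transcript;
    console.log(`Transcription: ${transcription}`);
  })
  .catch((err) => {
    console.error('ERROR:', err);
  });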
@@ -221,10 +254,11 @@ function streamingRecognize (filename, encoding, sampleRateHertz, languageCode)
   };
 
   // Stream the audio to the Google Cloud Speech API
-  const recognizeStream = speech.createRecognizeStream(request)
+  const recognizeStream = speech.streamingRecognize(request)
     .on('error', console.error)
     .on('data', (data) => {
-      console.log(`Transcription: ${data.results}`);
+      console.log(
+        `Transcription: ${data.results[0].alternatives[0].transcript}`);
     });
 
   // Stream an audio file from disk to the Speech API, e.g. "./resources/audio.raw"
@@ -261,9 +295,13 @@ function streamingMicRecognize (encoding, sampleRateHertz, languageCode) {
   };
 
   // Create a recognize stream
-  const recognizeStream = speech.createRecognizeStream(request)
+  const recognizeStream = speech.streamingRecognize(request)
     .on('error', console.error)
-    .on('data', (data) => process.stdout.write(data.results));
+    .on('data', (data) =>
+      process.stdout.write(
+        (data.results[0] && data.results[0].alternatives[0])
+          ? `Transcription: ${data.results[0].alternatives[0].transcript}\n`
+          : `\n\nReached transcription time limit, press Ctrl+C\n`));
 
   // Start recording and send the microphone input to the Speech API
   record
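The streaming samples swap createRecognizeStream for streamingRecognize but keep the same stream/event flow. A sketch of the microphone path; the streaming request shape and the record.start() options do not appear in these hunks, so they are assumptions:

// Sketch of the streaming microphone path. The streaming request shape and the
// record.start() options are assumptions; only the streamingRecognize call and
// the 'data' handler appear in the hunks above.
const Speech = require('@google-cloud/speech');
const record = require('node-record-lpcm16');

const speech = Speech();

const request = {
  config: {
    encoding: 'LINEAR16',
    sampleRateHertz: 16000,
    languageCode: 'en-US'
  },
  interimResults: false // assumption: final results only
};

// Create a recognize stream
const recognizeStream = speech.streamingRecognize(request)
  .on('error', console.error)
  .on('data', (data) =>
    process.stdout.write(
      (data.results[0] && data.results[0].alternatives[0])
        ? `Transcription: ${data.results[0].alternatives[0].transcript}\n`
        : `\n\nReached transcription time limit, press Ctrl+C\n`));

// Start recording and pipe the microphone input to the Speech API
// (recording options assumed; see node-record-lpcm16's docs)
record.start({ sampleRateHertz: 16000, threshold: 0 }).pipe(recognizeStream);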

speech/system-test/quickstart.test.js

Lines changed: 10 additions & 47 deletions
@@ -16,57 +16,20 @@
 'use strict';
 
 const path = require(`path`);
-const proxyquire = require(`proxyquire`).noPreserveCache();
-const sinon = require(`sinon`);
-const speech = proxyquire(`@google-cloud/speech`, {})();
 const test = require(`ava`);
 
+const cmd = `node quickstart.js`;
+const cwd = path.join(__dirname, `..`);
+const text = `how old is the Brooklyn Bridge`;
+
 const {
-  checkCredentials,
-  stubConsole,
-  restoreConsole
+  runAsync
 } = require(`@google-cloud/nodejs-repo-tools`);
 
-const fileName = path.join(__dirname, `../resources/audio.raw`);
-const config = {
-  encoding: `LINEAR16`,
-  sampleRateHertz: 16000,
-  languageCode: `en-US`
-};
-
-test.before(checkCredentials);
-test.before(stubConsole);
-test.after.always(restoreConsole);
-
-test.cb(`should detect speech`, (t) => {
-  const expectedFileName = `./resources/audio.raw`;
-  const expectedText = `how old is the Brooklyn Bridge`;
-
-  const speechMock = {
-    recognize: (_fileName, _config) => {
-      t.is(_fileName, expectedFileName);
-      t.deepEqual(_config, config);
-
-      return speech.recognize(fileName, config)
-        .then(([transcription]) => {
-          t.is(transcription, expectedText);
-
-          setTimeout(() => {
-            try {
-              t.is(console.log.callCount, 1);
-              t.deepEqual(console.log.getCall(0).args, [`Transcription: ${expectedText}`]);
-              t.end();
-            } catch (err) {
-              t.end(err);
-            }
-          }, 200);
-
-          return [transcription];
-        });
-    }
-  };
+test.before(async () => {
+});
 
-  proxyquire(`../quickstart`, {
-    '@google-cloud/speech': sinon.stub().returns(speechMock)
-  });
+test(`should run quickstart`, async (t) => {
+  const output = await runAsync(`${cmd}`, cwd);
+  t.true(output.includes(`Transcription: ${text}`));
 });
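The mocked unit test is replaced by a system test that shells out to the sample and asserts on its stdout. The runAsync helper comes from @google-cloud/nodejs-repo-tools; an illustrative stand-in (not the real implementation) shows the pattern the test now relies on:

// Illustrative stand-in for runAsync: run a command in a directory and resolve
// with its output. The real helper lives in @google-cloud/nodejs-repo-tools.
const { exec } = require('child_process');

function runAsyncSketch (cmd, cwd) {
  return new Promise((resolve, reject) => {
    exec(cmd, { cwd: cwd }, (err, stdout, stderr) => {
      if (err) {
        reject(err);
        return;
      }
      resolve(stdout + stderr);
    });
  });
}

// Usage mirrors the test above (path illustrative):
// runAsyncSketch('node quickstart.js', '/path/to/speech')
//   .then((output) => console.log(output.includes('Transcription:')));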

speech/system-test/recognize.test.js

Lines changed: 2 additions & 2 deletions
@@ -45,12 +45,12 @@ test.after.always(async () => {
 
 test(`should run sync recognize`, async (t) => {
   const output = await runAsync(`${cmd} sync ${filepath}`, cwd);
-  t.true(output.includes(`Transcription: ${text}`));
+  t.true(output.includes(`Transcription:  ${text}`));
 });
 
 test(`should run sync recognize on a GCS file`, async (t) => {
   const output = await runAsync(`${cmd} sync-gcs gs://${bucketName}/${filename}`, cwd);
-  t.true(output.includes(`Transcription: ${text}`));
+  t.true(output.includes(`Transcription:  ${text}`));
 });
 
 test(`should run async recognize on a local file`, async (t) => {
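The change here is whitespace in the expected output: the sync samples now pass the transcription as a second console.log argument, and console.log inserts a space between arguments, so the sync tests expect two spaces after the colon. A quick illustration:

// console.log joins its arguments with a space, so the updated sync samples
// print two spaces after "Transcription:".
console.log('Transcription: ', 'how old is the Brooklyn Bridge');
// => Transcription:  how old is the Brooklyn Bridge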
