Commit 322b15c

speech: update to v1 (#2175)
1 parent: b315302

File tree: 9 files changed (+472, -289 lines)


packages/google-cloud-speech/README.md (+4, -21)
@@ -19,7 +19,7 @@ var speech = require('@google-cloud/speech')({
 // Detect the speech in an audio file.
 speech.recognize('./audio.raw', {
   encoding: 'LINEAR16',
-  sampleRate: 16000
+  sampleRateHertz: 16000
 }, function(err, transcript) {
   // transcript = 'how old is the Brooklyn Bridge'
 });
@@ -30,37 +30,20 @@ fs.createReadStream('./audio.raw')
   .pipe(speech.createRecognizeStream({
     config: {
       encoding: 'LINEAR16',
-      sampleRate: 16000
+      sampleRateHertz: 16000
     },
     singleUtterance: false,
     interimResults: false
   }))
   .on('error', console.error)
   .on('data', function(data) {
-    // The first "data" event emitted might look like:
-    // data = {
-    //   endpointerType: Speech.endpointerTypes.START_OF_SPEECH,
-    //   ...
-    // }
-    //
-    // A later "data" event emitted might look like:
-    // data = {
-    //   endpointerType: Speech.endpointerTypes.END_OF_AUDIO,
-    //   ...
-    // }
-    //
-    // A final "data" event emitted might look like:
-    // data = {
-    //   endpointerType: Speech.endpointerTypes.END_OF_AUDIO,
-    //   results: "how old is the Brooklyn Bridge",
-    //   ...
-    // }
+    // data.results = "how old is the Brooklyn Bridge"
   });

 // Promises are also supported by omitting callbacks.
 speech.recognize('./audio.raw', {
   encoding: 'LINEAR16',
-  sampleRate: 16000
+  sampleRateHertz: 16000
 }).then(function(data) {
   var transcript = data[0];
 });
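Note: for quick reference, a self-contained version of the updated callback example. This is a minimal sketch that assumes default application credentials and a 16 kHz LINEAR16 file at ./audio.raw, as in the README:

// Runnable sketch of the v1 README usage above (the credentials setup and
// the ./audio.raw path are assumptions carried over from the README).
var speech = require('@google-cloud/speech')();

speech.recognize('./audio.raw', {
  encoding: 'LINEAR16',
  sampleRateHertz: 16000
}, function(err, transcript) {
  if (err) {
    console.error(err);
    return;
  }
  console.log(transcript); // e.g. 'how old is the Brooklyn Bridge'
});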

packages/google-cloud-speech/package.json (+2, -2)
@@ -58,8 +58,8 @@
     "@google-cloud/common-grpc": "^0.3.0",
     "events-intercept": "^2.0.0",
     "extend": "^3.0.0",
-    "google-gax": "^0.12.0",
-    "google-proto-files": "^0.10.0",
+    "google-gax": "^0.12.3",
+    "google-proto-files": "^0.11.0",
     "is": "^3.1.0",
     "propprop": "^0.3.1",
     "pumpify": "^1.3.5",

packages/google-cloud-speech/src/index.js (+45, -92)
@@ -35,7 +35,7 @@ var request = require('request');
 var streamEvents = require('stream-events');
 var through = require('through2');
 var util = require('util');
-var v1beta1 = require('./v1beta1');
+var v1 = require('./v1');

 /**
  * The [Cloud Speech API](https://cloud.google.com/speech/docs) enables easy
@@ -67,19 +67,14 @@ function Speech(options) {
   });

   this.api = {
-    Speech: v1beta1(options).speechClient(options)
+    Speech: v1(options).speechClient(options)
   };

   var config = {
     baseUrl: 'speech.googleapis.com',
     projectIdRequired: false,
     service: 'speech',
     protoServices: {
-      Speech: {
-        path: googleProtoFiles.speech.v1beta1,
-        service: 'cloud.speech',
-        apiVersion: 'v1beta1'
-      },
       Operations: {
         path: googleProtoFiles('longrunning', 'operations.proto'),
         service: 'longrunning'
@@ -97,20 +92,13 @@ function Speech(options) {
 util.inherits(Speech, commonGrpc.Service);

 /**
- * The endpointer types that the Speech API will return while processing a
+ * The event types that the Speech API will return while processing a
  * {module:speech#createRecognizeStream} request. You can track the progress of
- * audio recognition by comparing the `data.endpointerType` property with these
+ * audio recognition by comparing the `data.eventType` property with these
  * values.
  *
- * - `Speech.endpointerTypes.ENDPOINTER_EVENT_UNSPECIFIED`: No endpointer
- *   event specified.
- * - `Speech.endpointerTypes.START_OF_SPEECH`: Speech has been detected in the
- *   audio stream.
- * - `Speech.endpointerTypes.END_OF_SPEECH`: Speech has ceased to be detected
- *   in the audio stream.
- * - `Speech.endpointerTypes.END_OF_AUDIO`: The end of the audio stream has
- *   been reached and it is being processed.
- * - `Speech.endpointerTypes.END_OF_UTTERANCE`: This event is only sent when
+ * - `Speech.eventTypes.ENDPOINTER_EVENT_UNSPECIFIED`: No event specified.
+ * - `Speech.eventTypes.END_OF_SINGLE_UTTERANCE`: This event is only sent when
  *   `config.singleUtterance` passed to {module:speech#createRecognizeStream}
  *   is `true`. It indicates that the server has detected the end of the
  *   user's speech utterance and expects no additional speech. Therefore, the
@@ -119,13 +107,10 @@ util.inherits(Speech, commonGrpc.Service);
  *
  * @type {object}
  */
-Speech.endpointerTypes =
-Speech.prototype.endpointerTypes = {
-  END_OF_AUDIO: 'END_OF_AUDIO',
-  END_OF_SPEECH: 'END_OF_SPEECH',
-  END_OF_UTTERANCE: 'END_OF_UTTERANCE',
-  ENDPOINTER_EVENT_UNSPECIFIED: 'ENDPOINTER_EVENT_UNSPECIFIED',
-  START_OF_SPEECH: 'START_OF_SPEECH'
+Speech.eventTypes =
+Speech.prototype.eventTypes = {
+  END_OF_SINGLE_UTTERANCE: 'END_OF_SINGLE_UTTERANCE',
+  ENDPOINTER_EVENT_UNSPECIFIED: 'ENDPOINTER_EVENT_UNSPECIFIED'
 };

 /**
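Note on migration: stream consumers that branched on `data.endpointerType` must now read `data.eventType` against the slimmed-down `eventTypes` enum. A minimal sketch, reusing the illustrative config values from this file's examples:

var fs = require('fs');
var Speech = require('@google-cloud/speech');
var speech = Speech();

fs.createReadStream('./audio.raw')
  .pipe(speech.createRecognizeStream({
    config: {encoding: 'LINEAR16', sampleRateHertz: 16000},
    singleUtterance: true
  }))
  .on('data', function(data) {
    // v1beta1 code compared data.endpointerType against endpointerTypes;
    // in v1 the property is eventType and only two values remain.
    if (data.eventType === Speech.eventTypes.END_OF_SINGLE_UTTERANCE) {
      console.log('Server detected the end of the utterance.');
    }
  });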
@@ -371,7 +356,7 @@ Speech.formatResults_ = function(resultSets, verboseMode) {
  * [`StreamingRecognizeResponse`](https://cloud.google.com/speech/reference/rpc/google.cloud.speech.v1beta1#streamingrecognizeresponse)
  * object, containing these properties:
  *
- * - **`endpointerType`** See {module:speech#endpointerTypes}.
+ * - **`eventType`** See {module:speech#eventTypes}.
  * - **`results`** By default, a combined string of transcripts. When
  *   `config.verbose` is enabled, this is an object including a `transcript`
  *   property, a `confidence` score from `0` - `100`, and an `alternatives`
@@ -405,7 +390,7 @@ Speech.formatResults_ = function(resultSets, verboseMode) {
  * var request = {
  *   config: {
  *     encoding: 'LINEAR16',
- *     sampleRate: 16000
+ *     sampleRateHertz: 16000
  *   },
  *   singleUtterance: false,
  *   interimResults: false
@@ -416,27 +401,7 @@ Speech.formatResults_ = function(resultSets, verboseMode) {
  *   .pipe(speech.createRecognizeStream(request))
  *   .on('error', console.error)
  *   .on('data', function(data) {
- *     // The first "data" event emitted might look like:
- *     // data = {
- *     //   endpointerType: Speech.endpointerTypes.START_OF_SPEECH,
- *     //   results: "",
- *     //   ...
- *     // }
- *
- *     // A later "data" event emitted might look like:
- *     // data = {
- *     //   endpointerType: Speech.endpointerTypes.END_OF_AUDIO,
- *     //   results: "",
- *     //   ...
- *     // }
- *
- *     // A final "data" event emitted might look like:
- *     // data = {
- *     //   endpointerType:
- *     //     Speech.endpointerTypes.ENDPOINTER_EVENT_UNSPECIFIED,
- *     //   results: "how old is the Brooklyn Bridge",
- *     //   ...
- *     // }
+ *     // data.results = "how old is the Brooklyn Bridge"
  *   });
  *
  * //-
@@ -445,7 +410,7 @@ Speech.formatResults_ = function(resultSets, verboseMode) {
  * var request = {
  *   config: {
  *     encoding: 'LINEAR16',
- *     sampleRate: 16000
+ *     sampleRateHertz: 16000
  *   },
  *   singleUtterance: false,
  *   interimResults: false,
@@ -457,32 +422,7 @@ Speech.formatResults_ = function(resultSets, verboseMode) {
  *   .pipe(speech.createRecognizeStream(request))
  *   .on('error', console.error)
  *   .on('data', function(data) {
- *     // The first "data" event emitted might look like:
- *     // data = {
- *     //   endpointerType: Speech.endpointerTypes.START_OF_SPEECH,
- *     //   results: [],
- *     //   ...
- *     // }
- *
- *     // A later "data" event emitted might look like:
- *     // data = {
- *     //   endpointerType: Speech.endpointerTypes.END_OF_AUDIO,
- *     //   results: [],
- *     //   ...
- *     // }
- *
- *     // A final "data" event emitted might look like:
- *     // data = {
- *     //   endpointerType:
- *     //     Speech.endpointerTypes.ENDPOINTER_EVENT_UNSPECIFIED,
- *     //   results: [
- *     //     {
- *     //       transcript: "how old is the Brooklyn Bridge",
- *     //       confidence: 88.15
- *     //     }
- *     //   ],
- *     //   ...
- *     // }
+ *     // data.results = "how old is the Brooklyn Bridge"
  *   });
  */
 Speech.prototype.createRecognizeStream = function(config) {
@@ -492,6 +432,12 @@ Speech.prototype.createRecognizeStream = function(config) {
     throw new Error('A recognize request requires a configuration object.');
   }

+  config = extend(true, {
+    config: {
+      languageCode: 'en-US'
+    }
+  }, config);
+
   var verboseMode = config.verbose === true;
   delete config.verbose;

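Note: the `extend(true, ...)` call above deep-merges the default into the caller's object, so nested `config` keys survive and an explicit `languageCode` still wins. A small illustration of that behavior (the values are illustrative):

var extend = require('extend');

var userConfig = {
  config: {encoding: 'LINEAR16', sampleRateHertz: 16000},
  interimResults: false
};

var merged = extend(true, {config: {languageCode: 'en-US'}}, userConfig);
// merged.config now holds languageCode, encoding and sampleRateHertz side
// by side; a caller-supplied languageCode would overwrite the default.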
@@ -507,6 +453,10 @@ Speech.prototype.createRecognizeStream = function(config) {
   recognizeStream.once('writing', function() {
     var requestStream = self.api.Speech.streamingRecognize(gaxOptions);

+    requestStream.on('error', function(err) {
+      recognizeStream.destroy(err);
+    });
+
     requestStream.on('response', function(response) {
       recognizeStream.emit('response', response);
     });
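Note: with this handler, failures on the underlying gax request stream tear down the user-facing stream rather than going unhandled on an internal stream. A sketch of the user-visible effect (the file path is illustrative):

var fs = require('fs');

fs.createReadStream('./audio.raw')
  .pipe(speech.createRecognizeStream({
    config: {encoding: 'LINEAR16', sampleRateHertz: 16000}
  }))
  .on('error', function(err) {
    // gRPC-level failures now surface here via destroy(err).
    console.error('recognition failed:', err);
  });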
@@ -564,8 +514,8 @@ Speech.prototype.operation = function(name) {
  * larger files, you will need to use {module:speech#startRecognition} or
  * {module:speech#createRecognizeStream}.
  *
- * @resource [SyncRecognize API Reference]{@link https://cloud.google.com/speech/reference/rpc/google.cloud.speech.v1beta1#google.cloud.speech.v1beta1.Speech.SyncRecognize}
- * @resource [SyncRecognizeRequest API Reference]{@link https://cloud.google.com/speech/reference/rpc/google.cloud.speech.v1beta1#google.cloud.speech.v1beta1.SyncRecognizeRequest}
+ * @resource [Recognize API Reference]{@link https://cloud.google.com/speech/reference/rpc/google.cloud.speech.v1beta1#google.cloud.speech.v1beta1.Speech.Recognize}
+ * @resource [RecognizeRequest API Reference]{@link https://cloud.google.com/speech/reference/rpc/google.cloud.speech.v1beta1#google.cloud.speech.v1beta1.RecognizeRequest}
  *
  * @param {string|object|module:storage/file} file - The source file to run the
  *   detection on. It can be either a local file path, a remote file URL, a
@@ -585,12 +535,12 @@ Speech.prototype.operation = function(name) {
  *   array consisting of other transcription possibilities. See the examples
  *   below for more.
  * @param {object} callback.apiResponse - Raw API response. See
- *   [`SyncRecognizeResponse`](https://cloud.google.com/speech/reference/rpc/google.cloud.speech.v1beta1#syncrecognizeresponse).
+ *   [`RecognizeResponse`](https://cloud.google.com/speech/reference/rpc/google.cloud.speech.v1beta1#recognizeresponse).
  *
  * @example
  * var config = {
  *   encoding: 'LINEAR16',
- *   sampleRate: 16000
+ *   sampleRateHertz: 16000
  * };
  *
  * function callback(err, transcript, apiResponse) {
@@ -632,7 +582,7 @@ Speech.prototype.operation = function(name) {
  * //-
  * var config = {
  *   encoding: 'LINEAR16',
- *   sampleRate: 16000,
+ *   sampleRateHertz: 16000,
  *   verbose: true
  * };
  *
@@ -670,7 +620,9 @@ Speech.prototype.recognize = function(file, config, callback) {
     throw new Error('A recognize request requires a configuration object.');
   }

-  config = extend({}, config);
+  config = extend({
+    languageCode: 'en-US'
+  }, config);

   if (!config.encoding) {
     config.encoding = Speech.detectEncoding_(file);
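Note: here the merge is shallow, which suffices because `recognize` takes the flat recognition config directly; a caller-supplied value still takes precedence over the 'en-US' default. For example (the 'en-GB' value is illustrative):

speech.recognize('./audio.raw', {
  encoding: 'LINEAR16',
  sampleRateHertz: 16000,
  languageCode: 'en-GB' // overrides the en-US default added above
}, function(err, transcript) {
  // transcript was recognized as British English
});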
@@ -685,7 +637,7 @@ Speech.prototype.recognize = function(file, config, callback) {
       return;
     }

-    self.api.Speech.syncRecognize({
+    self.api.Speech.recognize({
       config: config,
       audio: foundFile
     }, function(err, resp) {
@@ -694,8 +646,7 @@ Speech.prototype.recognize = function(file, config, callback) {
         return;
       }

-      var response = new self.protos.Speech.SyncRecognizeResponse(resp);
-      var results = Speech.formatResults_(response.results, verboseMode);
+      var results = Speech.formatResults_(resp.results, verboseMode);

       callback(null, results, resp);
     });
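Note: dropping the manual `SyncRecognizeResponse` re-wrap suggests the v1 gax client already hands back a decoded response, so `resp.results` feeds `formatResults_` directly. For orientation, a v1 `RecognizeResponse` has roughly this shape (the values are invented):

// Shape sketch only, not real output.
var resp = {
  results: [            // one SpeechRecognitionResult per audio segment
    {
      alternatives: [   // ranked hypotheses
        {transcript: 'how old is the Brooklyn Bridge', confidence: 0.8815}
      ]
    }
  ]
};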
@@ -710,9 +661,9 @@ Speech.prototype.recognize = function(file, config, callback) {
  * events to see how the operation finishes. Follow along with the examples
  * below.
  *
- * @resource [AsyncRecognize API Reference]{@link https://cloud.google.com/speech/reference/rpc/google.cloud.speech.v1beta1#google.cloud.speech.v1beta1.Speech.AsyncRecognize}
- * @resource [AsyncRecognizeRequest API Reference]{@link https://cloud.google.com/speech/reference/rpc/google.cloud.speech.v1beta1#google.cloud.speech.v1beta1.AsyncRecognizeRequest}
- * @resource [AsyncRecognizeResponse API Reference]{@link https://cloud.google.com/speech/reference/rpc/google.cloud.speech.v1beta1#google.cloud.speech.v1beta1.AsyncRecognizeResponse}
+ * @resource [LongRunningRecognize API Reference]{@link https://cloud.google.com/speech/reference/rpc/google.cloud.speech.v1beta1#google.cloud.speech.v1.Speech.LongRunningRecognize}
+ * @resource [LongRunningRecognizeRequest API Reference]{@link https://cloud.google.com/speech/reference/rpc/google.cloud.speech.v1beta1#google.cloud.speech.v1.LongRunningRecognizeRequest}
+ * @resource [LongRunningRecognizeResponse API Reference]{@link https://cloud.google.com/speech/reference/rpc/google.cloud.speech.v1beta1#google.cloud.speech.v1.LongRunningRecognizeResponse}
  *
  * @param {string|object|module:storage/file} file - The source file to run the
  *   detection on. It can be either a local file path, a remote file URL, a
@@ -732,7 +683,7 @@ Speech.prototype.recognize = function(file, config, callback) {
  * @example
  * var config = {
  *   encoding: 'LINEAR16',
- *   sampleRate: 16000
+ *   sampleRateHertz: 16000
  * };
  *
  * function callback(err, operation, apiResponse) {
@@ -781,7 +732,7 @@ Speech.prototype.recognize = function(file, config, callback) {
  * //-
  * var config = {
  *   encoding: 'LINEAR16',
- *   sampleRate: 16000,
+ *   sampleRateHertz: 16000,
  *   verbose: true
  * };
  *
@@ -813,7 +764,9 @@ Speech.prototype.recognize = function(file, config, callback) {
 Speech.prototype.startRecognition = function(file, config, callback) {
   var self = this;

-  config = extend({}, config);
+  config = extend({
+    languageCode: 'en-US'
+  }, config);

   if (!config.encoding) {
     config.encoding = Speech.detectEncoding_(file);
@@ -828,7 +781,7 @@ Speech.prototype.startRecognition = function(file, config, callback) {
       return;
     }

-    self.api.Speech.asyncRecognize({
+    self.api.Speech.longRunningRecognize({
       config: config,
       audio: foundFile
     }, function(err, operation, resp) {
@@ -857,4 +810,4 @@ common.util.promisifyAll(Speech, {
 });

 module.exports = Speech;
-module.exports.v1beta1 = v1beta1;
+module.exports.v1 = v1;

packages/google-cloud-speech/src/v1beta1/index.js renamed to packages/google-cloud-speech/src/v1/index.js (+5, -5)
@@ -19,15 +19,15 @@ var speechClient = require('./speech_client');
 var gax = require('google-gax');
 var extend = require('extend');

-function v1beta1(options) {
+function v1(options) {
   options = extend({
-    scopes: v1beta1.ALL_SCOPES
+    scopes: v1.ALL_SCOPES
   }, options);
   var gaxGrpc = gax.grpc(options);
   return speechClient(gaxGrpc);
 }

-v1beta1.SERVICE_ADDRESS = speechClient.SERVICE_ADDRESS;
-v1beta1.ALL_SCOPES = speechClient.ALL_SCOPES;
+v1.SERVICE_ADDRESS = speechClient.SERVICE_ADDRESS;
+v1.ALL_SCOPES = speechClient.ALL_SCOPES;

-module.exports = v1beta1;
+module.exports = v1;
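Note: code that reached into the generated layer via `require('@google-cloud/speech').v1beta1` must move to the `v1` export. A sketch of how the re-export is consumed, mirroring the handwritten layer's own `v1(options).speechClient(options)` call (the option values are illustrative):

var Speech = require('@google-cloud/speech');

// v1(options) wires gax up with the speech scopes and returns the
// generated client factory.
var options = {projectId: 'my-project', keyFilename: './key.json'};
var client = Speech.v1(options).speechClient(options);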
