
Commit f741212

yoshi-automation authored and JustinBeckwith committed
chore: update proto comments and grpc timeouts (#234)
1 parent 1774752 commit f741212

File tree

4 files changed: +92 -62 lines changed


packages/google-cloud-node/protos/google/cloud/speech/v1p1beta1/cloud_speech.proto (+44 -29)

@@ -1,4 +1,4 @@
-// Copyright 2018 Google Inc.
+// Copyright 2018 Google LLC.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -11,6 +11,7 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
+//

 syntax = "proto3";

@@ -20,6 +21,7 @@ import "google/api/annotations.proto";
 import "google/longrunning/operations.proto";
 import "google/protobuf/any.proto";
 import "google/protobuf/duration.proto";
+import "google/protobuf/empty.proto";
 import "google/protobuf/timestamp.proto";
 import "google/rpc/status.proto";

@@ -54,7 +56,8 @@ service Speech {

   // Performs bidirectional streaming speech recognition: receive results while
   // sending audio. This method is only available via the gRPC API (not REST).
-  rpc StreamingRecognize(stream StreamingRecognizeRequest) returns (stream StreamingRecognizeResponse);
+  rpc StreamingRecognize(stream StreamingRecognizeRequest) returns (stream StreamingRecognizeResponse) {
+  }
 }

 // The top-level message sent by the client for the `Recognize` method.
@@ -98,7 +101,7 @@ message StreamingRecognizeRequest {
     // `audio_content` data. The audio bytes must be encoded as specified in
     // `RecognitionConfig`. Note: as with all bytes fields, protobuffers use a
     // pure binary representation (not base64). See
-    // [audio limits](https://cloud.google.com/speech/limits#content).
+    // [content limits](/speech-to-text/quotas#content).
     bytes audio_content = 2;
   }
 }
@@ -218,36 +221,36 @@ message RecognitionConfig {
   // Valid values for OGG_OPUS are '1'-'254'.
   // Valid value for MULAW, AMR, AMR_WB and SPEEX_WITH_HEADER_BYTE is only `1`.
   // If `0` or omitted, defaults to one channel (mono).
-  // NOTE: We only recognize the first channel by default.
+  // Note: We only recognize the first channel by default.
   // To perform independent recognition on each channel set
-  // enable_separate_recognition_per_channel to 'true'.
+  // `enable_separate_recognition_per_channel` to 'true'.
   int32 audio_channel_count = 7;

-  // This needs to be set to ‘true’ explicitly and audio_channel_count > 1
+  // This needs to be set to ‘true’ explicitly and `audio_channel_count` > 1
   // to get each channel recognized separately. The recognition result will
-  // contain a channel_tag field to state which channel that result belongs to.
-  // If this is not true, we will only recognize the first channel.
-  // NOTE: The request is also billed cumulatively for all channels recognized:
-  // (audio_channel_count times the audio length)
+  // contain a `channel_tag` field to state which channel that result belongs
+  // to. If this is not true, we will only recognize the first channel. The
+  // request is billed cumulatively for all channels recognized:
+  // `audio_channel_count` multiplied by the length of the audio.
   bool enable_separate_recognition_per_channel = 12;

   // *Required* The language of the supplied audio as a
   // [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag.
   // Example: "en-US".
-  // See [Language Support](https://cloud.google.com/speech/docs/languages)
+  // See [Language Support](/speech-to-text/docs/languages)
   // for a list of the currently supported language codes.
   string language_code = 3;

   // *Optional* A list of up to 3 additional
   // [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tags,
   // listing possible alternative languages of the supplied audio.
-  // See [Language Support](https://cloud.google.com/speech/docs/languages)
+  // See [Language Support](/speech-to-text/docs/languages)
   // for a list of the currently supported language codes.
   // If alternative languages are listed, recognition result will contain
   // recognition in the most likely language detected including the main
   // language_code. The recognition result will include the language tag
   // of the language detected in the audio.
-  // NOTE: This feature is only supported for Voice Command and Voice Search
+  // Note: This feature is only supported for Voice Command and Voice Search
   // use cases and performance may vary for other use cases (e.g., phone call
   // transcription).
   repeated string alternative_language_codes = 18;
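The comments above pin down the multichannel contract: only the first channel is recognized unless `enable_separate_recognition_per_channel` is set, and the request is billed for `audio_channel_count` times the audio length. A minimal Node.js sketch of a request exercising these fields (the stereo file and its parameters are hypothetical):

```js
// Sketch: per-channel recognition with the v1p1beta1 client.
// Assumes a 2-channel LINEAR16 file; the path and rates are examples.
const fs = require('fs');
const speech = require('@google-cloud/speech').v1p1beta1;

async function recognizeStereo() {
  const client = new speech.SpeechClient();
  const [response] = await client.recognize({
    config: {
      encoding: 'LINEAR16',
      sampleRateHertz: 16000,
      languageCode: 'en-US',
      audioChannelCount: 2,                      // stereo input
      enableSeparateRecognitionPerChannel: true, // one result set per channel
    },
    audio: {content: fs.readFileSync('stereo.raw').toString('base64')},
  });
  for (const result of response.results) {
    // channelTag states which channel ('1'..'N') produced the result.
    console.log(result.channelTag, result.alternatives[0].transcript);
  }
}
```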
@@ -266,7 +269,9 @@ message RecognitionConfig {
   // won't be filtered out.
   bool profanity_filter = 5;

-  // *Optional* A means to provide context to assist the speech recognition.
+  // *Optional* array of [SpeechContext][google.cloud.speech.v1p1beta1.SpeechContext].
+  // A means to provide context to assist the speech recognition. For more
+  // information, see [Phrase Hints](/speech-to-text/docs/basics#phrase-hints).
   repeated SpeechContext speech_contexts = 6;

   // *Optional* If `true`, the top result includes a list of words and
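The rewritten `speech_contexts` comment points at phrase hints. As a sketch, hints are supplied as an array of `SpeechContext` messages inside the request config (the phrases are invented examples):

```js
// Sketch: biasing recognition toward expected phrases (examples only).
const config = {
  encoding: 'LINEAR16',
  sampleRateHertz: 16000,
  languageCode: 'en-US',
  speechContexts: [{phrases: ['weather forecast', 'set a timer']}],
};
```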
@@ -284,18 +289,20 @@ message RecognitionConfig {
   // This feature is only available in select languages. Setting this for
   // requests in other languages has no effect at all.
   // The default 'false' value does not add punctuation to result hypotheses.
-  // NOTE: "This is currently offered as an experimental service, complimentary
+  // Note: This is currently offered as an experimental service, complimentary
   // to all users. In the future this may be exclusively available as a
-  // premium feature."
+  // premium feature.
   bool enable_automatic_punctuation = 11;

   // *Optional* If 'true', enables speaker detection for each recognized word in
   // the top alternative of the recognition result using a speaker_tag provided
   // in the WordInfo.
   // Note: When this is true, we send all the words from the beginning of the
-  // audio for the top alternative in every consecutive responses.
+  // audio for the top alternative in every consecutive STREAMING responses.
   // This is done in order to improve our speaker tags as our models learn to
   // identify the speakers in the conversation over time.
+  // For non-streaming requests, the diarization results will be provided only
+  // in the top alternative of the FINAL SpeechRecognitionResult.
   bool enable_speaker_diarization = 16;

   // *Optional*
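The added lines distinguish streaming from non-streaming diarization: streaming responses resend all words so far, while non-streaming requests report diarization only in the top alternative of the final result. A sketch of the non-streaming case (the speaker count is a hypothetical value):

```js
// Sketch: speaker diarization on a non-streaming request. Per the
// comment above, speaker tags land in the top alternative of the
// FINAL SpeechRecognitionResult.
const speech = require('@google-cloud/speech').v1p1beta1;

async function diarize(audioContentBase64) {
  const client = new speech.SpeechClient();
  const [response] = await client.recognize({
    config: {
      encoding: 'LINEAR16',
      sampleRateHertz: 16000,
      languageCode: 'en-US',
      enableSpeakerDiarization: true,
      diarizationSpeakerCount: 2, // hypothetical speaker count
    },
    audio: {content: audioContentBase64},
  });
  const finalResult = response.results[response.results.length - 1];
  for (const word of finalResult.alternatives[0].words) {
    console.log(`speaker ${word.speakerTag}: ${word.word}`);
  }
}
```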
@@ -342,14 +349,18 @@ message RecognitionConfig {
   string model = 13;

   // *Optional* Set to true to use an enhanced model for speech recognition.
-  // You must also set the `model` field to a valid, enhanced model. If
-  // `use_enhanced` is set to true and the `model` field is not set, then
-  // `use_enhanced` is ignored. If `use_enhanced` is true and an enhanced
-  // version of the specified model does not exist, then the speech is
-  // recognized using the standard version of the specified model.
+  // If `use_enhanced` is set to true and the `model` field is not set, then
+  // an appropriate enhanced model is chosen if:
+  // 1. project is eligible for requesting enhanced models
+  // 2. an enhanced model exists for the audio
+  //
+  // If `use_enhanced` is true and an enhanced version of the specified model
+  // does not exist, then the speech is recognized using the standard version
+  // of the specified model.
   //
-  // Enhanced speech models require that you opt-in to the audio logging using
-  // instructions in the [alpha documentation](/speech/data-sharing). If you set
+  // Enhanced speech models require that you opt-in to data logging using
+  // instructions in the
+  // [documentation](/speech-to-text/docs/enable-data-logging). If you set
   // `use_enhanced` to true and you have not enabled audio logging, then you
   // will receive an error.
   bool use_enhanced = 14;
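Under the new semantics, leaving `model` unset lets the service pick an enhanced model when the project qualifies, and an explicit `model` falls back to its standard version if no enhanced variant exists. A sketch, assuming the project has opted in to data logging as the comment requires:

```js
// Sketch: requesting an enhanced model. If no enhanced variant of the
// named model exists, the standard version is used instead.
const config = {
  encoding: 'LINEAR16',
  sampleRateHertz: 8000,
  languageCode: 'en-US',
  useEnhanced: true,
  model: 'phone_call', // a model with an enhanced variant
};
```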
@@ -494,14 +505,14 @@ message SpeechContext {
   // to improve the accuracy for specific words and phrases, for example, if
   // specific commands are typically spoken by the user. This can also be used
   // to add additional words to the vocabulary of the recognizer. See
-  // [usage limits](https://cloud.google.com/speech/limits#content).
+  // [usage limits](/speech-to-text/quotas#content).
   repeated string phrases = 1;
 }

 // Contains audio data in the encoding specified in the `RecognitionConfig`.
 // Either `content` or `uri` must be supplied. Supplying both or neither
 // returns [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]. See
-// [audio limits](https://cloud.google.com/speech/limits#content).
+// [content limits](/speech-to-text/quotas#content).
 message RecognitionAudio {
   // The audio source, which is either inline content or a Google Cloud
   // Storage uri.
@@ -512,7 +523,8 @@ message RecognitionAudio {
     bytes content = 1;

     // URI that points to a file that contains audio data bytes as specified in
-    // `RecognitionConfig`. Currently, only Google Cloud Storage URIs are
+    // `RecognitionConfig`. The file must not be compressed (for example, gzip).
+    // Currently, only Google Cloud Storage URIs are
     // supported, which must be specified in the following format:
     // `gs://bucket_name/object_name` (other URI formats return
     // [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]). For more information, see
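The `uri` clarification (uncompressed files, `gs://bucket_name/object_name` format) pairs naturally with `LongRunningRecognize` for long audio. A sketch with a placeholder bucket and object:

```js
// Sketch: transcribing uncompressed audio referenced by a Cloud Storage
// URI. The bucket and object names are placeholders.
const speech = require('@google-cloud/speech').v1p1beta1;

async function transcribeFromGcs() {
  const client = new speech.SpeechClient();
  const [operation] = await client.longRunningRecognize({
    config: {encoding: 'FLAC', languageCode: 'en-US'},
    audio: {uri: 'gs://my-bucket/my-audio.flac'}, // placeholder URI
  });
  const [response] = await operation.promise(); // wait for the LRO
  console.log(response.results[0].alternatives[0].transcript);
}
```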
@@ -658,6 +670,10 @@ message StreamingRecognitionResult {
   // The default of 0.0 is a sentinel value indicating `stability` was not set.
   float stability = 3;

+  // Output only. Time offset of the end of this result relative to the
+  // beginning of the audio.
+  google.protobuf.Duration result_end_time = 4;
+
   // For multi-channel audio, this is the channel number corresponding to the
   // recognized result for the audio from that channel.
   // For audio_channel_count = N, its output values can range from '1' to 'N'.
@@ -705,7 +721,7 @@ message SpeechRecognitionAlternative {
   float confidence = 2;

   // Output only. A list of word-specific information for each recognized word.
-  // Note: When enable_speaker_diarization is true, you will see all the words
+  // Note: When `enable_speaker_diarization` is true, you will see all the words
   // from the beginning of the audio.
   repeated WordInfo words = 3;
 }
@@ -746,5 +762,4 @@ message WordInfo {
   // speaker_tag is set if enable_speaker_diarization = 'true' and only in the
   // top alternative.
   int32 speaker_tag = 5;
-
 }

packages/google-cloud-node/src/v1/speech_client_config.json (+3 -3)

@@ -21,17 +21,17 @@
       },
       "methods": {
         "Recognize": {
-          "timeout_millis": 1000000,
+          "timeout_millis": 200000,
           "retry_codes_name": "idempotent",
           "retry_params_name": "default"
         },
         "LongRunningRecognize": {
-          "timeout_millis": 60000,
+          "timeout_millis": 200000,
           "retry_codes_name": "non_idempotent",
           "retry_params_name": "default"
         },
         "StreamingRecognize": {
-          "timeout_millis": 1000000,
+          "timeout_millis": 200000,
           "retry_codes_name": "idempotent",
           "retry_params_name": "default"
         }
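This change normalizes all three methods to a 200-second default timeout (previously 1,000 s for the unary and streaming calls and 60 s for `LongRunningRecognize`). If a request needs a different budget, the generated defaults can be overridden at construction time; a sketch using the `clientConfig` option that GAX-generated clients accept (the value is illustrative):

```js
// Sketch: overriding the generated default timeout for Recognize.
// The structure mirrors speech_client_config.json; 600000 is an example.
const speech = require('@google-cloud/speech');

const client = new speech.SpeechClient({
  clientConfig: {
    interfaces: {
      'google.cloud.speech.v1.Speech': {
        methods: {
          Recognize: {timeout_millis: 600000},
        },
      },
    },
  },
});
```

A per-call override is also possible by passing `{timeout: ...}` as the call options argument to an individual method.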

packages/google-cloud-node/src/v1p1beta1/doc/google/cloud/speech/v1p1beta1/doc_cloud_speech.js (+42 -27)

@@ -82,7 +82,7 @@ const LongRunningRecognizeRequest = {
  * `audio_content` data. The audio bytes must be encoded as specified in
  * `RecognitionConfig`. Note: as with all bytes fields, protobuffers use a
  * pure binary representation (not base64). See
- * [audio limits](https://cloud.google.com/speech/limits#content).
+ * [content limits](https://cloud.google.com/speech-to-text/quotas#content).
  *
  * @typedef StreamingRecognizeRequest
  * @memberof google.cloud.speech.v1p1beta1
@@ -156,36 +156,36 @@ const StreamingRecognitionConfig = {
  * Valid values for OGG_OPUS are '1'-'254'.
  * Valid value for MULAW, AMR, AMR_WB and SPEEX_WITH_HEADER_BYTE is only `1`.
  * If `0` or omitted, defaults to one channel (mono).
- * NOTE: We only recognize the first channel by default.
+ * Note: We only recognize the first channel by default.
  * To perform independent recognition on each channel set
- * enable_separate_recognition_per_channel to 'true'.
+ * `enable_separate_recognition_per_channel` to 'true'.
  *
  * @property {boolean} enableSeparateRecognitionPerChannel
- * This needs to be set to ‘true’ explicitly and audio_channel_count > 1
+ * This needs to be set to ‘true’ explicitly and `audio_channel_count` > 1
  * to get each channel recognized separately. The recognition result will
- * contain a channel_tag field to state which channel that result belongs to.
- * If this is not true, we will only recognize the first channel.
- * NOTE: The request is also billed cumulatively for all channels recognized:
- * (audio_channel_count times the audio length)
+ * contain a `channel_tag` field to state which channel that result belongs
+ * to. If this is not true, we will only recognize the first channel. The
+ * request is billed cumulatively for all channels recognized:
+ * `audio_channel_count` multiplied by the length of the audio.
  *
  * @property {string} languageCode
  * *Required* The language of the supplied audio as a
  * [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag.
  * Example: "en-US".
- * See [Language Support](https://cloud.google.com/speech/docs/languages)
+ * See [Language Support](https://cloud.google.com/speech-to-text/docs/languages)
  * for a list of the currently supported language codes.
  *
  * @property {string[]} alternativeLanguageCodes
  * *Optional* A list of up to 3 additional
  * [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tags,
  * listing possible alternative languages of the supplied audio.
- * See [Language Support](https://cloud.google.com/speech/docs/languages)
+ * See [Language Support](https://cloud.google.com/speech-to-text/docs/languages)
  * for a list of the currently supported language codes.
  * If alternative languages are listed, recognition result will contain
  * recognition in the most likely language detected including the main
  * language_code. The recognition result will include the language tag
  * of the language detected in the audio.
- * NOTE: This feature is only supported for Voice Command and Voice Search
+ * Note: This feature is only supported for Voice Command and Voice Search
  * use cases and performance may vary for other use cases (e.g., phone call
  * transcription).
  *
@@ -204,7 +204,9 @@ const StreamingRecognitionConfig = {
  * won't be filtered out.
  *
  * @property {Object[]} speechContexts
- * *Optional* A means to provide context to assist the speech recognition.
+ * *Optional* array of SpeechContext.
+ * A means to provide context to assist the speech recognition. For more
+ * information, see [Phrase Hints](https://cloud.google.com/speech-to-text/docs/basics#phrase-hints).
  *
  * This object should have the same structure as [SpeechContext]{@link google.cloud.speech.v1p1beta1.SpeechContext}
  *
@@ -224,18 +226,20 @@ const StreamingRecognitionConfig = {
  * This feature is only available in select languages. Setting this for
  * requests in other languages has no effect at all.
  * The default 'false' value does not add punctuation to result hypotheses.
- * NOTE: "This is currently offered as an experimental service, complimentary
+ * Note: This is currently offered as an experimental service, complimentary
  * to all users. In the future this may be exclusively available as a
- * premium feature."
+ * premium feature.
  *
  * @property {boolean} enableSpeakerDiarization
  * *Optional* If 'true', enables speaker detection for each recognized word in
  * the top alternative of the recognition result using a speaker_tag provided
  * in the WordInfo.
  * Note: When this is true, we send all the words from the beginning of the
- * audio for the top alternative in every consecutive responses.
+ * audio for the top alternative in every consecutive STREAMING responses.
  * This is done in order to improve our speaker tags as our models learn to
  * identify the speakers in the conversation over time.
+ * For non-streaming requests, the diarization results will be provided only
+ * in the top alternative of the FINAL SpeechRecognitionResult.
  *
  * @property {number} diarizationSpeakerCount
  * *Optional*
@@ -284,14 +288,18 @@ const StreamingRecognitionConfig = {
  *
  * @property {boolean} useEnhanced
  * *Optional* Set to true to use an enhanced model for speech recognition.
- * You must also set the `model` field to a valid, enhanced model. If
- * `use_enhanced` is set to true and the `model` field is not set, then
- * `use_enhanced` is ignored. If `use_enhanced` is true and an enhanced
- * version of the specified model does not exist, then the speech is
- * recognized using the standard version of the specified model.
- *
- * Enhanced speech models require that you opt-in to the audio logging using
- * instructions in the [alpha documentation](https://cloud.google.com/speech/data-sharing). If you set
+ * If `use_enhanced` is set to true and the `model` field is not set, then
+ * an appropriate enhanced model is chosen if:
+ * 1. project is eligible for requesting enhanced models
+ * 2. an enhanced model exists for the audio
+ *
+ * If `use_enhanced` is true and an enhanced version of the specified model
+ * does not exist, then the speech is recognized using the standard version
+ * of the specified model.
+ *
+ * Enhanced speech models require that you opt-in to data logging using
+ * instructions in the
+ * [documentation](https://cloud.google.com/speech-to-text/docs/enable-data-logging). If you set
  * `use_enhanced` to true and you have not enabled audio logging, then you
  * will receive an error.
  *
@@ -617,7 +625,7 @@ const RecognitionMetadata = {
  * to improve the accuracy for specific words and phrases, for example, if
  * specific commands are typically spoken by the user. This can also be used
  * to add additional words to the vocabulary of the recognizer. See
- * [usage limits](https://cloud.google.com/speech/limits#content).
+ * [usage limits](https://cloud.google.com/speech-to-text/quotas#content).
  *
  * @typedef SpeechContext
  * @memberof google.cloud.speech.v1p1beta1
@@ -631,7 +639,7 @@ const SpeechContext = {
  * Contains audio data in the encoding specified in the `RecognitionConfig`.
  * Either `content` or `uri` must be supplied. Supplying both or neither
  * returns google.rpc.Code.INVALID_ARGUMENT. See
- * [audio limits](https://cloud.google.com/speech/limits#content).
+ * [content limits](https://cloud.google.com/speech-to-text/quotas#content).
  *
  * @property {string} content
  * The audio data bytes encoded as specified in
@@ -640,7 +648,8 @@ const SpeechContext = {
  *
  * @property {string} uri
  * URI that points to a file that contains audio data bytes as specified in
- * `RecognitionConfig`. Currently, only Google Cloud Storage URIs are
+ * `RecognitionConfig`. The file must not be compressed (for example, gzip).
+ * Currently, only Google Cloud Storage URIs are
  * supported, which must be specified in the following format:
  * `gs://bucket_name/object_name` (other URI formats return
  * google.rpc.Code.INVALID_ARGUMENT). For more information, see
@@ -850,6 +859,12 @@ const StreamingRecognizeResponse = {
  * This field is only provided for interim results (`is_final=false`).
  * The default of 0.0 is a sentinel value indicating `stability` was not set.
  *
+ * @property {Object} resultEndTime
+ * Output only. Time offset of the end of this result relative to the
+ * beginning of the audio.
+ *
+ * This object should have the same structure as [Duration]{@link google.protobuf.Duration}
+ *
  * @property {number} channelTag
  * For multi-channel audio, this is the channel number corresponding to the
  * recognized result for the audio from that channel.
@@ -916,7 +931,7 @@ const SpeechRecognitionResult = {
  *
  * @property {Object[]} words
  * Output only. A list of word-specific information for each recognized word.
- * Note: When enable_speaker_diarization is true, you will see all the words
+ * Note: When `enable_speaker_diarization` is true, you will see all the words
  * from the beginning of the audio.
  *
  * This object should have the same structure as [WordInfo]{@link google.cloud.speech.v1p1beta1.WordInfo}
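The new `resultEndTime` property carries a `google.protobuf.Duration` (`{seconds, nanos}`) marking where each streaming result ends in the audio. A sketch of reading it from interim results (the formatting is illustrative):

```js
// Sketch: timestamping streaming results with resultEndTime.
const speech = require('@google-cloud/speech').v1p1beta1;

const client = new speech.SpeechClient();
const recognizeStream = client
  .streamingRecognize({
    config: {encoding: 'LINEAR16', sampleRateHertz: 16000, languageCode: 'en-US'},
    interimResults: true,
  })
  .on('error', console.error)
  .on('data', response => {
    for (const result of response.results) {
      const t = result.resultEndTime || {};
      const secs = Number(t.seconds || 0) + (t.nanos || 0) / 1e9;
      console.log(`[${secs.toFixed(2)}s] ${result.alternatives[0].transcript}`);
    }
  });
// Audio chunks would then be written to recognizeStream, e.g.
// fs.createReadStream('audio.raw').pipe(recognizeStream);
```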
