Skip to content

Commit b7ff4e3

Browse files
dpebotcrwilcox
authored andcommitted
Re-generate library using /synth.py (#110)
* Re-generate library using /synth.py
1 parent 86da74e commit b7ff4e3

File tree

2 files changed

+183
-0
lines changed

2 files changed

+183
-0
lines changed

packages/google-cloud-node/protos/google/cloud/speech/v1p1beta1/cloud_speech.proto

+92
Original file line numberDiff line numberDiff line change
@@ -212,13 +212,46 @@ message RecognitionConfig {
212212
// for all other audio formats. For details, see [AudioEncoding][google.cloud.speech.v1p1beta1.RecognitionConfig.AudioEncoding].
213213
int32 sample_rate_hertz = 2;
214214

215+
// *Optional* The number of channels in the input audio data.
216+
// ONLY set this for MULTI-CHANNEL recognition.
217+
// Valid values for LINEAR16 and FLAC are `1`-`8`.
218+
// Valid values for OGG_OPUS are `1`-`254`.
219+
// Valid value for MULAW, AMR, AMR_WB and SPEEX_WITH_HEADER_BYTE is only `1`.
220+
// If `0` or omitted, defaults to one channel (mono).
221+
// NOTE: We only recognize the first channel by default.
222+
// To perform independent recognition on each channel set
223+
// enable_separate_recognition_per_channel to 'true'.
224+
int32 audio_channel_count = 7;
225+
226+
// This needs to be set to 'true' explicitly and audio_channel_count > 1
227+
// to get each channel recognized separately. The recognition result will
228+
// contain a channel_tag field to state which channel that result belongs to.
229+
// If this is not 'true', we will only recognize the first channel.
230+
// NOTE: The request is also billed cumulatively for all channels recognized:
231+
// (audio_channel_count times the audio length)
232+
bool enable_separate_recognition_per_channel = 12;
233+
215234
// *Required* The language of the supplied audio as a
216235
// [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag.
217236
// Example: "en-US".
218237
// See [Language Support](https://cloud.google.com/speech/docs/languages)
219238
// for a list of the currently supported language codes.
220239
string language_code = 3;
221240

241+
// *Optional* A list of up to 3 additional
242+
// [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tags,
243+
// listing possible alternative languages of the supplied audio.
244+
// See [Language Support](https://cloud.google.com/speech/docs/languages)
245+
// for a list of the currently supported language codes.
246+
// If alternative languages are listed, recognition result will contain
247+
// recognition in the most likely language detected including the main
248+
// language_code. The recognition result will include the language tag
249+
// of the language detected in the audio.
250+
// NOTE: This feature is only supported for Voice Command and Voice Search
251+
// use cases and performance may vary for other use cases (e.g., phone call
252+
// transcription).
253+
repeated string alternative_language_codes = 18;
254+
222255
// *Optional* Maximum number of recognition hypotheses to be returned.
223256
// Specifically, the maximum number of `SpeechRecognitionAlternative` messages
224257
// within each `SpeechRecognitionResult`.
@@ -242,6 +275,11 @@ message RecognitionConfig {
242275
// `false`.
243276
bool enable_word_time_offsets = 8;
244277

278+
// *Optional* If `true`, the top result includes a list of words and the
279+
// confidence for those words. If `false`, no word-level confidence
280+
// information is returned. The default is `false`.
281+
bool enable_word_confidence = 15;
282+
245283
// *Optional* If 'true', adds punctuation to recognition result hypotheses.
246284
// This feature is only available in select languages. Setting this for
247285
// requests in other languages has no effect at all.
@@ -251,6 +289,21 @@ message RecognitionConfig {
251289
// premium feature."
252290
bool enable_automatic_punctuation = 11;
253291

292+
// *Optional* If 'true', enables speaker detection for each recognized word in
293+
// the top alternative of the recognition result using a speaker_tag provided
294+
// in the WordInfo.
295+
// Note: When this is true, we send all the words from the beginning of the
296+
// audio for the top alternative in every consecutive response.
297+
// This is done in order to improve our speaker tags as our models learn to
298+
// identify the speakers in the conversation over time.
299+
bool enable_speaker_diarization = 16;
300+
301+
// *Optional*
302+
// If set, specifies the estimated number of speakers in the conversation.
303+
// If not set, defaults to '2'.
304+
// Ignored unless enable_speaker_diarization is set to true.
305+
int32 diarization_speaker_count = 17;
306+
254307
// *Optional* Metadata regarding this request.
255308
RecognitionMetadata metadata = 9;
256309

@@ -604,6 +657,17 @@ message StreamingRecognitionResult {
604657
// This field is only provided for interim results (`is_final=false`).
605658
// The default of 0.0 is a sentinel value indicating `stability` was not set.
606659
float stability = 3;
660+
661+
// For multi-channel audio, this is the channel number corresponding to the
662+
// recognized result for the audio from that channel.
663+
// For audio_channel_count = N, its output values can range from '1' to 'N'.
664+
int32 channel_tag = 5;
665+
666+
// Output only. The
667+
// [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag of the
668+
// language in this result. This language code was detected to have the highest
669+
// likelihood of being spoken in the audio.
670+
string language_code = 6;
607671
}
608672

609673
// A speech recognition result corresponding to a portion of the audio.
@@ -613,6 +677,17 @@ message SpeechRecognitionResult {
613677
// These alternatives are ordered in terms of accuracy, with the top (first)
614678
// alternative being the most probable, as ranked by the recognizer.
615679
repeated SpeechRecognitionAlternative alternatives = 1;
680+
681+
// For multi-channel audio, this is the channel number corresponding to the
682+
// recognized result for the audio from that channel.
683+
// For audio_channel_count = N, its output values can range from '1' to 'N'.
684+
int32 channel_tag = 2;
685+
686+
// Output only. The
687+
// [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag of the
688+
// language in this result. This language code was detected to have the highest
689+
// likelihood of being spoken in the audio.
690+
string language_code = 5;
616691
}
617692

618693
// Alternative hypotheses (a.k.a. n-best list).
@@ -655,4 +730,21 @@ message WordInfo {
655730

656731
// Output only. The word corresponding to this set of information.
657732
string word = 3;
733+
734+
// Output only. The confidence estimate between 0.0 and 1.0. A higher number
735+
// indicates an estimated greater likelihood that the recognized words are
736+
// correct. This field is set only for the top alternative of a non-streaming
737+
// result, or of a streaming result where `is_final=true`.
738+
// This field is not guaranteed to be accurate and users should not rely on it
739+
// to be always provided.
740+
// The default of 0.0 is a sentinel value indicating `confidence` was not set.
741+
float confidence = 4;
742+
743+
// Output only. A distinct integer value is assigned for every speaker within
744+
// the audio. This field specifies which one of those speakers was detected to
745+
// have spoken this word. Value ranges from '1' to diarization_speaker_count.
746+
// speaker_tag is set if enable_speaker_diarization = 'true' and only in the
747+
// top alternative.
748+
int32 speaker_tag = 5;
749+
658750
}

packages/google-cloud-node/src/v1p1beta1/doc/google/cloud/speech/v1p1beta1/doc_cloud_speech.js

+91
Original file line numberDiff line numberDiff line change
@@ -149,13 +149,46 @@ var StreamingRecognitionConfig = {
149149
* This field is optional for `FLAC` and `WAV` audio files and required
150150
* for all other audio formats. For details, see AudioEncoding.
151151
*
152+
* @property {number} audioChannelCount
153+
* *Optional* The number of channels in the input audio data.
154+
* ONLY set this for MULTI-CHANNEL recognition.
155+
* Valid values for LINEAR16 and FLAC are `1`-`8`.
156+
* Valid values for OGG_OPUS are `1`-`254`.
157+
* Valid value for MULAW, AMR, AMR_WB and SPEEX_WITH_HEADER_BYTE is only `1`.
158+
* If `0` or omitted, defaults to one channel (mono).
159+
* NOTE: We only recognize the first channel by default.
160+
* To perform independent recognition on each channel set
161+
* enable_separate_recognition_per_channel to 'true'.
162+
*
163+
* @property {boolean} enableSeparateRecognitionPerChannel
164+
* This needs to be set to 'true' explicitly and audio_channel_count > 1
165+
* to get each channel recognized separately. The recognition result will
166+
* contain a channel_tag field to state which channel that result belongs to.
167+
* If this is not 'true', we will only recognize the first channel.
168+
* NOTE: The request is also billed cumulatively for all channels recognized:
169+
* (audio_channel_count times the audio length)
170+
*
152171
* @property {string} languageCode
153172
* *Required* The language of the supplied audio as a
154173
* [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag.
155174
* Example: "en-US".
156175
* See [Language Support](https://cloud.google.com/speech/docs/languages)
157176
* for a list of the currently supported language codes.
158177
*
178+
* @property {string[]} alternativeLanguageCodes
179+
* *Optional* A list of up to 3 additional
180+
* [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tags,
181+
* listing possible alternative languages of the supplied audio.
182+
* See [Language Support](https://cloud.google.com/speech/docs/languages)
183+
* for a list of the currently supported language codes.
184+
* If alternative languages are listed, recognition result will contain
185+
* recognition in the most likely language detected including the main
186+
* language_code. The recognition result will include the language tag
187+
* of the language detected in the audio.
188+
* NOTE: This feature is only supported for Voice Command and Voice Search
189+
* use cases and performance may vary for other use cases (e.g., phone call
190+
* transcription).
191+
*
159192
* @property {number} maxAlternatives
160193
* *Optional* Maximum number of recognition hypotheses to be returned.
161194
* Specifically, the maximum number of `SpeechRecognitionAlternative` messages
@@ -181,6 +214,11 @@ var StreamingRecognitionConfig = {
181214
* `false`, no word-level time offset information is returned. The default is
182215
* `false`.
183216
*
217+
* @property {boolean} enableWordConfidence
218+
* *Optional* If `true`, the top result includes a list of words and the
219+
* confidence for those words. If `false`, no word-level confidence
220+
* information is returned. The default is `false`.
221+
*
184222
* @property {boolean} enableAutomaticPunctuation
185223
* *Optional* If 'true', adds punctuation to recognition result hypotheses.
186224
* This feature is only available in select languages. Setting this for
@@ -190,6 +228,21 @@ var StreamingRecognitionConfig = {
190228
* to all users. In the future this may be exclusively available as a
191229
* premium feature."
192230
*
231+
* @property {boolean} enableSpeakerDiarization
232+
* *Optional* If 'true', enables speaker detection for each recognized word in
233+
* the top alternative of the recognition result using a speaker_tag provided
234+
* in the WordInfo.
235+
* Note: When this is true, we send all the words from the beginning of the
236+
* audio for the top alternative in every consecutive response.
237+
* This is done in order to improve our speaker tags as our models learn to
238+
* identify the speakers in the conversation over time.
239+
*
240+
* @property {number} diarizationSpeakerCount
241+
* *Optional*
242+
* If set, specifies the estimated number of speakers in the conversation.
243+
* If not set, defaults to '2'.
244+
* Ignored unless enable_speaker_diarization is set to true.
245+
*
193246
* @property {Object} metadata
194247
* *Optional* Metadata regarding this request.
195248
*
@@ -797,6 +850,17 @@ var StreamingRecognizeResponse = {
797850
* This field is only provided for interim results (`is_final=false`).
798851
* The default of 0.0 is a sentinel value indicating `stability` was not set.
799852
*
853+
* @property {number} channelTag
854+
* For multi-channel audio, this is the channel number corresponding to the
855+
* recognized result for the audio from that channel.
856+
* For audio_channel_count = N, its output values can range from '1' to 'N'.
857+
*
858+
* @property {string} languageCode
859+
* Output only. The
860+
* [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag of the
861+
* language in this result. This language code was detected to have the highest
862+
* likelihood of being spoken in the audio.
863+
*
800864
* @typedef StreamingRecognitionResult
801865
* @memberof google.cloud.speech.v1p1beta1
802866
* @see [google.cloud.speech.v1p1beta1.StreamingRecognitionResult definition in proto format]{@link https://github.com/googleapis/googleapis/blob/master/google/cloud/speech/v1p1beta1/cloud_speech.proto}
@@ -816,6 +880,17 @@ var StreamingRecognitionResult = {
816880
*
817881
* This object should have the same structure as [SpeechRecognitionAlternative]{@link google.cloud.speech.v1p1beta1.SpeechRecognitionAlternative}
818882
*
883+
* @property {number} channelTag
884+
* For multi-channel audio, this is the channel number corresponding to the
885+
* recognized result for the audio from that channel.
886+
* For audio_channel_count = N, its output values can range from '1' to 'N'.
887+
*
888+
* @property {string} languageCode
889+
* Output only. The
890+
* [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag of the
891+
* language in this result. This language code was detected to have the highest
892+
* likelihood of being spoken in the audio.
893+
*
819894
* @typedef SpeechRecognitionResult
820895
* @memberof google.cloud.speech.v1p1beta1
821896
* @see [google.cloud.speech.v1p1beta1.SpeechRecognitionResult definition in proto format]{@link https://github.com/googleapis/googleapis/blob/master/google/cloud/speech/v1p1beta1/cloud_speech.proto}
@@ -880,6 +955,22 @@ var SpeechRecognitionAlternative = {
880955
* @property {string} word
881956
* Output only. The word corresponding to this set of information.
882957
*
958+
* @property {number} confidence
959+
* Output only. The confidence estimate between 0.0 and 1.0. A higher number
960+
* indicates an estimated greater likelihood that the recognized words are
961+
* correct. This field is set only for the top alternative of a non-streaming
962+
* result, or of a streaming result where `is_final=true`.
963+
* This field is not guaranteed to be accurate and users should not rely on it
964+
* to be always provided.
965+
* The default of 0.0 is a sentinel value indicating `confidence` was not set.
966+
*
967+
* @property {number} speakerTag
968+
* Output only. A distinct integer value is assigned for every speaker within
969+
* the audio. This field specifies which one of those speakers was detected to
970+
* have spoken this word. Value ranges from '1' to diarization_speaker_count.
971+
* speaker_tag is set if enable_speaker_diarization = 'true' and only in the
972+
* top alternative.
973+
*
883974
* @typedef WordInfo
884975
* @memberof google.cloud.speech.v1p1beta1
885976
* @see [google.cloud.speech.v1p1beta1.WordInfo definition in proto format]{@link https://github.com/googleapis/googleapis/blob/master/google/cloud/speech/v1p1beta1/cloud_speech.proto}

0 commit comments

Comments
 (0)