@@ -479,10 +479,11 @@ message StreamingDetectIntentRequest {
479
479
//
480
480
// Multiple response messages can be returned in order:
481
481
//
482
- // 1. If the input was set to streaming audio, the first one or more messages
483
- // contain `recognition_result`. Each `recognition_result` represents a more
484
- // complete transcript of what the user said. The last `recognition_result`
485
- // has `is_final` set to `true`.
482
+ // 1. If the `StreamingDetectIntentRequest.input_audio` field was
483
+ // set, the `recognition_result` field is populated for one
484
+ // or more messages.
485
+ // See the [StreamingRecognitionResult][google.cloud.dialogflow.v2.StreamingRecognitionResult] message for details
486
+ // about the result message sequence.
486
487
//
487
488
// 2. The next message contains `response_id`, `query_result`
488
489
// and optionally `webhook_status` if a WebHook was called.
@@ -520,35 +521,39 @@ message StreamingDetectIntentResponse {
520
521
// that is currently being processed or an indication that this is the end
521
522
// of the single requested utterance.
522
523
//
523
- // Example:
524
+ // While end-user audio is being processed, Dialogflow sends a series of
525
+ // results. Each result may contain a `transcript` value. A transcript
526
+ // represents a portion of the utterance. While the recognizer is processing
527
+ // audio, transcript values may be interim values or finalized values.
528
+ // Once a transcript is finalized, the `is_final` value is set to true and
529
+ // processing continues for the next transcript.
524
530
//
525
- // 1. transcript: "tube"
531
+ // If `StreamingDetectIntentRequest.query_input.audio_config.single_utterance`
532
+ // was true, and the recognizer has completed processing audio,
533
+ // the `message_type` value is set to `END_OF_SINGLE_UTTERANCE` and the
534
+ // following (last) result contains the last finalized transcript.
526
535
//
527
- // 2. transcript: "to be a"
536
+ // The complete end-user utterance is determined by concatenating the
537
+ // finalized transcript values received for the series of results.
528
538
//
529
- // 3. transcript: "to be"
539
+ // In the following example, single utterance is enabled. In the case where
540
+ // single utterance is not enabled, result 7 would not occur.
530
541
//
531
- // 4. transcript: "to be or not to be"
532
- // is_final: true
542
+ // ```
543
+ // Num | transcript | message_type | is_final
544
+ // --- | ----------------------- | ----------------------- | --------
545
+ // 1 | "tube" | TRANSCRIPT | false
546
+ // 2 | "to be a" | TRANSCRIPT | false
547
+ // 3 | "to be" | TRANSCRIPT | false
548
+ // 4 | "to be or not to be" | TRANSCRIPT | true
549
+ // 5 | "that's" | TRANSCRIPT | false
550
+ // 6 | "that is" | TRANSCRIPT | false
551
+ // 7 | unset | END_OF_SINGLE_UTTERANCE | unset
552
+ // 8 | " that is the question" | TRANSCRIPT | true
553
+ // ```
533
554
//
534
- // 5. transcript: " that's"
535
- //
536
- // 6. transcript: " that is"
537
- //
538
- // 7. message_type: `END_OF_SINGLE_UTTERANCE`
539
- //
540
- // 8. transcript: " that is the question"
541
- // is_final: true
542
- //
543
- // Only two of the responses contain final results (#4 and #8 indicated by
544
- // `is_final: true`). Concatenating these generates the full transcript: "to be
545
- // or not to be that is the question".
546
- //
547
- // In each response we populate:
548
- //
549
- // * for `TRANSCRIPT`: `transcript` and possibly `is_final`.
550
- //
551
- // * for `END_OF_SINGLE_UTTERANCE`: only `message_type`.
555
+ // By concatenating the finalized transcripts with `is_final` set to true,
556
+ // the complete utterance becomes "to be or not to be that is the question".
552
557
message StreamingRecognitionResult {
553
558
// Type of the response message.
554
559
enum MessageType {
0 commit comments