docs: closed captions (#1497)

szuperaz · web-flow · commit ed47579b16b6 · 2024-10-03T14:40:34.000+02:00
I'll add the docs if the code looks good :)
diff --git a/packages/client/docusaurus/docs/javascript/02-guides/16-closed-captions.mdx b/packages/client/docusaurus/docs/javascript/02-guides/16-closed-captions.mdx
@@ -0,0 +1,111 @@
+---
+id: closed-captions
+title: Closed captions
+description: How to add closed captions to your calls
+---
+
+The Stream API supports adding real-time closed captions (subtitles for participants) to your calls. This guide shows you how to implement this feature on the client side.
+
+## Call and call type settings
+
+The closed caption feature can be controlled with the following options:
+
+- `available`: the feature is available for your call and can be enabled.
+- `disabled`: the feature is not available for your call. In this case, it's a good idea to "hide" any UI element you have related to closed captions.
+- `auto-on`: the feature is available and will be enabled automatically once the user is connected to the call.
+
+This setting can be configured at the call or call type level.
+
+You can check the current value like this:
+
+```typescript
+console.log(call.state.settings?.transcription.closed_caption_mode);
+```
+
+## Closed caption events
+
+If closed captions are enabled for a given call, you'll receive the captions in the `call.closed_caption` events. Below, you can find an example payload:
+
+```
+{
+  "type": "call.closed_caption",
+  "created_at": "2024-09-25T12:22:25.067005915Z",
+  "call_cid": "default:test",
+  "closed_caption": {
+      "text": "Thank you, guys, for listening.",
+      // When did the speaker start speaking
+      "start_time": "2024-09-25T12:22:21.310735726Z",
+      // When did the speaker finish saying the caption
+      "end_time": "2024-09-25T12:22:24.310735726Z",
+      "speaker_id": "zitaszuperagetstreamio"
+  }
+}
+```
+
+## Displaying the captions
+
+When displaying closed captions, we should make sure that they are real-time (showing a sentence from 30 seconds ago has very little use in a conversation) and visible for enough time that participants can read them.
+
+Below is an example implementation:
+
+```typescript
+import {
+  Call,
+  CallClosedCaption,
+  ClosedCaptionEvent,
+} from '@stream-io/video-client';
+
+// The captions queue
+let captions: (CallClosedCaption & { speaker_name?: string })[] = [];
+// The maximum number of captions that can be visible on the screen
+const numberOfCaptionsVisible = 2;
+// A single caption can stay visible on the screen for this duration
+// This is the maximum duration, new captions can push a caption out of the screen sooner
+const captionTimeoutMs = 2700;
+// Pending removal timers, one for each caption that is still waiting to expire
+const captionTimeouts = new Set<ReturnType<typeof setTimeout>>();
+
+// Subscribe to call.closed_caption events
+// `call.on` returns a function that removes the listener again
+const unsubscribe = call.on(
+  'call.closed_caption',
+  (event: ClosedCaptionEvent) => {
+    const caption = event.closed_caption;
+    // It's possible to receive the same caption twice, so make sure to filter duplicates
+    const isDuplicate = captions.some(
+      (c) =>
+        c.speaker_id === caption.speaker_id &&
+        c.start_time === caption.start_time,
+    );
+    if (isDuplicate) {
+      return;
+    }
+    // Look up the speaker's name based on the user id
+    const speaker = call.state.participants.find(
+      (p) => p.userId === caption.speaker_id,
+    );
+    // Fall back to the raw speaker id if the participant can't be found
+    const speakerName = speaker?.name || speaker?.userId || caption.speaker_id;
+    // Add the caption to the queue
+    const entry = { ...caption, speaker_name: speakerName };
+    captions.push(entry);
+    // Update the UI
+    updateDisplayedCaptions();
+    // We specify a maximum amount of time a caption can be visible
+    // after that, we remove it from the screen (unless a newer caption has already pushed it out)
+    const timeout = setTimeout(() => {
+      captionTimeouts.delete(timeout);
+      captions = captions.filter((c) => c !== entry);
+      updateDisplayedCaptions();
+    }, captionTimeoutMs);
+    captionTimeouts.add(timeout);
+  },
+);
+
+const updateDisplayedCaptions = () => {
+  // The default implementation shows the last two captions
+  const displayedCaptions = captions.slice(-1 * numberOfCaptionsVisible);
+  // The speaker name and the caption text are not escaped here,
+  // escape them (or use `textContent`) before inserting this into the DOM
+  const captionsHTML = displayedCaptions
+    .map((c) => `<b>${c.speaker_name}:</b> ${c.text}`)
+    .join('<br>');
+  // Update the UI
+};
+
+// Call this when you turn closed captions off or leave the call
+const stopCaptions = () => {
+  unsubscribe();
+  // Cancel every pending timer, not just the most recent one
+  captionTimeouts.forEach((timeout) => clearTimeout(timeout));
+  captionTimeouts.clear();
+  // The timers that would have emptied the queue are gone, so empty it here
+  captions = [];
+  updateDisplayedCaptions();
+};
+```
+
+:::note
+Since the closed caption event contains `start_time` and `end_time` fields, you can subtract the two to find out how long it took the speaker to say the caption. You can then use this duration to control how long the text stays visible on the screen. This helps keep the captions as close to real-time as possible, but it might not leave enough time for participants to read the text.
+:::
+
+## See it in action
+
+To see it all in action, check out our TypeScript sample application on [GitHub](https://github.com/GetStream/stream-video-js/tree/main/sample-apps/client/ts-quickstart) or in [CodeSandbox](https://codesandbox.io/p/sandbox/eloquent-glitter-99th3v).
diff --git a/sample-apps/client/ts-quickstart/index.html b/sample-apps/client/ts-quickstart/index.html
@@ -1,4 +1,4 @@
-<!DOCTYPE html>
+<!doctype html>
 <html lang="en">
   <head>
     <meta charset="UTF-8" />
@@ -10,6 +10,7 @@
     <div id="call-controls"></div>
     <div id="screenshare"></div>
     <div id="participants"></div>
+    <div id="closed-captions"></div>
     <script type="module" src="/src/main.ts"></script>
   </body>
 </html>
diff --git a/sample-apps/client/ts-quickstart/package.json b/sample-apps/client/ts-quickstart/package.json
@@ -4,7 +4,7 @@
   "version": "0.0.0",
   "type": "module",
   "scripts": {
-    "dev": "vite --host 0.0.0.0 --https",
+    "dev": "https=1 vite --host 0.0.0.0",
     "build": "tsc && vite build",
     "preview": "vite preview"
   },
diff --git a/sample-apps/client/ts-quickstart/src/closed-captions.ts b/sample-apps/client/ts-quickstart/src/closed-captions.ts
@@ -0,0 +1,102 @@
+import {
+  Call,
+  CallClosedCaption,
+  ClosedCaptionEvent,
+} from '@stream-io/video-client';
+
+/**
+ * Listens to the `call.closed_caption` events of a call and renders the most
+ * recent captions into a container element.
+ *
+ * Create the container with `renderCaptionContainer()` before turning captions
+ * on, and call `cleanup()` when leaving the call.
+ */
+export class ClosedCaptionManager {
+  /**
+   * Whether this manager is currently subscribed to caption events
+   */
+  status: 'on' | 'off' = 'off';
+  private unsubscribe?: () => void;
+  /**
+   * Pending removal timers, one for each caption that is still waiting to expire
+   */
+  private captionTimeouts = new Set<ReturnType<typeof setTimeout>>();
+  private captions: (CallClosedCaption & { speaker_name?: string })[] = [];
+  private captionContainer?: HTMLElement;
+  /**
+   * A single caption can stay visible on the screen for this duration
+   *
+   * This is the maximum duration, new captions can push a caption out of the screen sooner
+   */
+  private captionTimeoutMs = 2700;
+  /**
+   * The maximum number of captions that can be visible on the screen
+   */
+  private numberOfCaptionsVisible = 2;
+
+  constructor(private call: Call) {}
+
+  /**
+   * Creates a button that turns the captions on or off
+   *
+   * @returns the button element, the caller is responsible for attaching it to the DOM
+   */
+  renderToggleElement() {
+    const button = document.createElement('button');
+    button.textContent = this.getToggleLabel();
+
+    button.addEventListener('click', () => {
+      if (this.status === 'on') {
+        this.hideCaptions();
+      } else {
+        this.showCaptions();
+      }
+      button.textContent = this.getToggleLabel();
+    });
+
+    return button;
+  }
+
+  /**
+   * Creates the element the captions are rendered into
+   *
+   * @returns the container element, the caller is responsible for attaching it to the DOM
+   */
+  renderCaptionContainer() {
+    this.captionContainer = document.createElement('div');
+
+    return this.captionContainer;
+  }
+
+  /**
+   * Subscribes to closed caption events and starts rendering captions
+   */
+  showCaptions() {
+    // Release a previous subscription first, otherwise calling this method
+    // twice would register two listeners and lose the first unsubscribe handle
+    this.unsubscribe?.();
+    this.status = 'on';
+    this.unsubscribe = this.call.on(
+      'call.closed_caption',
+      (event: ClosedCaptionEvent) => {
+        this.handleCaption(event.closed_caption);
+      },
+    );
+  }
+
+  /**
+   * Stops rendering captions and removes the ones currently on the screen
+   */
+  hideCaptions() {
+    this.status = 'off';
+    this.cleanup();
+  }
+
+  /**
+   * Removes the event listener, cancels all pending timers and clears the captions
+   */
+  cleanup() {
+    this.unsubscribe?.();
+    this.unsubscribe = undefined;
+    // Every caption has its own timer, cancel all of them
+    this.captionTimeouts.forEach((timeout) => clearTimeout(timeout));
+    this.captionTimeouts.clear();
+    // The timers that would have emptied the queue are gone, so empty it here
+    this.captions = [];
+    if (this.captionContainer) {
+      this.captionContainer.textContent = '';
+    }
+  }
+
+  /**
+   * The toggle button's label for the current status
+   */
+  private getToggleLabel() {
+    return this.status === 'on'
+      ? 'Turn off closed captions'
+      : 'Turn on closed captions';
+  }
+
+  /**
+   * Adds a caption to the queue, renders it and schedules its removal
+   */
+  private handleCaption(caption: CallClosedCaption) {
+    // It's possible to receive the same caption twice, so filter duplicates
+    const isDuplicate = this.captions.some(
+      (c) =>
+        c.speaker_id === caption.speaker_id &&
+        c.start_time === caption.start_time,
+    );
+    if (isDuplicate) {
+      return;
+    }
+
+    const speaker = this.call.state.participants.find(
+      (p) => p.userId === caption.speaker_id,
+    );
+    // Fall back to the raw speaker id if the participant can't be found
+    const speakerName = speaker?.name || speaker?.userId || caption.speaker_id;
+    const entry = { ...caption, speaker_name: speakerName };
+    this.captions.push(entry);
+    this.updateDisplayedCaptions();
+
+    // Remove this caption after the maximum display duration
+    const timeout = setTimeout(() => {
+      this.captionTimeouts.delete(timeout);
+      this.captions = this.captions.filter((c) => c !== entry);
+      this.updateDisplayedCaptions();
+    }, this.captionTimeoutMs);
+    this.captionTimeouts.add(timeout);
+  }
+
+  /**
+   * Renders the newest captions (at most `numberOfCaptionsVisible`) into the container
+   */
+  private updateDisplayedCaptions() {
+    const container = this.captionContainer;
+    if (!container) {
+      console.warn(
+        'Render caption container before turning on closed captions',
+      );
+      return;
+    }
+    const displayedCaptions = this.captions.slice(
+      -1 * this.numberOfCaptionsVisible,
+    );
+    // Build DOM nodes instead of assigning `innerHTML`: the caption text and
+    // the speaker name must not be interpreted as markup
+    container.textContent = '';
+    displayedCaptions.forEach((c, index) => {
+      if (index > 0) {
+        container.appendChild(document.createElement('br'));
+      }
+      const speaker = document.createElement('b');
+      speaker.textContent = `${c.speaker_name}:`;
+      container.append(speaker, ` ${c.text}`);
+    });
+  }
+}
diff --git a/sample-apps/client/ts-quickstart/src/main.ts b/sample-apps/client/ts-quickstart/src/main.ts
@@ -10,6 +10,7 @@ import {
   renderVolumeControl,
 } from './device-selector';
 import { isMobile } from './mobile';
+import { ClosedCaptionManager } from './closed-captions';
 
 const searchParams = new URLSearchParams(window.location.search);
 const extractPayloadFromToken = (token: string) => {
@@ -50,32 +51,42 @@ call.screenShare.setSettings({
   maxBitrate: 1500000,
 });
 
-call.join({ create: true }).then(async () => {
-  // render mic and camera controls
-  const controls = renderControls(call);
-  const container = document.getElementById('call-controls')!;
-  container.appendChild(controls.audioButton);
-  container.appendChild(controls.videoButton);
-  container.appendChild(controls.screenShareButton);
-
-  container.appendChild(renderAudioDeviceSelector(call));
-
-  // render device selectors
-  if (isMobile.any()) {
-    container.appendChild(controls.flipButton);
-  } else {
-    container.appendChild(renderVideoDeviceSelector(call));
-  }
-
-  const audioOutputSelector = renderAudioOutputSelector(call);
-  if (audioOutputSelector) {
-    container.appendChild(audioOutputSelector);
-  }
-
-  container.appendChild(renderVolumeControl(call));
-});
+const container = document.getElementById('call-controls')!;
+
+// The controls are rendered before joining, so they don't wait for the join request
+// render mic, camera and screen share controls
+const controls = renderControls(call);
+container.appendChild(controls.audioButton);
+container.appendChild(controls.videoButton);
+container.appendChild(controls.screenShareButton);
+
+// render device selectors
+container.appendChild(renderAudioDeviceSelector(call));
+
+// mobile devices get a flip button instead of a camera selector
+if (isMobile.any()) {
+  container.appendChild(controls.flipButton);
+} else {
+  container.appendChild(renderVideoDeviceSelector(call));
+}
+
+// the audio output selector is only rendered if one is returned
+const audioOutputSelector = renderAudioOutputSelector(call);
+if (audioOutputSelector) {
+  container.appendChild(audioOutputSelector);
+}
+
+container.appendChild(renderVolumeControl(call));
+
+// Closed caption controls
+const closedCaptionManager = new ClosedCaptionManager(call);
+container.appendChild(closedCaptionManager.renderToggleElement());
+
+const captionContainer = document.getElementById('closed-captions');
+captionContainer?.appendChild(closedCaptionManager.renderCaptionContainer());
+
+// Report a failed join instead of leaving the promise rejection unhandled
+call.join({ create: true }).catch((err: unknown) => {
+  console.error('Failed to join the call', err);
+});
 
 window.addEventListener('beforeunload', () => {
+  // Make sure to remove your event listeners when you leave a call
+  // `closedCaptionManager` is a module-level const, so it's always defined here
+  closedCaptionManager.cleanup();
   call.leave();
 });