Skip to content

Commit 43eb235

Browse files
anguillanneufchingor13
authored and committed
samples: feat: video speech transcription (#1264)
1 parent fd74bbe commit 43eb235

File tree

2 files changed

+87
-1
lines changed

2 files changed

+87
-1
lines changed

video/src/main/java/com/example/video/Detect.java

+76-1
Original file line numberDiff line numberDiff line change
@@ -25,14 +25,21 @@
2525
import com.google.cloud.videointelligence.v1.Feature;
2626
import com.google.cloud.videointelligence.v1.LabelAnnotation;
2727
import com.google.cloud.videointelligence.v1.LabelSegment;
28+
import com.google.cloud.videointelligence.v1.SpeechRecognitionAlternative;
29+
import com.google.cloud.videointelligence.v1.SpeechTranscription;
30+
import com.google.cloud.videointelligence.v1.SpeechTranscriptionConfig;
2831
import com.google.cloud.videointelligence.v1.VideoAnnotationResults;
32+
import com.google.cloud.videointelligence.v1.VideoContext;
2933
import com.google.cloud.videointelligence.v1.VideoIntelligenceServiceClient;
3034
import com.google.cloud.videointelligence.v1.VideoSegment;
35+
import com.google.cloud.videointelligence.v1.WordInfo;
3136
import com.google.protobuf.ByteString;
3237
import java.io.IOException;
3338
import java.nio.file.Files;
3439
import java.nio.file.Path;
3540
import java.nio.file.Paths;
41+
import java.util.concurrent.TimeUnit;
42+
3643
import org.apache.commons.codec.binary.Base64;
3744

3845

@@ -83,6 +90,9 @@ public static void argsHelper(String[] args) throws Exception {
8390
if (command.equals("explicit-content")) {
8491
analyzeExplicitContent(path);
8592
}
93+
if (command.equals("speech-transcription")) {
94+
speechTranscription(path);
95+
}
8696
}
8797

8898
/**
@@ -322,4 +332,69 @@ public static void analyzeExplicitContent(String gcsUri) throws Exception {
322332
// [END video_analyze_explicit_content]
323333
}
324334
}
325-
}
335+
336+
/**
337+
* Transcribe speech from a video stored on GCS.
338+
*
339+
* @param gcsUri the path to the video file to analyze.
340+
*/
341+
public static void speechTranscription(String gcsUri) throws Exception {
342+
// [START video_speech_transcription_gcs]
343+
// Instantiate a com.google.cloud.videointelligence.v1.VideoIntelligenceServiceClient
344+
try (VideoIntelligenceServiceClient client = VideoIntelligenceServiceClient.create()) {
345+
// Set the language code
346+
SpeechTranscriptionConfig config = SpeechTranscriptionConfig.newBuilder()
347+
.setLanguageCode("en-US")
348+
.setEnableAutomaticPunctuation(true)
349+
.build();
350+
351+
// Set the video context with the above configuration
352+
VideoContext context = VideoContext.newBuilder()
353+
.setSpeechTranscriptionConfig(config)
354+
.build();
355+
356+
// Create the request
357+
AnnotateVideoRequest request = AnnotateVideoRequest.newBuilder()
358+
.setInputUri(gcsUri)
359+
.addFeatures(Feature.SPEECH_TRANSCRIPTION)
360+
.setVideoContext(context)
361+
.build();
362+
363+
// asynchronously perform speech transcription on videos
364+
OperationFuture<AnnotateVideoResponse, AnnotateVideoProgress> response =
365+
client.annotateVideoAsync(request);
366+
367+
System.out.println("Waiting for operation to complete...");
368+
// Display the results
369+
for (VideoAnnotationResults results : response.get(600, TimeUnit.SECONDS)
370+
.getAnnotationResultsList()) {
371+
for (SpeechTranscription speechTranscription : results.getSpeechTranscriptionsList()) {
372+
try {
373+
// Print the transcription
374+
if (speechTranscription.getAlternativesCount() > 0) {
375+
SpeechRecognitionAlternative alternative = speechTranscription.getAlternatives(0);
376+
377+
System.out.printf("Transcript: %s\n", alternative.getTranscript());
378+
System.out.printf("Confidence: %.2f\n", alternative.getConfidence());
379+
380+
System.out.println("Word level information:");
381+
for (WordInfo wordInfo : alternative.getWordsList()) {
382+
double startTime = wordInfo.getStartTime().getSeconds()
383+
+ wordInfo.getStartTime().getNanos() / 1e9;
384+
double endTime = wordInfo.getEndTime().getSeconds()
385+
+ wordInfo.getEndTime().getNanos() / 1e9;
386+
System.out.printf("\t%4.2fs - %4.2fs: %s\n",
387+
startTime, endTime, wordInfo.getWord());
388+
}
389+
} else {
390+
System.out.println("No transcription found");
391+
}
392+
} catch (IndexOutOfBoundsException ioe) {
393+
System.out.println("Could not retrieve frame: " + ioe.getMessage());
394+
}
395+
}
396+
}
397+
}
398+
// [END video_speech_transcription_gcs]
399+
}
400+
}

video/src/test/java/com/example/video/DetectIT.java

+11
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,8 @@ public class DetectIT {
3737
static final String LABEL_FILE_LOCATION = "./resources/cat.mp4";
3838
static final String SHOTS_FILE_LOCATION = "gs://demomaker/gbikes_dinosaur.mp4";
3939
static final String EXPLICIT_CONTENT_LOCATION = "gs://demomaker/cat.mp4";
40+
static final String SPEECH_GCS_LOCATION =
41+
"gs://java-docs-samples-testing/video/googlework_short.mp4";
4042

4143
@Before
4244
public void setUp() {
@@ -84,4 +86,13 @@ public void testShots() throws Exception {
8486
assertThat(got).contains("Shots:");
8587
assertThat(got).contains("Location: 0");
8688
}
89+
90+
@Test
91+
public void testSpeechTranscription() throws Exception {
92+
String[] args = {"speech-transcription", SPEECH_GCS_LOCATION};
93+
Detect.argsHelper(args);
94+
String got = bout.toString();
95+
96+
assertThat(got).contains("cultural");
97+
}
8798
}

0 commit comments

Comments
 (0)