25
25
import com .google .cloud .videointelligence .v1 .Feature ;
26
26
import com .google .cloud .videointelligence .v1 .LabelAnnotation ;
27
27
import com .google .cloud .videointelligence .v1 .LabelSegment ;
28
+ import com .google .cloud .videointelligence .v1 .SpeechRecognitionAlternative ;
29
+ import com .google .cloud .videointelligence .v1 .SpeechTranscription ;
30
+ import com .google .cloud .videointelligence .v1 .SpeechTranscriptionConfig ;
28
31
import com .google .cloud .videointelligence .v1 .VideoAnnotationResults ;
32
+ import com .google .cloud .videointelligence .v1 .VideoContext ;
29
33
import com .google .cloud .videointelligence .v1 .VideoIntelligenceServiceClient ;
30
34
import com .google .cloud .videointelligence .v1 .VideoSegment ;
35
+ import com .google .cloud .videointelligence .v1 .WordInfo ;
31
36
import com .google .protobuf .ByteString ;
32
37
import java .io .IOException ;
33
38
import java .nio .file .Files ;
34
39
import java .nio .file .Path ;
35
40
import java .nio .file .Paths ;
41
+ import java .util .concurrent .TimeUnit ;
42
+
36
43
import org .apache .commons .codec .binary .Base64 ;
37
44
38
45
@@ -83,6 +90,9 @@ public static void argsHelper(String[] args) throws Exception {
83
90
if (command .equals ("explicit-content" )) {
84
91
analyzeExplicitContent (path );
85
92
}
93
+ if (command .equals ("speech-transcription" )) {
94
+ speechTranscription (path );
95
+ }
86
96
}
87
97
88
98
/**
@@ -322,4 +332,69 @@ public static void analyzeExplicitContent(String gcsUri) throws Exception {
322
332
// [END video_analyze_explicit_content]
323
333
}
324
334
}
325
- }
335
+
336
+ /**
337
+ * Transcribe speech from a video stored on GCS.
338
+ *
339
+ * @param gcsUri the path to the video file to analyze.
340
+ */
341
+ public static void speechTranscription (String gcsUri ) throws Exception {
342
+ // [START video_speech_transcription_gcs]
343
+ // Instantiate a com.google.cloud.videointelligence.v1.VideoIntelligenceServiceClient
344
+ try (VideoIntelligenceServiceClient client = VideoIntelligenceServiceClient .create ()) {
345
+ // Set the language code
346
+ SpeechTranscriptionConfig config = SpeechTranscriptionConfig .newBuilder ()
347
+ .setLanguageCode ("en-US" )
348
+ .setEnableAutomaticPunctuation (true )
349
+ .build ();
350
+
351
+ // Set the video context with the above configuration
352
+ VideoContext context = VideoContext .newBuilder ()
353
+ .setSpeechTranscriptionConfig (config )
354
+ .build ();
355
+
356
+ // Create the request
357
+ AnnotateVideoRequest request = AnnotateVideoRequest .newBuilder ()
358
+ .setInputUri (gcsUri )
359
+ .addFeatures (Feature .SPEECH_TRANSCRIPTION )
360
+ .setVideoContext (context )
361
+ .build ();
362
+
363
+ // asynchronously perform speech transcription on videos
364
+ OperationFuture <AnnotateVideoResponse , AnnotateVideoProgress > response =
365
+ client .annotateVideoAsync (request );
366
+
367
+ System .out .println ("Waiting for operation to complete..." );
368
+ // Display the results
369
+ for (VideoAnnotationResults results : response .get (600 , TimeUnit .SECONDS )
370
+ .getAnnotationResultsList ()) {
371
+ for (SpeechTranscription speechTranscription : results .getSpeechTranscriptionsList ()) {
372
+ try {
373
+ // Print the transcription
374
+ if (speechTranscription .getAlternativesCount () > 0 ) {
375
+ SpeechRecognitionAlternative alternative = speechTranscription .getAlternatives (0 );
376
+
377
+ System .out .printf ("Transcript: %s\n " , alternative .getTranscript ());
378
+ System .out .printf ("Confidence: %.2f\n " , alternative .getConfidence ());
379
+
380
+ System .out .println ("Word level information:" );
381
+ for (WordInfo wordInfo : alternative .getWordsList ()) {
382
+ double startTime = wordInfo .getStartTime ().getSeconds ()
383
+ + wordInfo .getStartTime ().getNanos () / 1e9 ;
384
+ double endTime = wordInfo .getEndTime ().getSeconds ()
385
+ + wordInfo .getEndTime ().getNanos () / 1e9 ;
386
+ System .out .printf ("\t %4.2fs - %4.2fs: %s\n " ,
387
+ startTime , endTime , wordInfo .getWord ());
388
+ }
389
+ } else {
390
+ System .out .println ("No transcription found" );
391
+ }
392
+ } catch (IndexOutOfBoundsException ioe ) {
393
+ System .out .println ("Could not retrieve frame: " + ioe .getMessage ());
394
+ }
395
+ }
396
+ }
397
+ }
398
+ // [END video_speech_transcription_gcs]
399
+ }
400
+ }
0 commit comments