@@ -53,6 +53,7 @@ public static void main(String... args) throws Exception {
53
53
"\t java %s \" <command>\" \" <path-to-image>\" \n "
54
54
+ "Commands:\n "
55
55
+ "\t syncrecognize | asyncrecognize | streamrecognize | wordoffsets | model-selection\n "
56
+ + "\t | auto-punctuation | stream-punctuation\n "
56
57
+ "Path:\n \t A file path (ex: ./resources/audio.raw) or a URI "
57
58
+ "for a Cloud Storage resource (gs://...)\n " ,
58
59
Recognize .class .getCanonicalName ());
@@ -88,6 +89,14 @@ public static void main(String... args) throws Exception {
88
89
} else {
89
90
transcribeModelSelection (path );
90
91
}
92
+ } else if (command .equals ("auto-punctuation" )) {
93
+ if (path .startsWith ("gs://" )) {
94
+ transcribeGcsWithAutomaticPunctuation (path );
95
+ } else {
96
+ transcribeFileWithAutomaticPunctuation (path );
97
+ }
98
+ } else if (command .equals ("stream-punctuation" )) {
99
+ streamingTranscribeWithAutomaticPunctuation (path );
91
100
}
92
101
}
93
102
@@ -497,4 +506,174 @@ public static void transcribeModelSelectionGcs(String gcsUri) throws Exception {
497
506
}
498
507
// [END speech_transcribe_model_selection_gcs]
499
508
}
509
+
510
+ // [START speech_sync_recognize_punctuation]
511
+ /**
512
+ * Performs transcription with automatic punctuation on raw PCM audio data.
513
+ *
514
+ * @param fileName the path to a PCM audio file to transcribe.
515
+ */
516
+ public static void transcribeFileWithAutomaticPunctuation (String fileName ) throws Exception {
517
+ Path path = Paths .get (fileName );
518
+ byte [] content = Files .readAllBytes (path );
519
+
520
+ try (SpeechClient speechClient = SpeechClient .create ()) {
521
+ // Configure request with local raw PCM audio
522
+ RecognitionConfig recConfig = RecognitionConfig .newBuilder ()
523
+ .setEncoding (AudioEncoding .LINEAR16 )
524
+ .setLanguageCode ("en-US" )
525
+ .setSampleRateHertz (16000 )
526
+ .setEnableAutomaticPunctuation (true )
527
+ .build ();
528
+
529
+ // Get the contents of the local audio file
530
+ RecognitionAudio recognitionAudio = RecognitionAudio .newBuilder ()
531
+ .setContent (ByteString .copyFrom (content ))
532
+ .build ();
533
+
534
+ // Perform the transcription request
535
+ RecognizeResponse recognizeResponse = speechClient .recognize (recConfig , recognitionAudio );
536
+
537
+ // Just print the first result here.
538
+ SpeechRecognitionResult result = recognizeResponse .getResultsList ().get (0 );
539
+
540
+ // There can be several alternative transcripts for a given chunk of speech. Just use the
541
+ // first (most likely) one here.
542
+ SpeechRecognitionAlternative alternative = result .getAlternativesList ().get (0 );
543
+
544
+ // Print out the result
545
+ System .out .printf ("Transcript : %s\n " , alternative .getTranscript ());
546
+ }
547
+ }
548
+ // [END speech_sync_recognize_punctuation]
549
+
550
+ // [START speech_async_recognize_gcs_punctuation]
551
+ /**
552
+ * Performs transcription on remote FLAC file and prints the transcription.
553
+ *
554
+ * @param gcsUri the path to the remote FLAC audio file to transcribe.
555
+ */
556
+ public static void transcribeGcsWithAutomaticPunctuation (String gcsUri ) throws Exception {
557
+ try (SpeechClient speechClient = SpeechClient .create ()) {
558
+ // Configure request with raw PCM audio
559
+ RecognitionConfig config = RecognitionConfig .newBuilder ()
560
+ .setEncoding (AudioEncoding .FLAC )
561
+ .setLanguageCode ("en-US" )
562
+ .setSampleRateHertz (16000 )
563
+ .setEnableAutomaticPunctuation (true )
564
+ .build ();
565
+
566
+ // Set the remote path for the audio file
567
+ RecognitionAudio audio = RecognitionAudio .newBuilder ()
568
+ .setUri (gcsUri )
569
+ .build ();
570
+
571
+ // Use non-blocking call for getting file transcription
572
+ OperationFuture <LongRunningRecognizeResponse , LongRunningRecognizeMetadata > response =
573
+ speechClient .longRunningRecognizeAsync (config , audio );
574
+
575
+ while (!response .isDone ()) {
576
+ System .out .println ("Waiting for response..." );
577
+ Thread .sleep (10000 );
578
+ }
579
+
580
+ // Just print the first result here.
581
+ SpeechRecognitionResult result = response .get ().getResultsList ().get (0 );
582
+
583
+ // There can be several alternative transcripts for a given chunk of speech. Just use the
584
+ // first (most likely) one here.
585
+ SpeechRecognitionAlternative alternative = result .getAlternativesList ().get (0 );
586
+
587
+ // Print out the result
588
+ System .out .printf ("Transcript : %s\n " , alternative .getTranscript ());
589
+ }
590
+ }
591
+ // [END speech_async_recognize_gcs_punctuation]
592
+
593
+ /**
594
+ * Performs streaming speech recognition on raw PCM audio data.
595
+ *
596
+ * @param fileName the path to a PCM audio file to transcribe.
597
+ */
598
+ public static void streamingTranscribeWithAutomaticPunctuation (String fileName ) throws Exception {
599
+ Path path = Paths .get (fileName );
600
+ byte [] data = Files .readAllBytes (path );
601
+
602
+ // Instantiates a client with GOOGLE_APPLICATION_CREDENTIALS
603
+ try (SpeechClient speech = SpeechClient .create ()) {
604
+
605
+ // Configure request with local raw PCM audio
606
+ RecognitionConfig recConfig = RecognitionConfig .newBuilder ()
607
+ .setEncoding (AudioEncoding .LINEAR16 )
608
+ .setLanguageCode ("en-US" )
609
+ .setSampleRateHertz (16000 )
610
+ .setEnableAutomaticPunctuation (true )
611
+ .build ();
612
+
613
+ // Build the streaming config with the audio config
614
+ StreamingRecognitionConfig config = StreamingRecognitionConfig .newBuilder ()
615
+ .setConfig (recConfig )
616
+ .build ();
617
+
618
+ class ResponseApiStreamingObserver <T > implements ApiStreamObserver <T > {
619
+ private final SettableFuture <List <T >> future = SettableFuture .create ();
620
+ private final List <T > messages = new java .util .ArrayList <T >();
621
+
622
+ @ Override
623
+ public void onNext (T message ) {
624
+ messages .add (message );
625
+ }
626
+
627
+ @ Override
628
+ public void onError (Throwable t ) {
629
+ future .setException (t );
630
+ }
631
+
632
+ @ Override
633
+ public void onCompleted () {
634
+ future .set (messages );
635
+ }
636
+
637
+ // Returns the SettableFuture object to get received messages / exceptions.
638
+ public SettableFuture <List <T >> future () {
639
+ return future ;
640
+ }
641
+ }
642
+
643
+ ResponseApiStreamingObserver <StreamingRecognizeResponse > responseObserver =
644
+ new ResponseApiStreamingObserver <>();
645
+
646
+ BidiStreamingCallable <StreamingRecognizeRequest , StreamingRecognizeResponse > callable =
647
+ speech .streamingRecognizeCallable ();
648
+
649
+ ApiStreamObserver <StreamingRecognizeRequest > requestObserver =
650
+ callable .bidiStreamingCall (responseObserver );
651
+
652
+ // The first request must **only** contain the audio configuration:
653
+ requestObserver .onNext (StreamingRecognizeRequest .newBuilder ()
654
+ .setStreamingConfig (config )
655
+ .build ());
656
+
657
+ // Subsequent requests must **only** contain the audio data.
658
+ requestObserver .onNext (StreamingRecognizeRequest .newBuilder ()
659
+ .setAudioContent (ByteString .copyFrom (data ))
660
+ .build ());
661
+
662
+ // Mark transmission as completed after sending the data.
663
+ requestObserver .onCompleted ();
664
+
665
+ List <StreamingRecognizeResponse > responses = responseObserver .future ().get ();
666
+
667
+ for (StreamingRecognizeResponse response : responses ) {
668
+ // For streaming recognize, the results list has one is_final result (if available) followed
669
+ // by a number of in-progress results (if iterim_results is true) for subsequent utterances.
670
+ // Just print the first result here.
671
+ StreamingRecognitionResult result = response .getResultsList ().get (0 );
672
+ // There can be several alternative transcripts for a given chunk of speech. Just use the
673
+ // first (most likely) one here.
674
+ SpeechRecognitionAlternative alternative = result .getAlternativesList ().get (0 );
675
+ System .out .printf ("Transcript : %s\n " , alternative .getTranscript ());
676
+ }
677
+ }
678
+ }
500
679
}
0 commit comments