@@ -53,6 +53,7 @@ public static void main(String... args) throws Exception {
       "\tjava %s \"<command>\" \"<path-to-image>\"\n"
           + "Commands:\n"
           + "\tsyncrecognize | asyncrecognize | streamrecognize | wordoffsets | model-selection\n"
+          + "\t| auto-punctuation | stream-punctuation\n"
           + "Path:\n\tA file path (ex: ./resources/audio.raw) or a URI "
           + "for a Cloud Storage resource (gs://...)\n",
       Recognize.class.getCanonicalName());
@@ -88,6 +89,14 @@ public static void main(String... args) throws Exception {
       } else {
         transcribeModelSelection(path);
       }
+    } else if (command.equals("auto-punctuation")) {
+      if (path.startsWith("gs://")) {
+        transcribeGcsWithAutomaticPunctuation(path);
+      } else {
+        transcribeFileWithAutomaticPunctuation(path);
+      }
+    } else if (command.equals("stream-punctuation")) {
+      streamingTranscribeWithAutomaticPunctuation(path);
     }
   }
 
@@ -497,4 +506,176 @@ public static void transcribeModelSelectionGcs(String gcsUri) throws Exception {
     }
     // [END speech_transcribe_model_selection_gcs]
   }
+
+  // [START speech_sync_recognize_punctuation]
+  /**
+   * Performs transcription with automatic punctuation on raw PCM audio data.
+   *
+   * @param fileName the path to a PCM audio file to transcribe.
+   */
+  public static void transcribeFileWithAutomaticPunctuation(String fileName) throws Exception {
+    Path path = Paths.get(fileName);
+    byte[] content = Files.readAllBytes(path);
+
+    try (SpeechClient speechClient = SpeechClient.create()) {
+      // Configure request with local raw PCM audio
+      RecognitionConfig recConfig = RecognitionConfig.newBuilder()
+          .setEncoding(AudioEncoding.LINEAR16)
+          .setLanguageCode("en-US")
+          .setSampleRateHertz(16000)
+          .setEnableAutomaticPunctuation(true)
+          .build();
+
+      // Get the contents of the local audio file
+      RecognitionAudio recognitionAudio = RecognitionAudio.newBuilder()
+          .setContent(ByteString.copyFrom(content))
+          .build();
+
+      // Perform the transcription request
+      RecognizeResponse recognizeResponse = speechClient.recognize(recConfig, recognitionAudio);
+
+      // Just print the first result here.
+      SpeechRecognitionResult result = recognizeResponse.getResultsList().get(0);
+
+      // There can be several alternative transcripts for a given chunk of speech. Just use the
+      // first (most likely) one here.
+      SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
+
+      // Print out the result
+      System.out.printf("Transcript : %s\n", alternative.getTranscript());
+    }
+  }
+  // [END speech_sync_recognize_punctuation]
+
+  // [START speech_async_recognize_gcs_punctuation]
+  /**
+   * Performs transcription on a remote FLAC file and prints the transcription.
+   *
+   * @param gcsUri the path to the remote FLAC audio file to transcribe.
+   */
+  public static void transcribeGcsWithAutomaticPunctuation(String gcsUri) throws Exception {
+    try (SpeechClient speechClient = SpeechClient.create()) {
+      // Configure request with FLAC audio and automatic punctuation
+      RecognitionConfig config = RecognitionConfig.newBuilder()
+          .setEncoding(AudioEncoding.FLAC)
+          .setLanguageCode("en-US")
+          .setSampleRateHertz(16000)
+          .setEnableAutomaticPunctuation(true)
+          .build();
+
+      // Set the remote path for the audio file
+      RecognitionAudio audio = RecognitionAudio.newBuilder()
+          .setUri(gcsUri)
+          .build();
+
+      // Use non-blocking call for getting file transcription
+      OperationFuture<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> response =
+          speechClient.longRunningRecognizeAsync(config, audio);
+
+      while (!response.isDone()) {
+        System.out.println("Waiting for response...");
+        Thread.sleep(10000);
+      }
+
+      // Just print the first result here.
+      SpeechRecognitionResult result = response.get().getResultsList().get(0);
+
+      // There can be several alternative transcripts for a given chunk of speech. Just use the
+      // first (most likely) one here.
+      SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
+
+      // Print out the result
+      System.out.printf("Transcript : %s\n", alternative.getTranscript());
+    }
+  }
+  // [END speech_async_recognize_gcs_punctuation]
+
+  // [START speech_stream_recognize_punctuation]
+  /**
+   * Performs streaming speech recognition on raw PCM audio data.
+   *
+   * @param fileName the path to a PCM audio file to transcribe.
+   */
+  public static void streamingTranscribeWithAutomaticPunctuation(String fileName) throws Exception {
+    Path path = Paths.get(fileName);
+    byte[] data = Files.readAllBytes(path);
+
+    // Instantiates a client with GOOGLE_APPLICATION_CREDENTIALS
+    try (SpeechClient speech = SpeechClient.create()) {
+
+      // Configure request with local raw PCM audio
+      RecognitionConfig recConfig = RecognitionConfig.newBuilder()
+          .setEncoding(AudioEncoding.LINEAR16)
+          .setLanguageCode("en-US")
+          .setSampleRateHertz(16000)
+          .setEnableAutomaticPunctuation(true)
+          .build();
+
+      // Build the streaming config with the audio config
+      StreamingRecognitionConfig config = StreamingRecognitionConfig.newBuilder()
+          .setConfig(recConfig)
+          .build();
+
+      class ResponseApiStreamingObserver<T> implements ApiStreamObserver<T> {
+        private final SettableFuture<List<T>> future = SettableFuture.create();
+        private final List<T> messages = new java.util.ArrayList<T>();
+
+        @Override
+        public void onNext(T message) {
+          messages.add(message);
+        }
+
+        @Override
+        public void onError(Throwable t) {
+          future.setException(t);
+        }
+
+        @Override
+        public void onCompleted() {
+          future.set(messages);
+        }
+
+        // Returns the SettableFuture object to get received messages / exceptions.
+        public SettableFuture<List<T>> future() {
+          return future;
+        }
+      }
+
+      ResponseApiStreamingObserver<StreamingRecognizeResponse> responseObserver =
+          new ResponseApiStreamingObserver<>();
+
+      BidiStreamingCallable<StreamingRecognizeRequest, StreamingRecognizeResponse> callable =
+          speech.streamingRecognizeCallable();
+
+      ApiStreamObserver<StreamingRecognizeRequest> requestObserver =
+          callable.bidiStreamingCall(responseObserver);
+
+      // The first request must **only** contain the audio configuration:
+      requestObserver.onNext(StreamingRecognizeRequest.newBuilder()
+          .setStreamingConfig(config)
+          .build());
+
+      // Subsequent requests must **only** contain the audio data.
+      requestObserver.onNext(StreamingRecognizeRequest.newBuilder()
+          .setAudioContent(ByteString.copyFrom(data))
+          .build());
+
+      // Mark transmission as completed after sending the data.
+      requestObserver.onCompleted();
+
+      List<StreamingRecognizeResponse> responses = responseObserver.future().get();
+
+      for (StreamingRecognizeResponse response : responses) {
+        // For streaming recognize, the results list has one is_final result (if available)
+        // followed by a number of in-progress results (if interim_results is true) for
+        // subsequent utterances. Just print the first result here.
+        StreamingRecognitionResult result = response.getResultsList().get(0);
+        // There can be several alternative transcripts for a given chunk of speech. Just use the
+        // first (most likely) one here.
+        SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
+        System.out.printf("Transcript : %s\n", alternative.getTranscript());
+      }
+    }
+  }
+  // [END speech_stream_recognize_punctuation]
 }
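
For reference, a minimal sketch of how the three new entry points added by this diff could be driven directly. The method names and signatures come from the change itself; the local path matches the usage hint above, while the driver class name and the gs:// URI are illustrative placeholders, not part of the change.

// Hypothetical driver for local experimentation only; not included in this change.
public class PunctuationSamplesDemo {
  public static void main(String[] args) throws Exception {
    // Synchronous recognition of a local 16 kHz LINEAR16 file, with punctuation enabled
    Recognize.transcribeFileWithAutomaticPunctuation("./resources/audio.raw");

    // Long-running recognition of a FLAC file in Cloud Storage (placeholder URI)
    Recognize.transcribeGcsWithAutomaticPunctuation("gs://my-bucket/audio.flac");

    // Streaming recognition of the same local file over the bidirectional API
    Recognize.streamingTranscribeWithAutomaticPunctuation("./resources/audio.raw");
  }
}

The same paths can equally be passed through the sample's own command dispatch, e.g. Recognize.main("auto-punctuation", "./resources/audio.raw").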