Skip to content

Commit 52c515a

Browse files
committed
Add Auto-Punctuation samples to speech
1 parent e4cc47a commit 52c515a

File tree

4 files changed

+217
-1
lines changed

4 files changed

+217
-1
lines changed

speech/cloud-client/README.md

+16
Original file line numberDiff line numberDiff line change
@@ -70,3 +70,19 @@ Asynchronously transcribe an audio file hosted on GCS
7070
```
7171
mvn exec:java -DRecognize -Dexec.args="model-selection gs://cloud-samples-tests/speech/Google_Gnome.wav"
7272
```
73+
74+
## Auto Punctuation
75+
Synchronously transcribe and punctuate an audio file
76+
```
77+
mvn exec:java -DRecognize -Dexec.args="auto-punctuation ./resources/audio.raw"
78+
```
79+
80+
Asynchronously transcribe and punctuate an audio file hosted on GCS
81+
```
82+
mvn exec:java -DRecognize -Dexec.args="auto-punctuation gs://cloud-samples-tests/speech/brooklyn.flac"
83+
```
84+
85+
Perform streaming speech transcription and punctuation on an audio file
86+
```
87+
mvn exec:java -DRecognize -Dexec.args="stream-punctuation ./resources/audio.raw"
88+
```

speech/cloud-client/pom.xml

+1-1
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@
4040
<dependency>
4141
<groupId>com.google.cloud</groupId>
4242
<artifactId>google-cloud-speech</artifactId>
43-
<version>0.40.0-alpha</version>
43+
<version>0.42.0-alpha</version>
4444
</dependency>
4545
<!-- [END dependencies] -->
4646

speech/cloud-client/src/main/java/com/example/speech/Recognize.java

+179
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ public static void main(String... args) throws Exception {
5353
"\tjava %s \"<command>\" \"<path-to-image>\"\n"
5454
+ "Commands:\n"
5555
+ "\tsyncrecognize | asyncrecognize | streamrecognize | wordoffsets | model-selection\n"
56+
+ "\t| auto-punctuation | stream-punctuation\n"
5657
+ "Path:\n\tA file path (ex: ./resources/audio.raw) or a URI "
5758
+ "for a Cloud Storage resource (gs://...)\n",
5859
Recognize.class.getCanonicalName());
@@ -88,6 +89,14 @@ public static void main(String... args) throws Exception {
8889
} else {
8990
transcribeModelSelection(path);
9091
}
92+
} else if (command.equals("auto-punctuation")) {
93+
if (path.startsWith("gs://")) {
94+
transcribeGcsWithAutomaticPunctuation(path);
95+
} else {
96+
transcribeFileWithAutomaticPunctuation(path);
97+
}
98+
} else if (command.equals("stream-punctuation")) {
99+
streamingTranscribeWithAutomaticPunctuation(path);
91100
}
92101
}
93102

@@ -497,4 +506,174 @@ public static void transcribeModelSelectionGcs(String gcsUri) throws Exception {
497506
}
498507
// [END speech_transcribe_model_selection_gcs]
499508
}
509+
510+
// [START speech_sync_recognize_punctuation]
511+
/**
512+
* Performs transcription with automatic punctuation on raw PCM audio data.
513+
*
514+
* @param fileName the path to a PCM audio file to transcribe.
515+
*/
516+
public static void transcribeFileWithAutomaticPunctuation(String fileName) throws Exception {
517+
Path path = Paths.get(fileName);
518+
byte[] content = Files.readAllBytes(path);
519+
520+
try (SpeechClient speechClient = SpeechClient.create()) {
521+
// Configure request with local raw PCM audio
522+
RecognitionConfig recConfig = RecognitionConfig.newBuilder()
523+
.setEncoding(AudioEncoding.LINEAR16)
524+
.setLanguageCode("en-US")
525+
.setSampleRateHertz(16000)
526+
.setEnableAutomaticPunctuation(true)
527+
.build();
528+
529+
// Get the contents of the local audio file
530+
RecognitionAudio recognitionAudio = RecognitionAudio.newBuilder()
531+
.setContent(ByteString.copyFrom(content))
532+
.build();
533+
534+
// Perform the transcription request
535+
RecognizeResponse recognizeResponse = speechClient.recognize(recConfig, recognitionAudio);
536+
537+
// Just print the first result here.
538+
SpeechRecognitionResult result = recognizeResponse.getResultsList().get(0);
539+
540+
// There can be several alternative transcripts for a given chunk of speech. Just use the
541+
// first (most likely) one here.
542+
SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
543+
544+
// Print out the result
545+
System.out.printf("Transcript : %s\n", alternative.getTranscript());
546+
}
547+
}
548+
// [END speech_sync_recognize_punctuation]
549+
550+
// [START speech_async_recognize_gcs_punctuation]
551+
/**
552+
* Performs transcription on remote FLAC file and prints the transcription.
553+
*
554+
* @param gcsUri the path to the remote FLAC audio file to transcribe.
555+
*/
556+
public static void transcribeGcsWithAutomaticPunctuation(String gcsUri) throws Exception {
557+
try (SpeechClient speechClient = SpeechClient.create()) {
558+
// Configure request with raw PCM audio
559+
RecognitionConfig config = RecognitionConfig.newBuilder()
560+
.setEncoding(AudioEncoding.FLAC)
561+
.setLanguageCode("en-US")
562+
.setSampleRateHertz(16000)
563+
.setEnableAutomaticPunctuation(true)
564+
.build();
565+
566+
// Set the remote path for the audio file
567+
RecognitionAudio audio = RecognitionAudio.newBuilder()
568+
.setUri(gcsUri)
569+
.build();
570+
571+
// Use non-blocking call for getting file transcription
572+
OperationFuture<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> response =
573+
speechClient.longRunningRecognizeAsync(config, audio);
574+
575+
while (!response.isDone()) {
576+
System.out.println("Waiting for response...");
577+
Thread.sleep(10000);
578+
}
579+
580+
// Just print the first result here.
581+
SpeechRecognitionResult result = response.get().getResultsList().get(0);
582+
583+
// There can be several alternative transcripts for a given chunk of speech. Just use the
584+
// first (most likely) one here.
585+
SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
586+
587+
// Print out the result
588+
System.out.printf("Transcript : %s\n", alternative.getTranscript());
589+
}
590+
}
591+
// [END speech_async_recognize_gcs_punctuation]
592+
593+
/**
594+
* Performs streaming speech recognition on raw PCM audio data.
595+
*
596+
* @param fileName the path to a PCM audio file to transcribe.
597+
*/
598+
public static void streamingTranscribeWithAutomaticPunctuation(String fileName) throws Exception {
599+
Path path = Paths.get(fileName);
600+
byte[] data = Files.readAllBytes(path);
601+
602+
// Instantiates a client with GOOGLE_APPLICATION_CREDENTIALS
603+
try (SpeechClient speech = SpeechClient.create()) {
604+
605+
// Configure request with local raw PCM audio
606+
RecognitionConfig recConfig = RecognitionConfig.newBuilder()
607+
.setEncoding(AudioEncoding.LINEAR16)
608+
.setLanguageCode("en-US")
609+
.setSampleRateHertz(16000)
610+
.setEnableAutomaticPunctuation(true)
611+
.build();
612+
613+
// Build the streaming config with the audio config
614+
StreamingRecognitionConfig config = StreamingRecognitionConfig.newBuilder()
615+
.setConfig(recConfig)
616+
.build();
617+
618+
class ResponseApiStreamingObserver<T> implements ApiStreamObserver<T> {
619+
private final SettableFuture<List<T>> future = SettableFuture.create();
620+
private final List<T> messages = new java.util.ArrayList<T>();
621+
622+
@Override
623+
public void onNext(T message) {
624+
messages.add(message);
625+
}
626+
627+
@Override
628+
public void onError(Throwable t) {
629+
future.setException(t);
630+
}
631+
632+
@Override
633+
public void onCompleted() {
634+
future.set(messages);
635+
}
636+
637+
// Returns the SettableFuture object to get received messages / exceptions.
638+
public SettableFuture<List<T>> future() {
639+
return future;
640+
}
641+
}
642+
643+
ResponseApiStreamingObserver<StreamingRecognizeResponse> responseObserver =
644+
new ResponseApiStreamingObserver<>();
645+
646+
BidiStreamingCallable<StreamingRecognizeRequest, StreamingRecognizeResponse> callable =
647+
speech.streamingRecognizeCallable();
648+
649+
ApiStreamObserver<StreamingRecognizeRequest> requestObserver =
650+
callable.bidiStreamingCall(responseObserver);
651+
652+
// The first request must **only** contain the audio configuration:
653+
requestObserver.onNext(StreamingRecognizeRequest.newBuilder()
654+
.setStreamingConfig(config)
655+
.build());
656+
657+
// Subsequent requests must **only** contain the audio data.
658+
requestObserver.onNext(StreamingRecognizeRequest.newBuilder()
659+
.setAudioContent(ByteString.copyFrom(data))
660+
.build());
661+
662+
// Mark transmission as completed after sending the data.
663+
requestObserver.onCompleted();
664+
665+
List<StreamingRecognizeResponse> responses = responseObserver.future().get();
666+
667+
for (StreamingRecognizeResponse response : responses) {
668+
// For streaming recognize, the results list has one is_final result (if available) followed
669+
// by a number of in-progress results (if iterim_results is true) for subsequent utterances.
670+
// Just print the first result here.
671+
StreamingRecognitionResult result = response.getResultsList().get(0);
672+
// There can be several alternative transcripts for a given chunk of speech. Just use the
673+
// first (most likely) one here.
674+
SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
675+
System.out.printf("Transcript : %s\n", alternative.getTranscript());
676+
}
677+
}
678+
}
500679
}

speech/cloud-client/src/test/java/com/example/speech/RecognizeIT.java

+21
Original file line numberDiff line numberDiff line change
@@ -124,4 +124,25 @@ public void testGcsModelSelection() throws Exception {
124124
assertThat(got).contains("OK Google");
125125
assertThat(got).contains("the weather outside is sunny");
126126
}
127+
128+
@Test
129+
public void testAutoPunctuation() throws Exception {
130+
Recognize.transcribeFileWithAutomaticPunctuation(audioFileName);
131+
String got = bout.toString();
132+
assertThat(got).contains("How old is the Brooklyn Bridge?");
133+
}
134+
135+
@Test
136+
public void testGcsAutoPunctuation() throws Exception {
137+
Recognize.transcribeGcsWithAutomaticPunctuation(gcsAudioPath);
138+
String got = bout.toString();
139+
assertThat(got).contains("How old is the Brooklyn Bridge?");
140+
}
141+
142+
@Test
143+
public void testStreamAutoPunctuation() throws Exception {
144+
Recognize.streamingTranscribeWithAutomaticPunctuation(audioFileName);
145+
String got = bout.toString();
146+
assertThat(got).contains("How old is the Brooklyn Bridge?");
147+
}
127148
}

0 commit comments

Comments
 (0)