Skip to content

Commit 884ba89

Browse files
authored
Add Auto-Punctuation samples to speech (#1079)
* Add Auto-Punctuation samples to speech * Add region tags for streaming sample * Update README
1 parent beb762b commit 884ba89

File tree

4 files changed

+226
-3
lines changed

4 files changed

+226
-3
lines changed

speech/cloud-client/README.md

+23-2
Original file line numberDiff line numberDiff line change
@@ -61,12 +61,33 @@ mvn exec:java -DRecognize -Dexec.args="wordoffsets gs://cloud-samples-tests/spee
6161
```
6262

6363
## Model Selection
64-
Synchronously transcribe a audio file
64+
Synchronously transcribe an audio file
6565
```
6666
mvn exec:java -DRecognize -Dexec.args="model-selection ./resources/Google_Gnome.wav"
6767
```
6868

69-
Asynchronously transcribe a audio file hosted on GCS
69+
Asynchronously transcribe an audio file hosted on GCS
7070
```
7171
mvn exec:java -DRecognize -Dexec.args="model-selection gs://cloud-samples-tests/speech/Google_Gnome.wav"
7272
```
73+
74+
Perform streaming speech transcription on an audio file
75+
```
76+
mvn exec:java -DRecognize -Dexec.args="streamrecognize ./resources/Google_Gnome.wav"
77+
```
78+
79+
## Auto Punctuation
80+
Synchronously transcribe and punctuate an audio file
81+
```
82+
mvn exec:java -DRecognize -Dexec.args="auto-punctuation ./resources/audio.raw"
83+
```
84+
85+
Asynchronously transcribe and punctuate an audio file hosted on GCS
86+
```
87+
mvn exec:java -DRecognize -Dexec.args="auto-punctuation gs://cloud-samples-tests/speech/brooklyn.flac"
88+
```
89+
90+
Perform streaming speech transcription and punctuation on an audio file
91+
```
92+
mvn exec:java -DRecognize -Dexec.args="stream-punctuation ./resources/audio.raw"
93+
```

speech/cloud-client/pom.xml

+1-1
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@
4040
<dependency>
4141
<groupId>com.google.cloud</groupId>
4242
<artifactId>google-cloud-speech</artifactId>
43-
<version>0.40.0-alpha</version>
43+
<version>0.42.0-alpha</version>
4444
</dependency>
4545
<!-- [END dependencies] -->
4646

speech/cloud-client/src/main/java/com/example/speech/Recognize.java

+181
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ public static void main(String... args) throws Exception {
5353
"\tjava %s \"<command>\" \"<path-to-image>\"\n"
5454
+ "Commands:\n"
5555
+ "\tsyncrecognize | asyncrecognize | streamrecognize | wordoffsets | model-selection\n"
56+
+ "\t| auto-punctuation | stream-punctuation\n"
5657
+ "Path:\n\tA file path (ex: ./resources/audio.raw) or a URI "
5758
+ "for a Cloud Storage resource (gs://...)\n",
5859
Recognize.class.getCanonicalName());
@@ -88,6 +89,14 @@ public static void main(String... args) throws Exception {
8889
} else {
8990
transcribeModelSelection(path);
9091
}
92+
} else if (command.equals("auto-punctuation")) {
93+
if (path.startsWith("gs://")) {
94+
transcribeGcsWithAutomaticPunctuation(path);
95+
} else {
96+
transcribeFileWithAutomaticPunctuation(path);
97+
}
98+
} else if (command.equals("stream-punctuation")) {
99+
streamingTranscribeWithAutomaticPunctuation(path);
91100
}
92101
}
93102

@@ -497,4 +506,176 @@ public static void transcribeModelSelectionGcs(String gcsUri) throws Exception {
497506
}
498507
// [END speech_transcribe_model_selection_gcs]
499508
}
509+
510+
// [START speech_sync_recognize_punctuation]
511+
/**
512+
* Performs transcription with automatic punctuation on raw PCM audio data.
513+
*
514+
* @param fileName the path to a PCM audio file to transcribe.
515+
*/
516+
public static void transcribeFileWithAutomaticPunctuation(String fileName) throws Exception {
517+
Path path = Paths.get(fileName);
518+
byte[] content = Files.readAllBytes(path);
519+
520+
try (SpeechClient speechClient = SpeechClient.create()) {
521+
// Configure request with local raw PCM audio
522+
RecognitionConfig recConfig = RecognitionConfig.newBuilder()
523+
.setEncoding(AudioEncoding.LINEAR16)
524+
.setLanguageCode("en-US")
525+
.setSampleRateHertz(16000)
526+
.setEnableAutomaticPunctuation(true)
527+
.build();
528+
529+
// Get the contents of the local audio file
530+
RecognitionAudio recognitionAudio = RecognitionAudio.newBuilder()
531+
.setContent(ByteString.copyFrom(content))
532+
.build();
533+
534+
// Perform the transcription request
535+
RecognizeResponse recognizeResponse = speechClient.recognize(recConfig, recognitionAudio);
536+
537+
// Just print the first result here.
538+
SpeechRecognitionResult result = recognizeResponse.getResultsList().get(0);
539+
540+
// There can be several alternative transcripts for a given chunk of speech. Just use the
541+
// first (most likely) one here.
542+
SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
543+
544+
// Print out the result
545+
System.out.printf("Transcript : %s\n", alternative.getTranscript());
546+
}
547+
}
548+
// [END speech_sync_recognize_punctuation]
549+
550+
// [START speech_async_recognize_gcs_punctuation]
551+
/**
552+
* Performs transcription on remote FLAC file and prints the transcription.
553+
*
554+
* @param gcsUri the path to the remote FLAC audio file to transcribe.
555+
*/
556+
public static void transcribeGcsWithAutomaticPunctuation(String gcsUri) throws Exception {
557+
try (SpeechClient speechClient = SpeechClient.create()) {
558+
// Configure request with raw PCM audio
559+
RecognitionConfig config = RecognitionConfig.newBuilder()
560+
.setEncoding(AudioEncoding.FLAC)
561+
.setLanguageCode("en-US")
562+
.setSampleRateHertz(16000)
563+
.setEnableAutomaticPunctuation(true)
564+
.build();
565+
566+
// Set the remote path for the audio file
567+
RecognitionAudio audio = RecognitionAudio.newBuilder()
568+
.setUri(gcsUri)
569+
.build();
570+
571+
// Use non-blocking call for getting file transcription
572+
OperationFuture<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> response =
573+
speechClient.longRunningRecognizeAsync(config, audio);
574+
575+
while (!response.isDone()) {
576+
System.out.println("Waiting for response...");
577+
Thread.sleep(10000);
578+
}
579+
580+
// Just print the first result here.
581+
SpeechRecognitionResult result = response.get().getResultsList().get(0);
582+
583+
// There can be several alternative transcripts for a given chunk of speech. Just use the
584+
// first (most likely) one here.
585+
SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
586+
587+
// Print out the result
588+
System.out.printf("Transcript : %s\n", alternative.getTranscript());
589+
}
590+
}
591+
// [END speech_async_recognize_gcs_punctuation]
592+
593+
// [START speech_stream_recognize_punctuation]
594+
/**
595+
* Performs streaming speech recognition on raw PCM audio data.
596+
*
597+
* @param fileName the path to a PCM audio file to transcribe.
598+
*/
599+
public static void streamingTranscribeWithAutomaticPunctuation(String fileName) throws Exception {
600+
Path path = Paths.get(fileName);
601+
byte[] data = Files.readAllBytes(path);
602+
603+
// Instantiates a client with GOOGLE_APPLICATION_CREDENTIALS
604+
try (SpeechClient speech = SpeechClient.create()) {
605+
606+
// Configure request with local raw PCM audio
607+
RecognitionConfig recConfig = RecognitionConfig.newBuilder()
608+
.setEncoding(AudioEncoding.LINEAR16)
609+
.setLanguageCode("en-US")
610+
.setSampleRateHertz(16000)
611+
.setEnableAutomaticPunctuation(true)
612+
.build();
613+
614+
// Build the streaming config with the audio config
615+
StreamingRecognitionConfig config = StreamingRecognitionConfig.newBuilder()
616+
.setConfig(recConfig)
617+
.build();
618+
619+
class ResponseApiStreamingObserver<T> implements ApiStreamObserver<T> {
620+
private final SettableFuture<List<T>> future = SettableFuture.create();
621+
private final List<T> messages = new java.util.ArrayList<T>();
622+
623+
@Override
624+
public void onNext(T message) {
625+
messages.add(message);
626+
}
627+
628+
@Override
629+
public void onError(Throwable t) {
630+
future.setException(t);
631+
}
632+
633+
@Override
634+
public void onCompleted() {
635+
future.set(messages);
636+
}
637+
638+
// Returns the SettableFuture object to get received messages / exceptions.
639+
public SettableFuture<List<T>> future() {
640+
return future;
641+
}
642+
}
643+
644+
ResponseApiStreamingObserver<StreamingRecognizeResponse> responseObserver =
645+
new ResponseApiStreamingObserver<>();
646+
647+
BidiStreamingCallable<StreamingRecognizeRequest, StreamingRecognizeResponse> callable =
648+
speech.streamingRecognizeCallable();
649+
650+
ApiStreamObserver<StreamingRecognizeRequest> requestObserver =
651+
callable.bidiStreamingCall(responseObserver);
652+
653+
// The first request must **only** contain the audio configuration:
654+
requestObserver.onNext(StreamingRecognizeRequest.newBuilder()
655+
.setStreamingConfig(config)
656+
.build());
657+
658+
// Subsequent requests must **only** contain the audio data.
659+
requestObserver.onNext(StreamingRecognizeRequest.newBuilder()
660+
.setAudioContent(ByteString.copyFrom(data))
661+
.build());
662+
663+
// Mark transmission as completed after sending the data.
664+
requestObserver.onCompleted();
665+
666+
List<StreamingRecognizeResponse> responses = responseObserver.future().get();
667+
668+
for (StreamingRecognizeResponse response : responses) {
669+
// For streaming recognize, the results list has one is_final result (if available) followed
670+
// by a number of in-progress results (if iterim_results is true) for subsequent utterances.
671+
// Just print the first result here.
672+
StreamingRecognitionResult result = response.getResultsList().get(0);
673+
// There can be several alternative transcripts for a given chunk of speech. Just use the
674+
// first (most likely) one here.
675+
SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
676+
System.out.printf("Transcript : %s\n", alternative.getTranscript());
677+
}
678+
}
679+
}
680+
// [END speech_stream_recognize_punctuation]
500681
}

speech/cloud-client/src/test/java/com/example/speech/RecognizeIT.java

+21
Original file line numberDiff line numberDiff line change
@@ -124,4 +124,25 @@ public void testGcsModelSelection() throws Exception {
124124
assertThat(got).contains("OK Google");
125125
assertThat(got).contains("the weather outside is sunny");
126126
}
127+
128+
@Test
129+
public void testAutoPunctuation() throws Exception {
130+
Recognize.transcribeFileWithAutomaticPunctuation(audioFileName);
131+
String got = bout.toString();
132+
assertThat(got).contains("How old is the Brooklyn Bridge?");
133+
}
134+
135+
@Test
136+
public void testGcsAutoPunctuation() throws Exception {
137+
Recognize.transcribeGcsWithAutomaticPunctuation(gcsAudioPath);
138+
String got = bout.toString();
139+
assertThat(got).contains("How old is the Brooklyn Bridge?");
140+
}
141+
142+
@Test
143+
public void testStreamAutoPunctuation() throws Exception {
144+
Recognize.streamingTranscribeWithAutomaticPunctuation(audioFileName);
145+
String got = bout.toString();
146+
assertThat(got).contains("How old is the Brooklyn Bridge?");
147+
}
127148
}

0 commit comments

Comments
 (0)