Skip to content

Commit c0210e2

Browse files
authored
Added command line option class + option to pass different lang code as argument (#1504)
* added different language option for other than english * changed it to english * timestamp change * changed millisecond format to min:sec * Revert "timestamp change" This reverts commit df21c30. * removed indent issues * made requested changes
1 parent 5a53097 commit c0210e2

File tree

4 files changed

+109
-28
lines changed

4 files changed

+109
-28
lines changed

speech/cloud-client/README.md

+5-1
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,11 @@ mvn exec:java -DRecognize -Dexec.args="word-level-conf gs://cloud-samples-tests/
165165
```
166166

167167
## Infinite Streaming
168-
Continuously stream audio to the speech API over multiple requests
168+
Continuously stream audio to the speech API over multiple requests (by default en-US).
169169
```
170170
mvn exec:java -DInfiniteStreamRecognize
171171
```
172+
If the streamed audio is in a different language, you can pass the language code as a command line argument (for example, en-GB for English (Great Britain) or en-US for English (United States); more codes are listed at [this link](https://cloud.google.com/speech-to-text/docs/languages)).
173+
```
174+
mvn exec:java -Dexec.args="-lang_code=en-US" -DInfiniteStreamRecognize
175+
```

speech/cloud-client/pom.xml

+5-1
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,11 @@
4343
<version>1.9.0</version>
4444
</dependency>
4545
<!-- [END speech_quickstart_dependencies] -->
46-
46+
<dependency>
47+
<groupId>commons-cli</groupId>
48+
<artifactId>commons-cli</artifactId>
49+
<version>1.3</version>
50+
</dependency>
4751
<!-- Test dependencies -->
4852
<dependency>
4953
<groupId>junit</groupId>

speech/cloud-client/src/main/java/com/example/speech/InfiniteStreamRecognize.java

+43-26
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
package com.example.speech;
1818

1919
// [START speech_transcribe_infinite_streaming]
20+
2021
import com.google.api.gax.rpc.ClientStream;
2122
import com.google.api.gax.rpc.ResponseObserver;
2223
import com.google.api.gax.rpc.StreamController;
@@ -29,11 +30,13 @@
2930
import com.google.cloud.speech.v1p1beta1.StreamingRecognizeResponse;
3031
import com.google.protobuf.ByteString;
3132
import com.google.protobuf.Duration;
33+
3234
import java.lang.Math;
3335
import java.text.DecimalFormat;
3436
import java.util.ArrayList;
3537
import java.util.concurrent.BlockingQueue;
3638
import java.util.concurrent.LinkedBlockingQueue;
39+
import java.util.concurrent.TimeUnit;
3740
import javax.sound.sampled.AudioFormat;
3841
import javax.sound.sampled.AudioSystem;
3942
import javax.sound.sampled.DataLine;
@@ -54,7 +57,7 @@ public class InfiniteStreamRecognize {
5457
private static int BYTES_PER_BUFFER = 6400; // buffer size in bytes
5558

5659
private static int restartCounter = 0;
57-
private static ArrayList<ByteString> audioInput = new ArrayList<ByteString>();
60+
private static ArrayList<ByteString> audioInput = new ArrayList<ByteString>();
5861
private static ArrayList<ByteString> lastAudioInput = new ArrayList<ByteString>();
5962
private static int resultEndTimeInMS = 0;
6063
private static int isFinalEndTime = 0;
@@ -66,15 +69,33 @@ public class InfiniteStreamRecognize {
6669
private static ByteString tempByteString;
6770

6871
public static void main(String... args) {
72+
InfiniteStreamRecognizeOptions options = InfiniteStreamRecognizeOptions.fromFlags(args);
73+
if (options == null) {
74+
// Could not parse.
75+
System.out.println("Failed to parse options.");
76+
System.exit(1);
77+
}
78+
6979
try {
70-
infiniteStreamingRecognize();
80+
infiniteStreamingRecognize(options.langCode);
7181
} catch (Exception e) {
7282
System.out.println("Exception caught: " + e);
7383
}
7484
}
7585

86+
public static String convertMillisToDate(double milliSeconds) {
87+
long millis = (long) milliSeconds;
88+
DecimalFormat format = new DecimalFormat();
89+
format.setMinimumIntegerDigits(2);
90+
return String.format("%s:%s /",
91+
format.format(TimeUnit.MILLISECONDS.toMinutes(millis)),
92+
format.format(TimeUnit.MILLISECONDS.toSeconds(millis)
93+
- TimeUnit.MINUTES.toSeconds(TimeUnit.MILLISECONDS.toMinutes(millis)))
94+
);
95+
}
96+
7697
/** Performs infinite streaming speech recognition */
77-
public static void infiniteStreamingRecognize() throws Exception {
98+
public static void infiniteStreamingRecognize(String languageCode) throws Exception {
7899

79100
// Microphone Input buffering
80101
class MicBuffer implements Runnable {
@@ -115,45 +136,41 @@ public void onStart(StreamController controller) {
115136
}
116137

117138
public void onResponse(StreamingRecognizeResponse response) {
118-
119139
responses.add(response);
120-
121140
StreamingRecognitionResult result = response.getResultsList().get(0);
122-
123141
Duration resultEndTime = result.getResultEndTime();
124-
125142
resultEndTimeInMS = (int) ((resultEndTime.getSeconds() * 1000)
126-
+ (resultEndTime.getNanos() / 1000000));
127-
143+
+ (resultEndTime.getNanos() / 1000000));
128144
double correctedTime = resultEndTimeInMS - bridgingOffset
129-
+ (STREAMING_LIMIT * restartCounter);
130-
DecimalFormat format = new DecimalFormat("0.#");
145+
+ (STREAMING_LIMIT * restartCounter);
131146

132147
SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
133148
if (result.getIsFinal()) {
134149
System.out.print(GREEN);
135150
System.out.print("\033[2K\r");
136-
System.out.printf("%s: %s\n", format.format(correctedTime),
137-
alternative.getTranscript());
138-
151+
System.out.printf("%s: %s [confidence: %.2f]\n",
152+
convertMillisToDate(correctedTime),
153+
alternative.getTranscript(),
154+
alternative.getConfidence()
155+
);
139156
isFinalEndTime = resultEndTimeInMS;
140157
lastTranscriptWasFinal = true;
141158
} else {
142159
System.out.print(RED);
143160
System.out.print("\033[2K\r");
144-
System.out.printf("%s: %s", format.format(correctedTime),
145-
alternative.getTranscript());
146-
161+
System.out.printf("%s: %s", convertMillisToDate(correctedTime),
162+
alternative.getTranscript()
163+
);
147164
lastTranscriptWasFinal = false;
148165
}
149166
}
150167

151-
public void onComplete() {}
152-
153-
public void onError(Throwable t) {}
168+
public void onComplete() {
169+
}
154170

171+
public void onError(Throwable t) {
172+
}
155173
};
156-
157174
clientStream = client.streamingRecognizeCallable().splitCall(responseObserver);
158175

159176
RecognitionConfig recognitionConfig =
@@ -227,8 +244,8 @@ public void onError(Throwable t) {}
227244

228245
request =
229246
StreamingRecognizeRequest.newBuilder()
230-
.setStreamingConfig(streamingRecognitionConfig)
231-
.build();
247+
.setStreamingConfig(streamingRecognitionConfig)
248+
.build();
232249

233250
System.out.println(YELLOW);
234251
System.out.printf("%d: RESTARTING REQUEST\n", restartCounter * STREAMING_LIMIT);
@@ -253,13 +270,12 @@ public void onError(Throwable t) {}
253270
bridgingOffset = finalRequestEndTime;
254271
}
255272
int chunksFromMS = (int) Math.floor((finalRequestEndTime
256-
- bridgingOffset) / chunkTime);
273+
- bridgingOffset) / chunkTime);
257274
// chunks from MS is number of chunks to resend
258275
bridgingOffset = (int) Math.floor((lastAudioInput.size()
259-
- chunksFromMS) * chunkTime);
276+
- chunksFromMS) * chunkTime);
260277
// set bridging offset for next request
261278
for (int i = chunksFromMS; i < lastAudioInput.size(); i++) {
262-
263279
request =
264280
StreamingRecognizeRequest.newBuilder()
265281
.setAudioContent(lastAudioInput.get(i))
@@ -288,5 +304,6 @@ public void onError(Throwable t) {}
288304
}
289305
}
290306
}
307+
291308
}
292309
// [END speech_transcribe_infinite_streaming]
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
/*
2+
* Copyright 2019 Google LLC
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package com.example.speech;
18+
19+
import org.apache.commons.cli.CommandLine;
20+
import org.apache.commons.cli.CommandLineParser;
21+
import org.apache.commons.cli.DefaultParser;
22+
import org.apache.commons.cli.Option;
23+
import org.apache.commons.cli.Options;
24+
import org.apache.commons.cli.ParseException;
25+
26+
public class InfiniteStreamRecognizeOptions {
27+
String langCode = "en-US"; //by default english US
28+
29+
/** Construct an InfiniteStreamRecognizeOptions class from command line flags. */
30+
public static InfiniteStreamRecognizeOptions fromFlags(String[] args) {
31+
Options options = new Options();
32+
options.addOption(
33+
Option.builder()
34+
.type(String.class)
35+
.longOpt("lang_code")
36+
.hasArg()
37+
.desc("Language code")
38+
.build());
39+
40+
CommandLineParser parser = new DefaultParser();
41+
CommandLine commandLine;
42+
try {
43+
commandLine = parser.parse(options, args);
44+
InfiniteStreamRecognizeOptions res = new InfiniteStreamRecognizeOptions();
45+
46+
if (commandLine.hasOption("lang_code")) {
47+
res.langCode = commandLine.getOptionValue("lang_code");
48+
}
49+
return res;
50+
} catch (ParseException e) {
51+
System.err.println(e.getMessage());
52+
return null;
53+
}
54+
}
55+
56+
}

0 commit comments

Comments
 (0)