17
17
package com .example .speech ;
18
18
19
19
// [START speech_transcribe_infinite_streaming]
20
+
20
21
import com .google .api .gax .rpc .ClientStream ;
21
22
import com .google .api .gax .rpc .ResponseObserver ;
22
23
import com .google .api .gax .rpc .StreamController ;
29
30
import com .google .cloud .speech .v1p1beta1 .StreamingRecognizeResponse ;
30
31
import com .google .protobuf .ByteString ;
31
32
import com .google .protobuf .Duration ;
33
+
32
34
import java .lang .Math ;
33
35
import java .text .DecimalFormat ;
34
36
import java .util .ArrayList ;
35
37
import java .util .concurrent .BlockingQueue ;
36
38
import java .util .concurrent .LinkedBlockingQueue ;
39
+ import java .util .concurrent .TimeUnit ;
37
40
import javax .sound .sampled .AudioFormat ;
38
41
import javax .sound .sampled .AudioSystem ;
39
42
import javax .sound .sampled .DataLine ;
@@ -54,7 +57,7 @@ public class InfiniteStreamRecognize {
54
57
private static int BYTES_PER_BUFFER = 6400 ; // buffer size in bytes
55
58
56
59
private static int restartCounter = 0 ;
57
- private static ArrayList <ByteString > audioInput = new ArrayList <ByteString >();
60
+ private static ArrayList <ByteString > audioInput = new ArrayList <ByteString >();
58
61
private static ArrayList <ByteString > lastAudioInput = new ArrayList <ByteString >();
59
62
private static int resultEndTimeInMS = 0 ;
60
63
private static int isFinalEndTime = 0 ;
@@ -66,15 +69,33 @@ public class InfiniteStreamRecognize {
66
69
private static ByteString tempByteString ;
67
70
68
71
public static void main (String ... args ) {
72
+ InfiniteStreamRecognizeOptions options = InfiniteStreamRecognizeOptions .fromFlags (args );
73
+ if (options == null ) {
74
+ // Could not parse.
75
+ System .out .println ("Failed to parse options." );
76
+ System .exit (1 );
77
+ }
78
+
69
79
try {
70
- infiniteStreamingRecognize ();
80
+ infiniteStreamingRecognize (options . langCode );
71
81
} catch (Exception e ) {
72
82
System .out .println ("Exception caught: " + e );
73
83
}
74
84
}
75
85
86
+ public static String convertMillisToDate (double milliSeconds ) {
87
+ long millis = (long ) milliSeconds ;
88
+ DecimalFormat format = new DecimalFormat ();
89
+ format .setMinimumIntegerDigits (2 );
90
+ return String .format ("%s:%s /" ,
91
+ format .format (TimeUnit .MILLISECONDS .toMinutes (millis )),
92
+ format .format (TimeUnit .MILLISECONDS .toSeconds (millis )
93
+ - TimeUnit .MINUTES .toSeconds (TimeUnit .MILLISECONDS .toMinutes (millis )))
94
+ );
95
+ }
96
+
76
97
/** Performs infinite streaming speech recognition */
77
- public static void infiniteStreamingRecognize () throws Exception {
98
+ public static void infiniteStreamingRecognize (String languageCode ) throws Exception {
78
99
79
100
// Microphone Input buffering
80
101
class MicBuffer implements Runnable {
@@ -115,45 +136,41 @@ public void onStart(StreamController controller) {
115
136
}
116
137
117
138
public void onResponse (StreamingRecognizeResponse response ) {
118
-
119
139
responses .add (response );
120
-
121
140
StreamingRecognitionResult result = response .getResultsList ().get (0 );
122
-
123
141
Duration resultEndTime = result .getResultEndTime ();
124
-
125
142
resultEndTimeInMS = (int ) ((resultEndTime .getSeconds () * 1000 )
126
- + (resultEndTime .getNanos () / 1000000 ));
127
-
143
+ + (resultEndTime .getNanos () / 1000000 ));
128
144
double correctedTime = resultEndTimeInMS - bridgingOffset
129
- + (STREAMING_LIMIT * restartCounter );
130
- DecimalFormat format = new DecimalFormat ("0.#" );
145
+ + (STREAMING_LIMIT * restartCounter );
131
146
132
147
SpeechRecognitionAlternative alternative = result .getAlternativesList ().get (0 );
133
148
if (result .getIsFinal ()) {
134
149
System .out .print (GREEN );
135
150
System .out .print ("\033 [2K\r " );
136
- System .out .printf ("%s: %s\n " , format .format (correctedTime ),
137
- alternative .getTranscript ());
138
-
151
+ System .out .printf ("%s: %s [confidence: %.2f]\n " ,
152
+ convertMillisToDate (correctedTime ),
153
+ alternative .getTranscript (),
154
+ alternative .getConfidence ()
155
+ );
139
156
isFinalEndTime = resultEndTimeInMS ;
140
157
lastTranscriptWasFinal = true ;
141
158
} else {
142
159
System .out .print (RED );
143
160
System .out .print ("\033 [2K\r " );
144
- System .out .printf ("%s: %s" , format . format (correctedTime ),
145
- alternative .getTranscript ());
146
-
161
+ System .out .printf ("%s: %s" , convertMillisToDate (correctedTime ),
162
+ alternative .getTranscript ()
163
+ );
147
164
lastTranscriptWasFinal = false ;
148
165
}
149
166
}
150
167
151
- public void onComplete () {}
152
-
153
- public void onError (Throwable t ) {}
168
+ public void onComplete () {
169
+ }
154
170
171
+ public void onError (Throwable t ) {
172
+ }
155
173
};
156
-
157
174
clientStream = client .streamingRecognizeCallable ().splitCall (responseObserver );
158
175
159
176
RecognitionConfig recognitionConfig =
@@ -227,8 +244,8 @@ public void onError(Throwable t) {}
227
244
228
245
request =
229
246
StreamingRecognizeRequest .newBuilder ()
230
- .setStreamingConfig (streamingRecognitionConfig )
231
- .build ();
247
+ .setStreamingConfig (streamingRecognitionConfig )
248
+ .build ();
232
249
233
250
System .out .println (YELLOW );
234
251
System .out .printf ("%d: RESTARTING REQUEST\n " , restartCounter * STREAMING_LIMIT );
@@ -253,13 +270,12 @@ public void onError(Throwable t) {}
253
270
bridgingOffset = finalRequestEndTime ;
254
271
}
255
272
int chunksFromMS = (int ) Math .floor ((finalRequestEndTime
256
- - bridgingOffset ) / chunkTime );
273
+ - bridgingOffset ) / chunkTime );
257
274
// chunksFromMS is the index of the first chunk in lastAudioInput to resend
258
275
bridgingOffset = (int ) Math .floor ((lastAudioInput .size ()
259
- - chunksFromMS ) * chunkTime );
276
+ - chunksFromMS ) * chunkTime );
260
277
// set bridging offset for next request
261
278
for (int i = chunksFromMS ; i < lastAudioInput .size (); i ++) {
262
-
263
279
request =
264
280
StreamingRecognizeRequest .newBuilder ()
265
281
.setAudioContent (lastAudioInput .get (i ))
@@ -288,5 +304,6 @@ public void onError(Throwable t) {}
288
304
}
289
305
}
290
306
}
307
+
291
308
}
292
309
// [END speech_transcribe_infinite_streaming]
0 commit comments