29
29
import com .google .cloud .speech .v1p1beta1 .RecognizeResponse ;
30
30
import com .google .cloud .speech .v1p1beta1 .SpeakerDiarizationConfig ;
31
31
import com .google .cloud .speech .v1p1beta1 .SpeechClient ;
32
-
33
32
import com .google .cloud .speech .v1p1beta1 .SpeechRecognitionAlternative ;
34
33
import com .google .cloud .speech .v1p1beta1 .SpeechRecognitionResult ;
35
34
import com .google .cloud .speech .v1p1beta1 .WordInfo ;
36
35
import com .google .protobuf .ByteString ;
37
-
38
36
import java .nio .file .Files ;
39
37
import java .nio .file .Path ;
40
38
import java .nio .file .Paths ;
@@ -156,14 +154,16 @@ public static void transcribeDiarization(String fileName) throws Exception {
156
154
RecognitionAudio recognitionAudio =
157
155
RecognitionAudio .newBuilder ().setContent (ByteString .copyFrom (content )).build ();
158
156
159
- SpeakerDiarizationConfig speakerDiarizationConfig = SpeakerDiarizationConfig .newBuilder ()
157
+ SpeakerDiarizationConfig speakerDiarizationConfig =
158
+ SpeakerDiarizationConfig .newBuilder ()
160
159
.setEnableSpeakerDiarization (true )
161
160
.setMinSpeakerCount (2 )
162
161
.setMaxSpeakerCount (2 )
163
162
.build ();
164
163
165
164
// Configure request to enable Speaker diarization
166
- RecognitionConfig config = RecognitionConfig .newBuilder ()
165
+ RecognitionConfig config =
166
+ RecognitionConfig .newBuilder ()
167
167
.setEncoding (AudioEncoding .LINEAR16 )
168
168
.setLanguageCode ("en-US" )
169
169
.setSampleRateHertz (8000 )
@@ -175,16 +175,16 @@ public static void transcribeDiarization(String fileName) throws Exception {
175
175
176
176
// Speaker Tags are only included in the last result object, which has only one alternative.
177
177
SpeechRecognitionAlternative alternative =
178
- recognizeResponse .getResults (
179
- recognizeResponse .getResultsCount () - 1 ).getAlternatives (0 );
178
+ recognizeResponse .getResults (recognizeResponse .getResultsCount () - 1 ).getAlternatives (0 );
180
179
181
180
// The alternative is made up of WordInfo objects that contain the speaker_tag.
182
181
WordInfo wordInfo = alternative .getWords (0 );
183
182
int currentSpeakerTag = wordInfo .getSpeakerTag ();
184
183
185
184
// For each word, get all the words associated with one speaker, once the speaker changes,
186
185
// add a new line with the new speaker and their spoken words.
187
- StringBuilder speakerWords = new StringBuilder (
186
+ StringBuilder speakerWords =
187
+ new StringBuilder (
188
188
String .format ("Speaker %d: %s" , wordInfo .getSpeakerTag (), wordInfo .getWord ()));
189
189
190
190
for (int i = 1 ; i < alternative .getWordsCount (); i ++) {
@@ -194,9 +194,7 @@ public static void transcribeDiarization(String fileName) throws Exception {
194
194
speakerWords .append (wordInfo .getWord ());
195
195
} else {
196
196
speakerWords .append (
197
- String .format ("\n Speaker %d: %s" ,
198
- wordInfo .getSpeakerTag (),
199
- wordInfo .getWord ()));
197
+ String .format ("\n Speaker %d: %s" , wordInfo .getSpeakerTag (), wordInfo .getWord ()));
200
198
currentSpeakerTag = wordInfo .getSpeakerTag ();
201
199
}
202
200
}
@@ -214,7 +212,8 @@ public static void transcribeDiarization(String fileName) throws Exception {
214
212
*/
215
213
public static void transcribeDiarizationGcs (String gcsUri ) throws Exception {
216
214
try (SpeechClient speechClient = SpeechClient .create ()) {
217
- SpeakerDiarizationConfig speakerDiarizationConfig = SpeakerDiarizationConfig .newBuilder ()
215
+ SpeakerDiarizationConfig speakerDiarizationConfig =
216
+ SpeakerDiarizationConfig .newBuilder ()
218
217
.setEnableSpeakerDiarization (true )
219
218
.setMinSpeakerCount (2 )
220
219
.setMaxSpeakerCount (2 )
@@ -244,17 +243,18 @@ public static void transcribeDiarizationGcs(String gcsUri) throws Exception {
244
243
// Speaker Tags are only included in the last result object, which has only one alternative.
245
244
LongRunningRecognizeResponse longRunningRecognizeResponse = response .get ();
246
245
SpeechRecognitionAlternative alternative =
247
- longRunningRecognizeResponse . getResults (
248
- longRunningRecognizeResponse .getResultsCount () - 1 )
249
- .getAlternatives (0 );
246
+ longRunningRecognizeResponse
247
+ . getResults ( longRunningRecognizeResponse .getResultsCount () - 1 )
248
+ .getAlternatives (0 );
250
249
251
250
// The alternative is made up of WordInfo objects that contain the speaker_tag.
252
251
WordInfo wordInfo = alternative .getWords (0 );
253
252
int currentSpeakerTag = wordInfo .getSpeakerTag ();
254
253
255
254
// For each word, get all the words associated with one speaker, once the speaker changes,
256
255
// add a new line with the new speaker and their spoken words.
257
- StringBuilder speakerWords = new StringBuilder (
256
+ StringBuilder speakerWords =
257
+ new StringBuilder (
258
258
String .format ("Speaker %d: %s" , wordInfo .getSpeakerTag (), wordInfo .getWord ()));
259
259
260
260
for (int i = 1 ; i < alternative .getWordsCount (); i ++) {
@@ -264,9 +264,7 @@ public static void transcribeDiarizationGcs(String gcsUri) throws Exception {
264
264
speakerWords .append (wordInfo .getWord ());
265
265
} else {
266
266
speakerWords .append (
267
- String .format ("\n Speaker %d: %s" ,
268
- wordInfo .getSpeakerTag (),
269
- wordInfo .getWord ()));
267
+ String .format ("\n Speaker %d: %s" , wordInfo .getSpeakerTag (), wordInfo .getWord ()));
270
268
currentSpeakerTag = wordInfo .getSpeakerTag ();
271
269
}
272
270
}
0 commit comments