
Commit fc65abd

nirupa-kumar authored and chingor13 committed
samples: [DO_NOT_MERGE] Microphone streaming with a 1 minute duration. (#1185)
* Microphone streaming with a 1 minute duration.
* Fixed audit issues.
* Fixing issues after review.
* Fixing review issues.
1 parent 7c5b71f commit fc65abd
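
In outline, the new sample follows the standard bidirectional-streaming pattern: open a ClientStream with a ResponseObserver, send a configuration-only StreamingRecognizeRequest first, then keep sending raw microphone bytes as audio-content requests until the one-minute cutoff. The sketch below merely condenses the method added in this diff; withinOneMinute() and readMicChunk() are hypothetical placeholders for the TargetDataLine and timing code in the actual change.

// Condensed sketch of the pattern in streamingMicRecognize() below (not extra code in the commit).
ClientStream<StreamingRecognizeRequest> stream =
    client.streamingRecognizeCallable().splitCall(responseObserver);

// The first request on the stream carries only the configuration.
stream.send(
    StreamingRecognizeRequest.newBuilder()
        .setStreamingConfig(streamingRecognitionConfig)
        .build());

// Every following request carries only audio bytes captured from the microphone.
while (withinOneMinute()) {                 // hypothetical helper: 60 s elapsed-time check
  byte[] chunk = readMicChunk();            // hypothetical helper: wraps TargetDataLine/AudioInputStream
  stream.send(
      StreamingRecognizeRequest.newBuilder()
          .setAudioContent(ByteString.copyFrom(chunk))
          .build());
}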

1 file changed: +113 −7 lines

speech/snippets/src/main/java/com/example/speech/Recognize.java  (+113 −7)
@@ -19,6 +19,9 @@
 import com.google.api.gax.longrunning.OperationFuture;
 import com.google.api.gax.rpc.ApiStreamObserver;
 import com.google.api.gax.rpc.BidiStreamingCallable;
+import com.google.api.gax.rpc.ClientStream;
+import com.google.api.gax.rpc.ResponseObserver;
+import com.google.api.gax.rpc.StreamController;
 import com.google.cloud.speech.v1p1beta1.LongRunningRecognizeMetadata;
 import com.google.cloud.speech.v1p1beta1.LongRunningRecognizeResponse;
 import com.google.cloud.speech.v1p1beta1.RecognitionAudio;
@@ -47,6 +50,13 @@
 import java.util.ArrayList;
 import java.util.List;
 
+import javax.sound.sampled.AudioFormat;
+import javax.sound.sampled.AudioInputStream;
+import javax.sound.sampled.AudioSystem;
+import javax.sound.sampled.DataLine;
+import javax.sound.sampled.DataLine.Info;
+import javax.sound.sampled.TargetDataLine;
+
 public class Recognize {
 
   /** Run speech recognition tasks. */
@@ -56,9 +66,10 @@ public static void main(String... args) throws Exception {
       System.out.printf(
           "\tjava %s \"<command>\" \"<path-to-image>\"\n"
               + "Commands:\n"
-              + "\tsyncrecognize | asyncrecognize | streamrecognize | wordoffsets\n"
-              + "\t| model-selection | auto-punctuation | stream-punctuation | enhanced-model\n"
-              + "\t| metadata | diarization | multi-channel | multi-language | word-level-conf"
+              + "\tsyncrecognize | asyncrecognize | streamrecognize | micstreamrecognize \n"
+              + "\t| wordoffsets | model-selection | auto-punctuation | stream-punctuation \n"
+              + "\t| enhanced-model| metadata | diarization | multi-channel | multi-language \n"
+              + "\t | word-level-conf"
               + "Path:\n\tA file path (ex: ./resources/audio.raw) or a URI "
               + "for a Cloud Storage resource (gs://...)\n",
           Recognize.class.getCanonicalName());
@@ -88,6 +99,8 @@ public static void main(String... args) throws Exception {
       }
     } else if (command.equals("streamrecognize")) {
       streamingRecognizeFile(path);
+    } else if (command.equals("micstreamrecognize")) {
+      streamingMicRecognize();
     } else if (command.equals("model-selection")) {
       if (path.startsWith("gs://")) {
         transcribeModelSelectionGcs(path);
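
Since the new method is public, static, and takes no arguments, it can also be driven directly rather than through this command dispatch; a minimal sketch (the driver class name is hypothetical, and the sample must be on the classpath with Cloud credentials configured):

import com.example.speech.Recognize;

// Hypothetical driver class, not part of the commit: calls the new sample method directly.
public class MicStreamDemo {
  public static void main(String[] args) throws Exception {
    // Streams microphone audio to Cloud Speech-to-Text for roughly one minute,
    // then prints the collected transcripts (see streamingMicRecognize() below).
    Recognize.streamingMicRecognize();
  }
}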
@@ -704,6 +717,97 @@ public SettableFuture<List<T>> future() {
   }
   // [END speech_stream_recognize_punctuation]
 
+  // [START speech_streaming_mic_recognize]
+  /** Performs microphone streaming speech recognition with a duration of 1 minute. */
+  public static void streamingMicRecognize() throws Exception {
+
+    ResponseObserver<StreamingRecognizeResponse> responseObserver = null;
+    try (SpeechClient client = SpeechClient.create()) {
+
+      responseObserver =
+          new ResponseObserver<StreamingRecognizeResponse>() {
+            ArrayList<StreamingRecognizeResponse> responses = new ArrayList<>();
+
+            public void onStart(StreamController controller) {}
+
+            public void onResponse(StreamingRecognizeResponse response) {
+              responses.add(response);
+            }
+
+            public void onComplete() {
+              for (StreamingRecognizeResponse response : responses) {
+                StreamingRecognitionResult result = response.getResultsList().get(0);
+                SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
+                System.out.printf("Transcript : %s\n", alternative.getTranscript());
+              }
+            }
+
+            public void onError(Throwable t) {
+              System.out.println(t);
+            }
+          };
+
+      ClientStream<StreamingRecognizeRequest> clientStream =
+          client.streamingRecognizeCallable().splitCall(responseObserver);
+
+      RecognitionConfig recognitionConfig =
+          RecognitionConfig.newBuilder()
+              .setEncoding(RecognitionConfig.AudioEncoding.LINEAR16)
+              .setLanguageCode("en-US")
+              .setSampleRateHertz(16000)
+              .build();
+      StreamingRecognitionConfig streamingRecognitionConfig =
+          StreamingRecognitionConfig.newBuilder().setConfig(recognitionConfig).build();
+
+      StreamingRecognizeRequest request =
+          StreamingRecognizeRequest.newBuilder()
+              .setStreamingConfig(streamingRecognitionConfig)
+              .build(); // The first request in a streaming call has to be a config
+
+      clientStream.send(request);
+      // SampleRate:16000Hz, SampleSizeInBits: 16, Number of channels: 1, Signed: true,
+      // bigEndian: false
+      AudioFormat audioFormat = new AudioFormat(16000, 16, 1, true, false);
+      DataLine.Info targetInfo =
+          new Info(
+              TargetDataLine.class,
+              audioFormat); // Set the system information to read from the microphone audio stream
+
+      if (!AudioSystem.isLineSupported(targetInfo)) {
+        System.out.println("Microphone not supported");
+        System.exit(0);
+      }
+      // Target data line captures the audio stream the microphone produces.
+      TargetDataLine targetDataLine = (TargetDataLine) AudioSystem.getLine(targetInfo);
+      targetDataLine.open(audioFormat);
+      targetDataLine.start();
+      System.out.println("Start speaking");
+      long startTime = System.currentTimeMillis();
+      // Audio Input Stream
+      AudioInputStream audio = new AudioInputStream(targetDataLine);
+      while (true) {
+        long estimatedTime = System.currentTimeMillis() - startTime;
+        byte[] data = new byte[6400];
+        audio.read(data);
+        if (estimatedTime > 60000) { // 60 seconds
+          System.out.println("Stop speaking.");
+          targetDataLine.stop();
+          targetDataLine.close();
+          break;
+        }
+        request =
+            StreamingRecognizeRequest.newBuilder()
+                .setAudioContent(ByteString.copyFrom(data))
+                .build();
+        clientStream.send(request);
+      }
+    } catch (Exception e) {
+      System.out.println(e);
+    }
+    responseObserver.onComplete();
+  }
+  // [END speech_streaming_mic_recognize]
+
 // [START speech_transcribe_file_with_enhanced_model]
 /**
  * Transcribe the given audio file using an enhanced model.
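
One caveat for readers adapting the capture loop above: AudioInputStream.read(byte[]) returns the number of bytes actually read, which the sample ignores, so a short read would send a buffer that is partly stale or zero-filled. A hedged variant of the loop body (a sketch only, not part of this commit) that sends just the captured bytes:

// Sketch only (not in the commit): respect the byte count returned by read().
byte[] data = new byte[6400];
int bytesRead = audio.read(data); // may be less than data.length, or -1 at end of stream
if (bytesRead > 0) {
  clientStream.send(
      StreamingRecognizeRequest.newBuilder()
          .setAudioContent(ByteString.copyFrom(data, 0, bytesRead))
          .build());
}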
@@ -833,8 +937,9 @@ public static void transcribeDiarization(String fileName) throws Exception {
       SpeechRecognitionAlternative alternative = result.getAlternatives(0);
       System.out.format("Transcript : %s\n", alternative.getTranscript());
       // The words array contains the entire transcript up until that point.
-      //Referencing the last spoken word to get the associated Speaker tag
-      System.out.format("Speaker Tag %s: %s\n",
+      // Referencing the last spoken word to get the associated Speaker tag
+      System.out.format(
+          "Speaker Tag %s: %s\n",
           alternative.getWords((alternative.getWordsCount() - 1)).getSpeakerTag(),
           alternative.getTranscript());
     }
@@ -877,8 +982,9 @@ public static void transcribeDiarizationGcs(String gcsUri) throws Exception {
       // use the first (most likely) one here.
       SpeechRecognitionAlternative alternative = result.getAlternatives(0);
       // The words array contains the entire transcript up until that point.
-      //Referencing the last spoken word to get the associated Speaker tag
-      System.out.format("Speaker Tag %s:%s\n",
+      // Referencing the last spoken word to get the associated Speaker tag
+      System.out.format(
+          "Speaker Tag %s:%s\n",
           alternative.getWords((alternative.getWordsCount() - 1)).getSpeakerTag(),
           alternative.getTranscript());
     }
