Skip to content

Commit f9ef990

Browse files
chore(samples): adds model adaptation sample (#468)
1 parent e5fb373 commit f9ef990

File tree

2 files changed

+227
-0
lines changed

2 files changed

+227
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
/*
2+
* Copyright 2021 Google LLC
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package com.example.speech;
18+
19+
// [START speech_transcribe_with_model_adaptation_gcs_beta]
20+
import com.google.api.gax.rpc.ApiException;
21+
import com.google.cloud.speech.v1p1beta1.AdaptationClient;
22+
import com.google.cloud.speech.v1p1beta1.CreateCustomClassRequest;
23+
import com.google.cloud.speech.v1p1beta1.CreatePhraseSetRequest;
24+
import com.google.cloud.speech.v1p1beta1.CustomClass;
25+
import com.google.cloud.speech.v1p1beta1.CustomClass.ClassItem;
26+
import com.google.cloud.speech.v1p1beta1.LocationName;
27+
import com.google.cloud.speech.v1p1beta1.PhraseSet;
28+
import com.google.cloud.speech.v1p1beta1.PhraseSet.Phrase;
29+
import com.google.cloud.speech.v1p1beta1.RecognitionAudio;
30+
import com.google.cloud.speech.v1p1beta1.RecognitionConfig;
31+
import com.google.cloud.speech.v1p1beta1.RecognitionConfig.AudioEncoding;
32+
import com.google.cloud.speech.v1p1beta1.RecognizeResponse;
33+
import com.google.cloud.speech.v1p1beta1.SpeechAdaptation;
34+
import com.google.cloud.speech.v1p1beta1.SpeechClient;
35+
import com.google.cloud.speech.v1p1beta1.SpeechRecognitionAlternative;
36+
import com.google.cloud.speech.v1p1beta1.SpeechRecognitionResult;
37+
import java.util.List;
38+
39+
public class SpeechModelAdaptationBeta {
40+
41+
public static void main(String[] args) throws Exception {
42+
String uriPath = "gs://cloud-samples-tests/speech/brooklyn.flac";
43+
String projectId = "YOUR_PROJECT_ID";
44+
// Region e.g. us-west1
45+
String location = "LOCATION_REGION";
46+
// This value should be 4-63 characters, and valid characters are /[a-z][0-9]-/.
47+
String customClassId = "YOUR-CUSTOM-CLASS-ID";
48+
// This value should be 4-63 characters, and valid characters are /[a-z][0-9]-/.
49+
String phraseSetId = "YOUR-PHRASE-SET-ID";
50+
transcribeWithModelAdaptation(projectId, location, uriPath, customClassId, phraseSetId);
51+
}
52+
53+
/**
54+
* Transcribe with model adaptation
55+
*
56+
* @param projectId your project id
57+
* @param location the region
58+
* @param gcsUri the path to the audio file
59+
*/
60+
public static void transcribeWithModelAdaptation(
61+
String projectId, String location, String gcsUri, String customClassId, String phraseSetId)
62+
throws Exception {
63+
// Initialize client that will be used to send requests. This client only needs to be created
64+
// once, and can be reused for multiple requests. After completing all of your requests, call
65+
// the "close" method on the client to safely clean up any remaining background resources.
66+
try (AdaptationClient adaptationClient = AdaptationClient.create()) {
67+
68+
// Create `PhraseSet` and `CustomClasses` to create custom lists of similar
69+
// items that are likely to occur in your input data.
70+
71+
// The parent resource where the custom class and phrase set will be created.
72+
LocationName parent = LocationName.of(projectId, location);
73+
74+
// Create the custom class
75+
CreateCustomClassRequest classRequest =
76+
CreateCustomClassRequest.newBuilder()
77+
.setParent(parent.toString())
78+
.setCustomClassId(customClassId)
79+
.setCustomClass(
80+
CustomClass.newBuilder()
81+
.addItems(ClassItem.newBuilder().setValue("sushido"))
82+
.addItems(ClassItem.newBuilder().setValue("altura"))
83+
.addItems(ClassItem.newBuilder().setValue("taneda"))
84+
.build())
85+
.build();
86+
CustomClass classResponse = adaptationClient.createCustomClass(classRequest);
87+
88+
// Create the phrase set
89+
CreatePhraseSetRequest phraseRequest =
90+
CreatePhraseSetRequest.newBuilder()
91+
.setParent(parent.toString())
92+
.setPhraseSetId(phraseSetId)
93+
.setPhraseSet(
94+
PhraseSet.newBuilder()
95+
.setBoost(10)
96+
.addPhrases(
97+
Phrase.newBuilder()
98+
.setValue(
99+
String.format("Visit restaurants like %s%n", customClassId)))
100+
.build())
101+
.build();
102+
PhraseSet phraseResponse = adaptationClient.createPhraseSet(phraseRequest);
103+
104+
// Next section shows how to use the newly created custom class and phrase set
105+
// to send a transcription request with speech adaptation
106+
107+
// Speech adaptation configuration
108+
SpeechAdaptation speechAdaptation =
109+
SpeechAdaptation.newBuilder()
110+
.addCustomClasses(classResponse)
111+
.addPhraseSets(phraseResponse)
112+
.build();
113+
114+
// Initialize client that will be used to send requests. This client only needs to be created
115+
// once, and can be reused for multiple requests. After completing all of your requests, call
116+
// the "close" method on the client to safely clean up any remaining background resources.
117+
try (SpeechClient speechClient = SpeechClient.create()) {
118+
119+
// The path to the audio file to transcribe
120+
// gcsUri URI for audio file in Cloud Storage, e.g. gs://[BUCKET]/[FILE]
121+
122+
// Builds the sync recognize request
123+
RecognitionConfig config =
124+
RecognitionConfig.newBuilder()
125+
.setEncoding(AudioEncoding.FLAC)
126+
.setSampleRateHertz(16000)
127+
.setLanguageCode("en-US")
128+
.setAdaptation(speechAdaptation) // Set the adaptation object
129+
.build();
130+
RecognitionAudio audio = RecognitionAudio.newBuilder().setUri(gcsUri).build();
131+
132+
// Performs speech recognition on the audio file.
133+
RecognizeResponse response = speechClient.recognize(config, audio);
134+
List<SpeechRecognitionResult> results = response.getResultsList();
135+
136+
for (SpeechRecognitionResult result : results) {
137+
// There can be several alternative transcripts for a given chunk of speech. Just use the
138+
// first (most likely) one here.
139+
SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
140+
System.out.printf("Adapted Transcription: %s%n", alternative.getTranscript());
141+
}
142+
}
143+
} catch (ApiException e) {
144+
System.out.println("Client Interaction Error: \n" + e.toString());
145+
}
146+
}
147+
}
148+
// [END speech_transcribe_with_model_adaptation_gcs_beta]
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
/*
2+
* Copyright 2021 Google LLC
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package com.example.speech;
18+
19+
import static com.google.common.truth.Truth.assertThat;
20+
21+
import com.google.cloud.speech.v1p1beta1.AdaptationClient;
22+
import com.google.cloud.speech.v1p1beta1.CustomClassName;
23+
import com.google.cloud.speech.v1p1beta1.DeleteCustomClassRequest;
24+
import com.google.cloud.speech.v1p1beta1.DeletePhraseSetRequest;
25+
import com.google.cloud.speech.v1p1beta1.PhraseSetName;
26+
import java.io.ByteArrayOutputStream;
27+
import java.io.PrintStream;
28+
import java.util.UUID;
29+
import org.junit.After;
30+
import org.junit.Before;
31+
import org.junit.Test;
32+
33+
public class SpeechModelAdaptationBetaTest {
34+
private static String UNIQUE_ID = UUID.randomUUID().toString();
35+
private static final String AUDIO_FILE = "gs://cloud-samples-tests/speech/brooklyn.flac";
36+
private static final String PROJECT_ID = System.getenv("GOOGLE_CLOUD_PROJECT");
37+
private static final String LOCATION = "us-west1";
38+
private static final String CUSTOM_CLASS_ID = "customClassId" + UNIQUE_ID;
39+
private static final String PHRASE_SET_ID = "phraseSetId" + UNIQUE_ID;
40+
private ByteArrayOutputStream bout;
41+
private PrintStream stdout;
42+
private PrintStream out;
43+
44+
@Before
45+
public void setUp() {
46+
bout = new ByteArrayOutputStream();
47+
out = new PrintStream(bout);
48+
stdout = System.out;
49+
System.setOut(out);
50+
}
51+
52+
@After
53+
public void tearDown() throws Exception {
54+
System.setOut(stdout);
55+
try (AdaptationClient adaptationClient = AdaptationClient.create()) {
56+
// clean up resources
57+
DeleteCustomClassRequest customClassDeleteRequest =
58+
DeleteCustomClassRequest.newBuilder()
59+
.setName(CustomClassName.of(PROJECT_ID, LOCATION, CUSTOM_CLASS_ID).toString())
60+
.build();
61+
adaptationClient.deleteCustomClass(customClassDeleteRequest);
62+
63+
// clean up resources
64+
DeletePhraseSetRequest phraseSetDeleteRequest =
65+
DeletePhraseSetRequest.newBuilder()
66+
.setName(PhraseSetName.of(PROJECT_ID, LOCATION, PHRASE_SET_ID).toString())
67+
.build();
68+
adaptationClient.deletePhraseSet(phraseSetDeleteRequest);
69+
}
70+
}
71+
72+
@Test
73+
public void testSpeechModelAdaptationBeta() throws Exception {
74+
SpeechModelAdaptationBeta.transcribeWithModelAdaptation(
75+
PROJECT_ID, LOCATION, AUDIO_FILE, CUSTOM_CLASS_ID, PHRASE_SET_ID);
76+
String got = bout.toString();
77+
assertThat(got).contains("Adapted Transcription:");
78+
}
79+
}

0 commit comments

Comments
 (0)