1616
1717package com .example .speech ;
1818
19+ import com .google .api .gax .core .ApiStreamObserver ;
1920import com .google .api .gax .grpc .OperationFuture ;
20- import com .google .cloud .speech .spi .v1beta1 .SpeechClient ;
21- import com .google .cloud .speech .v1beta1 .AsyncRecognizeResponse ;
22- import com .google .cloud .speech .v1beta1 .RecognitionAudio ;
23- import com .google .cloud .speech .v1beta1 .RecognitionConfig ;
24- import com .google .cloud .speech .v1beta1 .RecognitionConfig .AudioEncoding ;
25- import com .google .cloud .speech .v1beta1 .SpeechRecognitionAlternative ;
26- import com .google .cloud .speech .v1beta1 .SpeechRecognitionResult ;
27- import com .google .cloud .speech .v1beta1 .SyncRecognizeResponse ;
21+ import com .google .api .gax .grpc .StreamingCallable ;
22+ import com .google .cloud .speech .spi .v1 .SpeechClient ;
23+ import com .google .cloud .speech .v1 .LongRunningRecognizeResponse ;
24+ import com .google .cloud .speech .v1 .RecognitionAudio ;
25+ import com .google .cloud .speech .v1 .RecognitionConfig ;
26+ import com .google .cloud .speech .v1 .RecognitionConfig .AudioEncoding ;
27+ import com .google .cloud .speech .v1 .RecognizeResponse ;
28+ import com .google .cloud .speech .v1 .SpeechRecognitionAlternative ;
29+ import com .google .cloud .speech .v1 .SpeechRecognitionResult ;
30+ import com .google .cloud .speech .v1 .StreamingRecognitionConfig ;
31+ import com .google .cloud .speech .v1 .StreamingRecognitionResult ;
32+ import com .google .cloud .speech .v1 .StreamingRecognizeRequest ;
33+ import com .google .cloud .speech .v1 .StreamingRecognizeResponse ;
34+ import com .google .common .util .concurrent .SettableFuture ;
2835import com .google .protobuf .ByteString ;
2936
3037import java .io .IOException ;
@@ -40,7 +47,7 @@ public static void main(String... args) throws Exception {
4047 System .out .printf (
4148 "\t java %s \" <command>\" \" <path-to-image>\" \n "
4249 + "Commands:\n "
43- + "\t syncrecognize | asyncrecognize\n "
50+ + "\t syncrecognize | asyncrecognize | streamrecognize \n "
4451 + "Path:\n \t A file path (ex: ./resources/audio.raw) or a URI "
4552 + "for a Cloud Storage resource (gs://...)\n " ,
4653 Recognize .class .getCanonicalName ());
@@ -62,7 +69,11 @@ public static void main(String... args) throws Exception {
6269 } else {
6370 asyncRecognizeFile (path );
6471 }
72+ } else if (command .equals ("streamrecognize" )) {
73+ streamingRecognizeFile (path );
74+ //streamingRecognizeEasy(path);
6575 }
76+
6677 }
6778
6879 /**
@@ -80,14 +91,15 @@ public static void syncRecognizeFile(String fileName) throws Exception, IOExcept
8091 // Configure request with local raw PCM audio
8192 RecognitionConfig config = RecognitionConfig .newBuilder ()
8293 .setEncoding (AudioEncoding .LINEAR16 )
83- .setSampleRate (16000 )
94+ .setLanguageCode ("en-US" )
95+ .setSampleRateHertz (16000 )
8496 .build ();
8597 RecognitionAudio audio = RecognitionAudio .newBuilder ()
8698 .setContent (audioBytes )
8799 .build ();
88100
89101 // Use blocking call to get audio transcript
90- SyncRecognizeResponse response = speech .syncRecognize (config , audio );
102+ RecognizeResponse response = speech .recognize (config , audio );
91103 List <SpeechRecognitionResult > results = response .getResultsList ();
92104
93105 for (SpeechRecognitionResult result : results ) {
@@ -111,14 +123,15 @@ public static void syncRecognizeGcs(String gcsUri) throws Exception, IOException
111123 // Builds the request for remote FLAC file
112124 RecognitionConfig config = RecognitionConfig .newBuilder ()
113125 .setEncoding (AudioEncoding .FLAC )
114- .setSampleRate (16000 )
126+ .setLanguageCode ("en-US" )
127+ .setSampleRateHertz (16000 )
115128 .build ();
116129 RecognitionAudio audio = RecognitionAudio .newBuilder ()
117130 .setUri (gcsUri )
118131 .build ();
119132
120133 // Use blocking call for getting audio transcript
121- SyncRecognizeResponse response = speech .syncRecognize (config , audio );
134+ RecognizeResponse response = speech .recognize (config , audio );
122135 List <SpeechRecognitionResult > results = response .getResultsList ();
123136
124137 for (SpeechRecognitionResult result : results ) {
@@ -130,6 +143,7 @@ public static void syncRecognizeGcs(String gcsUri) throws Exception, IOException
130143 speech .close ();
131144 }
132145
146+ /*
133147 /**
134148 * Performs non-blocking speech recognition on raw PCM audio and prints
135149 * the transcription.
@@ -147,14 +161,16 @@ public static void asyncRecognizeFile(String fileName) throws Exception, IOExcep
147161 // Configure request with local raw PCM audio
148162 RecognitionConfig config = RecognitionConfig .newBuilder ()
149163 .setEncoding (AudioEncoding .LINEAR16 )
150- .setSampleRate (16000 )
164+ .setLanguageCode ("en-US" )
165+ .setSampleRateHertz (16000 )
151166 .build ();
152167 RecognitionAudio audio = RecognitionAudio .newBuilder ()
153168 .setContent (audioBytes )
154169 .build ();
155170
156171 // Use non-blocking call for getting file transcription
157- OperationFuture <AsyncRecognizeResponse > response = speech .asyncRecognizeAsync (config , audio );
172+ OperationFuture <LongRunningRecognizeResponse > response =
173+ speech .longRunningRecognizeAsync (config , audio );
158174 while (!response .isDone ()) {
159175 System .out .println ("Waiting for response..." );
160176 Thread .sleep (200 );
@@ -175,23 +191,25 @@ public static void asyncRecognizeFile(String fileName) throws Exception, IOExcep
175191 * Performs non-blocking speech recognition on remote FLAC file and prints
176192 * the transcription.
177193 *
178- * @param gcsUri the path to the remote FLAC audio file to transcribe.
194+ * @param gcsUri the path to the remote FLAC audio file to transcribe.
179195 */
180196 public static void asyncRecognizeGcs (String gcsUri ) throws Exception , IOException {
181197 // Instantiates a client with GOOGLE_APPLICATION_CREDENTIALS
182198 SpeechClient speech = SpeechClient .create ();
183199
184- // Configure remote file request for FLAC file
200+ // Configure remote file request for FLAC file
185201 RecognitionConfig config = RecognitionConfig .newBuilder ()
186202 .setEncoding (AudioEncoding .FLAC )
187- .setSampleRate (16000 )
203+ .setLanguageCode ("en-US" )
204+ .setSampleRateHertz (16000 )
188205 .build ();
189206 RecognitionAudio audio = RecognitionAudio .newBuilder ()
190207 .setUri (gcsUri )
191208 .build ();
192209
193210 // Use non-blocking call for getting file transcription
194- OperationFuture <AsyncRecognizeResponse > response = speech .asyncRecognizeAsync (config , audio );
211+ OperationFuture <LongRunningRecognizeResponse > response =
212+ speech .longRunningRecognizeAsync (config , audio );
195213 while (!response .isDone ()) {
196214 System .out .println ("Waiting for response..." );
197215 Thread .sleep (200 );
@@ -207,4 +225,86 @@ public static void asyncRecognizeGcs(String gcsUri) throws Exception, IOExceptio
207225 }
208226 speech .close ();
209227 }
228+
229+ /**
230+ * Performs streaming speech recognition on raw PCM audio data.
231+ *
232+ * @param fileName the path to a PCM audio file to transcribe.
233+ */
234+ public static void streamingRecognizeFile (String fileName ) throws Exception , IOException {
235+ Path path = Paths .get (fileName );
236+ byte [] data = Files .readAllBytes (path );
237+
238+ // Instantiates a client with GOOGLE_APPLICATION_CREDENTIALS
239+ SpeechClient speech = SpeechClient .create ();
240+
241+ // Configure request with local raw PCM audio
242+ RecognitionConfig recConfig = RecognitionConfig .newBuilder ()
243+ .setEncoding (AudioEncoding .LINEAR16 )
244+ .setLanguageCode ("en-US" )
245+ .setSampleRateHertz (16000 )
246+ .build ();
247+ StreamingRecognitionConfig config = StreamingRecognitionConfig .newBuilder ()
248+ .setConfig (recConfig )
249+ .build ();
250+
251+ class ResponseApiStreamingObserver <T > implements ApiStreamObserver <T > {
252+ private final SettableFuture <List <T >> future = SettableFuture .create ();
253+ private final List <T > messages = new java .util .ArrayList <T >();
254+
255+ @ Override
256+ public void onNext (T message ) {
257+ messages .add (message );
258+ }
259+
260+ @ Override
261+ public void onError (Throwable t ) {
262+ future .setException (t );
263+ }
264+
265+ @ Override
266+ public void onCompleted () {
267+ future .set (messages );
268+ }
269+
270+ // Returns the SettableFuture object to get received messages / exceptions.
271+ public SettableFuture <List <T >> future () {
272+ return future ;
273+ }
274+ }
275+
276+ ResponseApiStreamingObserver <StreamingRecognizeResponse > responseObserver =
277+ new ResponseApiStreamingObserver <StreamingRecognizeResponse >();
278+
279+ StreamingCallable <StreamingRecognizeRequest ,StreamingRecognizeResponse > callable =
280+ speech .streamingRecognizeCallable ();
281+
282+ ApiStreamObserver <StreamingRecognizeRequest > requestObserver =
283+ callable .bidiStreamingCall (responseObserver );
284+
285+ // The first request must **only** contain the audio configuration:
286+ requestObserver .onNext (StreamingRecognizeRequest .newBuilder ()
287+ .setStreamingConfig (config )
288+ .build ());
289+
290+ // Subsequent requests must **only** contain the audio data.
291+ requestObserver .onNext (StreamingRecognizeRequest .newBuilder ()
292+ .setAudioContent (ByteString .copyFrom (data ))
293+ .build ());
294+
295+ // Mark transmission as completed after sending the data.
296+ requestObserver .onCompleted ();
297+
298+ List <StreamingRecognizeResponse > responses = responseObserver .future ().get ();
299+
300+ for (StreamingRecognizeResponse response : responses ) {
301+ for (StreamingRecognitionResult result : response .getResultsList ()) {
302+ for (SpeechRecognitionAlternative alternative : result .getAlternativesList ()) {
303+ System .out .println (alternative .getTranscript ());
304+ }
305+ }
306+ }
307+ speech .close ();
308+ }
309+
210310}
0 commit comments