Skip to content
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright 2018 Google LLC
* Copyright 2019 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -20,14 +20,17 @@
import com.google.api.gax.rpc.ClientStream;
import com.google.api.gax.rpc.ResponseObserver;
import com.google.api.gax.rpc.StreamController;
import com.google.cloud.speech.v1.RecognitionConfig;
import com.google.cloud.speech.v1.SpeechClient;
import com.google.cloud.speech.v1.SpeechRecognitionAlternative;
import com.google.cloud.speech.v1.StreamingRecognitionConfig;
import com.google.cloud.speech.v1.StreamingRecognitionResult;
import com.google.cloud.speech.v1.StreamingRecognizeRequest;
import com.google.cloud.speech.v1.StreamingRecognizeResponse;
import com.google.cloud.speech.v1p1beta1.RecognitionConfig;
import com.google.cloud.speech.v1p1beta1.SpeechClient;
import com.google.cloud.speech.v1p1beta1.SpeechRecognitionAlternative;
import com.google.cloud.speech.v1p1beta1.StreamingRecognitionConfig;
import com.google.cloud.speech.v1p1beta1.StreamingRecognitionResult;
import com.google.cloud.speech.v1p1beta1.StreamingRecognizeRequest;
import com.google.cloud.speech.v1p1beta1.StreamingRecognizeResponse;
import com.google.protobuf.ByteString;
import com.google.protobuf.Duration;
import java.lang.Math;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
Expand All @@ -39,11 +42,29 @@

public class InfiniteStreamRecognize {

private static final int STREAMING_LIMIT = 10000; // 10 seconds

public static final String RED = "\033[0;31m";
public static final String GREEN = "\033[0;32m";
public static final String YELLOW = "\033[0;33m";

// Creating shared object
private static volatile BlockingQueue<byte[]> sharedQueue = new LinkedBlockingQueue();
private static TargetDataLine targetDataLine;
private static int BYTES_PER_BUFFER = 6400; // buffer size in bytes

private static int restartCounter = 0;
private static ArrayList<ByteString> audioInput = new ArrayList<ByteString>();
private static ArrayList<ByteString> lastAudioInput = new ArrayList<ByteString>();
private static int resultEndTimeInMS = 0;
private static int isFinalEndTime = 0;
private static int finalRequestEndTime = 0;
private static boolean newStream = true;
private static double bridgingOffset = 0;
private static boolean lastTranscriptWasFinal = false;
private static StreamController referenceToStreamController;
private static ByteString tempByteString;

public static void main(String... args) {
try {
infiniteStreamingRecognize();
Expand All @@ -60,6 +81,7 @@ class MicBuffer implements Runnable {

@Override
public void run() {
System.out.println(YELLOW);
System.out.println("Start speaking...Press Ctrl-C to stop");
targetDataLine.start();
byte[] data = new byte[BYTES_PER_BUFFER];
Expand Down Expand Up @@ -88,24 +110,48 @@ public void run() {

ArrayList<StreamingRecognizeResponse> responses = new ArrayList<>();

public void onStart(StreamController controller) {}
public void onStart(StreamController controller) {
referenceToStreamController = controller;
}

public void onResponse(StreamingRecognizeResponse response) {

responses.add(response);

StreamingRecognitionResult result = response.getResultsList().get(0);
// There can be several alternative transcripts for a given chunk of speech. Just
// use the first (most likely) one here.

Duration resultEndTime = result.getResultEndTime();

resultEndTimeInMS = (int) ((resultEndTime.getSeconds() * 1000)
+ (resultEndTime.getNanos() / 1000000));

double correctedTime = resultEndTimeInMS - bridgingOffset
+ (STREAMING_LIMIT * restartCounter);
DecimalFormat format = new DecimalFormat("0.#");

SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
System.out.printf("Transcript : %s\n", alternative.getTranscript());
}
if (result.getIsFinal()) {
System.out.print(GREEN);
System.out.print("\033[2K\r");
System.out.printf("%s: %s\n", format.format(correctedTime),
alternative.getTranscript());

public void onComplete() {
System.out.println("Done");
}
isFinalEndTime = resultEndTimeInMS;
lastTranscriptWasFinal = true;
} else {
System.out.print(RED);
System.out.print("\033[2K\r");
System.out.printf("%s: %s", format.format(correctedTime),
alternative.getTranscript());

public void onError(Throwable t) {
System.out.println(t);
lastTranscriptWasFinal = false;
}
}

public void onComplete() {}

public void onError(Throwable t) {}

};

clientStream = client.streamingRecognizeCallable().splitCall(responseObserver);
Expand All @@ -116,8 +162,12 @@ public void onError(Throwable t) {
.setLanguageCode("en-US")
.setSampleRateHertz(16000)
.build();

StreamingRecognitionConfig streamingRecognitionConfig =
StreamingRecognitionConfig.newBuilder().setConfig(recognitionConfig).build();
StreamingRecognitionConfig.newBuilder()
.setConfig(recognitionConfig)
.setInterimResults(true)
.build();

StreamingRecognizeRequest request =
StreamingRecognizeRequest.newBuilder()
Expand Down Expand Up @@ -151,23 +201,76 @@ public void onError(Throwable t) {

long estimatedTime = System.currentTimeMillis() - startTime;

if (estimatedTime >= 55000) {
if (estimatedTime >= STREAMING_LIMIT) {

clientStream.closeSend();
referenceToStreamController.cancel(); // remove Observer

if (resultEndTimeInMS > 0) {
finalRequestEndTime = isFinalEndTime;
}
resultEndTimeInMS = 0;

lastAudioInput = null;
lastAudioInput = audioInput;
audioInput = new ArrayList<ByteString>();

restartCounter++;

if (!lastTranscriptWasFinal) {
System.out.print('\n');
}

newStream = true;

clientStream = client.streamingRecognizeCallable().splitCall(responseObserver);

request =
StreamingRecognizeRequest.newBuilder()
.setStreamingConfig(streamingRecognitionConfig)
.build();

System.out.println(YELLOW);
System.out.printf("%d: RESTARTING REQUEST\n", restartCounter * STREAMING_LIMIT);

startTime = System.currentTimeMillis();

} else {

if ((newStream) && (lastAudioInput.size() > 0)) {
double chunkTime = STREAMING_LIMIT / lastAudioInput.size(); // ms
if (chunkTime != 0) {
if (bridgingOffset < 0) {
bridgingOffset = 0;
}
if (bridgingOffset > finalRequestEndTime) {
bridgingOffset = finalRequestEndTime;
}
int chunksFromMS = (int) Math.floor((finalRequestEndTime
- bridgingOffset) / chunkTime);
bridgingOffset = (int) Math.floor((lastAudioInput.size()
- chunksFromMS) * chunkTime);
for (int i = chunksFromMS; i < lastAudioInput.size(); i++) {

request =
StreamingRecognizeRequest.newBuilder()
.setAudioContent(lastAudioInput.get(i))
.build();
clientStream.send(request);
}
}
newStream = false;
}

tempByteString = ByteString.copyFrom(sharedQueue.take());

request =
StreamingRecognizeRequest.newBuilder()
.setAudioContent(ByteString.copyFrom(sharedQueue.take()))
.setAudioContent(tempByteString)
.build();

audioInput.add(tempByteString);

}

clientStream.send(request);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -699,7 +699,7 @@ public void onError(Throwable t) {
System.out.println("Stop speaking.");
targetDataLine.stop();
targetDataLine.close();
break;
// break;
}
request =
StreamingRecognizeRequest.newBuilder()
Expand Down