Commit 494b689

Add speech streaming recognition.
1 parent 3de5bc0 commit 494b689

6 files changed: +478 additions, −18 deletions

docs/speech-usage.rst

Lines changed: 79 additions & 0 deletions

@@ -151,5 +151,84 @@ words to the vocabulary of the recognizer.
         transcript: Hello, this is a test
         confidence: 0.81
+
+Streaming Recognition
+---------------------
+
+The :meth:`~google.cloud.speech.Client.streaming_recognize` method converts
+speech data to possible text alternatives on the fly.
+
+.. note::
+
+    Streaming recognition requests are limited to 1 minute of audio.
+
+    See: https://cloud.google.com/speech/limits#content
+
+.. code-block:: python
+
+    >>> from google.cloud import speech
+    >>> client = speech.Client()
+    >>> with open('./hello.wav', 'rb') as stream:
+    ...     sample = client.sample(content=stream,
+    ...                            encoding=speech.Encoding.LINEAR16,
+    ...                            sample_rate=16000)
+    ...     responses = list(client.streaming_recognize(sample))
+    ...     print(responses[0].transcript)
+    ...     print(responses[0].confidence)
+    hello
+    0.973458576
+
+By default the recognizer performs continuous recognition (continuing to
+process audio even if the user pauses speaking) until the client closes the
+output stream or the maximum time limit has been reached.
+
+If you only want to recognize a single utterance, set ``single_utterance``
+to ``True`` and only one result will be returned.
+
+See: `Single Utterance`_
+
+.. code-block:: python
+
+    >>> with open('./hello_pause_goodbye.wav', 'rb') as stream:
+    ...     sample = client.sample(content=stream,
+    ...                            encoding=speech.Encoding.LINEAR16,
+    ...                            sample_rate=16000)
+    ...     responses = client.streaming_recognize(sample,
+    ...                                            single_utterance=True)
+    ...     results = list(responses)
+    ...     print(results[0].transcript)
+    ...     print(results[0].confidence)
+    hello
+    0.96523453546
+
+If ``interim_results`` is set to ``True``, interim results (tentative
+hypotheses) may be returned as they become available.
+
+.. code-block:: python
+
+    >>> from google.cloud import speech
+    >>> client = speech.Client()
+    >>> with open('./hello.wav', 'rb') as stream:
+    ...     sample = client.sample(content=stream,
+    ...                            encoding=speech.Encoding.LINEAR16,
+    ...                            sample_rate=16000)
+    ...     for response in client.streaming_recognize(sample,
+    ...                                                interim_results=True):
+    ...         print('=' * 20)
+    ...         print(response[0].transcript)
+    ...         print(response[0].confidence)
+    ====================
+    he
+    None
+    ====================
+    hell
+    None
+    ====================
+    hello
+    0.973458576
+
+.. _Single Utterance: https://cloud.google.com/speech/reference/rpc/google.cloud.speech.v1beta1#streamingrecognitionconfig
 .. _sync_recognize: https://cloud.google.com/speech/reference/rest/v1beta1/speech/syncrecognize
 .. _Speech Asynchronous Recognize: https://cloud.google.com/speech/reference/rest/v1beta1/speech/asyncrecognize
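The interim-results loop above can be sketched without the live API by faking the response stream. The ``Hypothesis`` tuple, ``fake_stream``, and ``consume`` below are illustrative stand-ins, not part of the library:

```python
from collections import namedtuple

# Illustrative stand-in for one streaming hypothesis (not a real library type).
Hypothesis = namedtuple('Hypothesis', ['transcript', 'confidence', 'is_final'])


def fake_stream():
    """Simulate interim hypotheses arriving before the final result."""
    yield Hypothesis('he', None, False)
    yield Hypothesis('hell', None, False)
    yield Hypothesis('hello', 0.973458576, True)


def consume(stream):
    """Collect interim transcripts; return them with the final result."""
    interim = []
    for result in stream:
        if result.is_final:
            return interim, (result.transcript, result.confidence)
        interim.append(result.transcript)
    return interim, None


interim, final = consume(fake_stream())
print(interim)  # ['he', 'hell']
print(final)    # ('hello', 0.973458576)
```

The same consumption pattern applies to the real generator: interim hypotheses carry ``confidence`` of ``None`` until the final result arrives.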

speech/google/cloud/speech/_gax.py

Lines changed: 89 additions & 0 deletions

@@ -101,6 +101,95 @@ def async_recognize(self, sample, language_code=None,
 
         return Operation.from_pb(response, self)
 
+    def streaming_recognize(self, sample, language_code=None,
+                            max_alternatives=None, profanity_filter=None,
+                            speech_context=None, single_utterance=False,
+                            interim_results=False):
+        """Streaming speech recognition.
+
+        .. note::
+
+            Streaming recognition requests are limited to 1 minute of audio.
+            See: https://cloud.google.com/speech/limits#content
+
+        Yields :class:`~streaming_response.StreamingSpeechResponse` instances
+        containing results and metadata from the streaming request.
+
+        :type sample: :class:`~google.cloud.speech.sample.Sample`
+        :param sample: Instance of ``Sample`` containing audio information.
+
+        :type language_code: str
+        :param language_code: (Optional) The language of the supplied audio
+                              as a BCP-47 language tag. Example: ``'en-GB'``.
+                              If omitted, defaults to ``'en-US'``.
+
+        :type max_alternatives: int
+        :param max_alternatives: (Optional) Maximum number of recognition
+                                 hypotheses to be returned. The server may
+                                 return fewer than ``max_alternatives``.
+                                 Valid values are 0-30. A value of 0 or 1
+                                 returns a maximum of 1. Defaults to 1.
+
+        :type profanity_filter: bool
+        :param profanity_filter: (Optional) If ``True``, the server attempts
+                                 to filter out profanities, replacing all but
+                                 the initial character in each filtered word
+                                 with asterisks, e.g. ``'f***'``. If ``False``
+                                 or omitted, profanities are not filtered out.
+
+        :type speech_context: list
+        :param speech_context: (Optional) A list of strings (max 50)
+                               containing word and phrase "hints" so that the
+                               speech recognition is more likely to recognize
+                               them. This can be used to improve the accuracy
+                               for specific words and phrases, and to add new
+                               words to the vocabulary of the recognizer.
+
+        :type single_utterance: bool
+        :param single_utterance: (Optional) If ``False`` or omitted, the
+                                 recognizer performs continuous recognition
+                                 (continuing to process audio even if the
+                                 user pauses speaking) until the client
+                                 closes the output stream (gRPC API) or the
+                                 maximum time limit has been reached.
+                                 Multiple ``SpeechRecognitionResult``
+                                 messages with the ``is_final`` flag set to
+                                 ``True`` may be returned. If ``True``, the
+                                 recognizer detects a single spoken
+                                 utterance. When it detects that the user
+                                 has paused or stopped speaking, it returns
+                                 an ``END_OF_UTTERANCE`` event and ceases
+                                 recognition. It returns no more than one
+                                 ``SpeechRecognitionResult`` with the
+                                 ``is_final`` flag set to ``True``.
+
+        :type interim_results: bool
+        :param interim_results: (Optional) If ``True``, interim results
+                                (tentative hypotheses) may be returned as
+                                they become available (these interim results
+                                are indicated with the ``is_final=False``
+                                flag). If ``False`` or omitted, only
+                                ``is_final=True`` result(s) are returned.
+
+        :raises: :class:`ValueError` if the sample's stream has been closed.
+
+        :rtype: :class:`~google.cloud.grpc.speech.v1beta1\
+                .cloud_speech_pb2.StreamingRecognizeResponse`
+        :returns: ``StreamingRecognizeResponse`` instances.
+        """
+        if sample.content.closed:
+            raise ValueError('Stream is closed.')
+
+        requests = _stream_requests(sample, language_code=language_code,
+                                    max_alternatives=max_alternatives,
+                                    profanity_filter=profanity_filter,
+                                    speech_context=speech_context,
+                                    single_utterance=single_utterance,
+                                    interim_results=interim_results)
+        api = self._gapic_api
+        responses = api.streaming_recognize(requests)
+        return responses
+
     def sync_recognize(self, sample, language_code=None, max_alternatives=None,
                        profanity_filter=None, speech_context=None):
         """Synchronous Speech Recognition.
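The `_stream_requests` helper used above follows the standard gRPC streaming-recognition pattern: the first request carries only the configuration, and every subsequent request carries a chunk of audio. A minimal sketch of that pattern, using plain dicts as stand-ins for the real `StreamingRecognizeRequest` protobuf messages (the names `stream_requests` and `CHUNK_SIZE` are illustrative, not from the library):

```python
import io

# Bytes per audio chunk; real code would use a much larger size.
CHUNK_SIZE = 4


def stream_requests(config, audio_stream):
    """Yield a config-only request first, then one request per audio chunk."""
    yield {'streaming_config': config}
    while True:
        chunk = audio_stream.read(CHUNK_SIZE)
        if not chunk:
            break
        yield {'audio_content': chunk}


requests = list(stream_requests(
    {'encoding': 'LINEAR16', 'sample_rate': 16000},
    io.BytesIO(b'12345678')))
print(len(requests))  # 3: one config request plus two audio chunks
```

Because the audio is read lazily from the stream, the generator can feed arbitrarily long input to the transport without buffering it all in memory, which is why the closed-stream check happens before the generator is handed to the gAPIC layer.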

speech/google/cloud/speech/client.py

Lines changed: 85 additions & 0 deletions

@@ -159,6 +159,91 @@ def speech_api(self):
             self._speech_api = _JSONSpeechAPI(self)
         return self._speech_api
 
+    def streaming_recognize(self, sample, language_code=None,
+                            max_alternatives=None, profanity_filter=None,
+                            speech_context=None, single_utterance=False,
+                            interim_results=False):
+        """Streaming speech recognition.
+
+        .. note::
+
+            Streaming recognition requests are limited to 1 minute of audio.
+            See: https://cloud.google.com/speech/limits#content
+
+        Yields lists of :class:`~google.cloud.speech.alternative.Alternative`
+        containing results and metadata from the streaming request.
+
+        :type sample: :class:`~google.cloud.speech.sample.Sample`
+        :param sample: Instance of ``Sample`` containing audio information.
+
+        :type language_code: str
+        :param language_code: (Optional) The language of the supplied audio
+                              as a BCP-47 language tag. Example: ``'en-GB'``.
+                              If omitted, defaults to ``'en-US'``.
+
+        :type max_alternatives: int
+        :param max_alternatives: (Optional) Maximum number of recognition
+                                 hypotheses to be returned. The server may
+                                 return fewer than ``max_alternatives``.
+                                 Valid values are 0-30. A value of 0 or 1
+                                 returns a maximum of 1. Defaults to 1.
+
+        :type profanity_filter: bool
+        :param profanity_filter: (Optional) If ``True``, the server attempts
+                                 to filter out profanities, replacing all but
+                                 the initial character in each filtered word
+                                 with asterisks, e.g. ``'f***'``. If ``False``
+                                 or omitted, profanities are not filtered out.
+
+        :type speech_context: list
+        :param speech_context: (Optional) A list of strings (max 50)
+                               containing word and phrase "hints" so that the
+                               speech recognition is more likely to recognize
+                               them. This can be used to improve the accuracy
+                               for specific words and phrases, and to add new
+                               words to the vocabulary of the recognizer.
+
+        :type single_utterance: bool
+        :param single_utterance: (Optional) If ``False`` or omitted, the
+                                 recognizer performs continuous recognition
+                                 (continuing to process audio even if the
+                                 user pauses speaking) until the client
+                                 closes the output stream (gRPC API) or the
+                                 maximum time limit has been reached.
+                                 Multiple ``SpeechRecognitionResult``
+                                 messages with the ``is_final`` flag set to
+                                 ``True`` may be returned. If ``True``, the
+                                 recognizer detects a single spoken
+                                 utterance. When it detects that the user
+                                 has paused or stopped speaking, it returns
+                                 an ``END_OF_UTTERANCE`` event and ceases
+                                 recognition. It returns no more than one
+                                 ``SpeechRecognitionResult`` with the
+                                 ``is_final`` flag set to ``True``.
+
+        :type interim_results: bool
+        :param interim_results: (Optional) If ``True``, interim results
+                                (tentative hypotheses) may be returned as
+                                they become available (these interim results
+                                are indicated with the ``is_final=False``
+                                flag). If ``False`` or omitted, only
+                                ``is_final=True`` result(s) are returned.
+
+        :raises: :class:`EnvironmentError` if gRPC is not enabled.
+        """
+        if not self._use_gax:
+            raise EnvironmentError('gRPC is required to use this API.')
+
+        responses = self.speech_api.streaming_recognize(sample, language_code,
+                                                        max_alternatives,
+                                                        profanity_filter,
+                                                        speech_context,
+                                                        single_utterance,
+                                                        interim_results)
+        for response in responses:
+            results = getattr(response, 'results', [])
+            if results or interim_results:
+                for result in results:
+                    yield [Alternative.from_pb(alternative)
+                           for alternative in result.alternatives]
+
     def sync_recognize(self, sample, language_code=None,
                        max_alternatives=None, profanity_filter=None,
                        speech_context=None):
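The loop at the end of the client method flattens the raw protobuf stream: responses with no results are skipped, and each result is turned into a list of its alternatives. That filtering can be sketched in isolation with plain namedtuples standing in for the protobuf types (`Alt`, `Result`, `Response`, and `filter_responses` below are illustrative, not library names):

```python
from collections import namedtuple

# Stand-ins for the protobuf response types (illustrative only).
Alt = namedtuple('Alt', ['transcript', 'confidence'])
Result = namedtuple('Result', ['alternatives'])
Response = namedtuple('Response', ['results'])


def filter_responses(responses):
    """Mimic the client loop: skip result-less responses and flatten
    each result into its list of alternatives."""
    for response in responses:
        results = getattr(response, 'results', [])
        for result in results:
            yield list(result.alternatives)


stream = [
    Response(results=[]),  # e.g. a metadata-only event with no hypotheses
    Response(results=[Result([Alt('hello', 0.97)])]),
]
out = list(filter_responses(stream))
print(len(out))  # 1: the empty response was skipped
```

This keeps the public generator yielding only lists of alternatives, so callers never see transport-level responses that carry no transcription data.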

speech/google/cloud/speech/sample.py

Lines changed: 2 additions & 2 deletions

@@ -47,8 +47,8 @@ class Sample(object):
     default_encoding = Encoding.FLAC
     default_sample_rate = 16000
 
-    def __init__(self, content=None, source_uri=None,
-                 encoding=None, sample_rate=None):
+    def __init__(self, content=None, source_uri=None, encoding=None,
+                 sample_rate=None):
 
         no_source = content is None and source_uri is None
         both_source = content is not None and source_uri is not None
