forked from pool/python-SpeechRecognition
OBS-URL: https://build.opensuse.org/package/show/devel:languages:python/python-SpeechRecognition?expand=0&rev=16
139 lines
7.9 KiB
Diff
139 lines
7.9 KiB
Diff
From ca422f49dcd10c7f2d62972432be2a24a9cbd167 Mon Sep 17 00:00:00 2001
|
|
From: Francis Tseng <f+accounts@frnsys.com>
|
|
Date: Tue, 12 Mar 2019 10:22:55 -0400
|
|
Subject: [PATCH 1/2] switched to google-cloud-speech library (other one is
|
|
deprecated)
|
|
|
|
---
|
|
README.rst | 8 +---
|
|
speech_recognition/__init__.py | 74 ++++++++++++++++++++---------------------
|
|
2 files changed, 40 insertions(+), 42 deletions(-)
|
|
|
|
--- a/README.rst
|
|
+++ b/README.rst
|
|
@@ -128,16 +128,14 @@ Note that the versions available in most
|
|
|
|
See `Notes on using PocketSphinx <https://github.com/Uberi/speech_recognition/blob/master/reference/pocketsphinx.rst>`__ for information about installing languages, compiling PocketSphinx, and building language packs from online resources. This document is also included under ``reference/pocketsphinx.rst``.
|
|
|
|
-Google API Client Library for Python (for Google Cloud Speech API users)
|
|
+Google Cloud Speech Library for Python (for Google Cloud Speech API users)
|
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
|
|
-`Google API Client Library for Python <https://developers.google.com/api-client-library/python/>`__ is required if and only if you want to use the Google Cloud Speech API (``recognizer_instance.recognize_google_cloud``).
|
|
+`Google Cloud Speech library for Python <https://cloud.google.com/speech-to-text/docs/quickstart>`__ is required if and only if you want to use the Google Cloud Speech API (``recognizer_instance.recognize_google_cloud``).
|
|
|
|
If not installed, everything in the library will still work, except calling ``recognizer_instance.recognize_google_cloud`` will raise an ``RequestError``.
|
|
|
|
-According to the `official installation instructions <https://developers.google.com/api-client-library/python/start/installation>`__, the recommended way to install this is using `Pip <https://pip.readthedocs.org/>`__: execute ``pip install google-api-python-client`` (replace ``pip`` with ``pip3`` if using Python 3).
|
|
-
|
|
-Alternatively, you can perform the installation completely offline from the source archives under the ``./third-party/Source code for Google API Client Library for Python and its dependencies/`` directory.
|
|
+According to the `official installation instructions <https://cloud.google.com/speech-to-text/docs/quickstart>`__, the recommended way to install this is using `Pip <https://pip.readthedocs.org/>`__: execute ``pip install google-cloud-speech`` (replace ``pip`` with ``pip3`` if using Python 3).
|
|
|
|
FLAC (for some systems)
|
|
~~~~~~~~~~~~~~~~~~~~~~~
|
|
--- a/speech_recognition/__init__.py
|
|
+++ b/speech_recognition/__init__.py
|
|
@@ -821,6 +821,8 @@ class Recognizer(AudioSource):
|
|
"""
|
|
assert isinstance(audio_data, AudioData), "``audio_data`` must be audio data"
|
|
assert key is None or isinstance(key, str), "``key`` must be ``None`` or a string"
|
|
+ if credentials_json is None:
|
|
+ assert os.environ.get('GOOGLE_APPLICATION_CREDENTIALS') is not None
|
|
assert isinstance(language, str), "``language`` must be a string"
|
|
|
|
flac_data = audio_data.get_flac_data(
|
|
@@ -887,58 +889,56 @@ class Recognizer(AudioSource):
|
|
assert isinstance(language, str), "``language`` must be a string"
|
|
assert preferred_phrases is None or all(isinstance(preferred_phrases, (type(""), type(u""))) for preferred_phrases in preferred_phrases), "``preferred_phrases`` must be a list of strings"
|
|
|
|
- # See https://cloud.google.com/speech/reference/rest/v1/RecognitionConfig
|
|
+ try:
|
|
+ from google.cloud import speech
|
|
+ from google.cloud.speech import enums
|
|
+ from google.cloud.speech import types
|
|
+ from google.api_core.exceptions import GoogleAPICallError
|
|
+ except ImportError:
|
|
+ raise RequestError('missing google-cloud-speech module: ensure that google-cloud-speech is set up correctly.')
|
|
+
|
|
+ if credentials_json is not None:
|
|
+ client = speech.SpeechClient.from_service_account_json(credentials_json)
|
|
+ else:
|
|
+ client = speech.SpeechClient()
|
|
+
|
|
flac_data = audio_data.get_flac_data(
|
|
convert_rate=None if 8000 <= audio_data.sample_rate <= 48000 else max(8000, min(audio_data.sample_rate, 48000)), # audio sample rate must be between 8 kHz and 48 kHz inclusive - clamp sample rate into this range
|
|
convert_width=2 # audio samples must be 16-bit
|
|
)
|
|
+ audio = types.RecognitionAudio(content=flac_data)
|
|
|
|
- try:
|
|
- from oauth2client.client import GoogleCredentials
|
|
- from googleapiclient.discovery import build
|
|
- import googleapiclient.errors
|
|
-
|
|
- # cannot simply use 'http = httplib2.Http(timeout=self.operation_timeout)'
|
|
- # because discovery.build() says 'Arguments http and credentials are mutually exclusive'
|
|
- import socket
|
|
- import googleapiclient.http
|
|
- if self.operation_timeout and socket.getdefaulttimeout() is None:
|
|
- # override constant (used by googleapiclient.http.build_http())
|
|
- googleapiclient.http.DEFAULT_HTTP_TIMEOUT_SEC = self.operation_timeout
|
|
-
|
|
- if credentials_json is None:
|
|
- api_credentials = GoogleCredentials.get_application_default()
|
|
- else:
|
|
- # the credentials can only be read from a file, so we'll make a temp file and write in the contents to work around that
|
|
- with PortableNamedTemporaryFile("w") as f:
|
|
- f.write(credentials_json)
|
|
- f.flush()
|
|
- api_credentials = GoogleCredentials.from_stream(f.name)
|
|
-
|
|
- speech_service = build("speech", "v1", credentials=api_credentials, cache_discovery=False)
|
|
- except ImportError:
|
|
- raise RequestError("missing google-api-python-client module: ensure that google-api-python-client is set up correctly.")
|
|
-
|
|
- speech_config = {"encoding": "FLAC", "sampleRateHertz": audio_data.sample_rate, "languageCode": language}
|
|
+ config = {
|
|
+ 'encoding': enums.RecognitionConfig.AudioEncoding.FLAC,
|
|
+ 'sample_rate_hertz': audio_data.sample_rate,
|
|
+ 'language_code': language
|
|
+ }
|
|
if preferred_phrases is not None:
|
|
- speech_config["speechContext"] = {"phrases": preferred_phrases}
|
|
+ config['speechContexts'] = [types.SpeechContext(
|
|
+ phrases=preferred_phrases
|
|
+ )]
|
|
if show_all:
|
|
- speech_config["enableWordTimeOffsets"] = True # some useful extra options for when we want all the output
|
|
- request = speech_service.speech().recognize(body={"audio": {"content": base64.b64encode(flac_data).decode("utf8")}, "config": speech_config})
|
|
+ config['enableWordTimeOffsets'] = True # some useful extra options for when we want all the output
|
|
+
|
|
+ opts = {}
|
|
+ if self.operation_timeout and socket.getdefaulttimeout() is None:
|
|
+ opts['timeout'] = self.operation_timeout
|
|
+
|
|
+ config = types.RecognitionConfig(**config)
|
|
|
|
try:
|
|
- response = request.execute()
|
|
- except googleapiclient.errors.HttpError as e:
|
|
+ response = client.recognize(config, audio, **opts)
|
|
+ except GoogleAPICallError as e:
|
|
raise RequestError(e)
|
|
except URLError as e:
|
|
raise RequestError("recognition connection failed: {0}".format(e.reason))
|
|
|
|
if show_all: return response
|
|
- if "results" not in response or len(response["results"]) == 0: raise UnknownValueError()
|
|
- transcript = ""
|
|
- for result in response["results"]:
|
|
- transcript += result["alternatives"][0]["transcript"].strip() + " "
|
|
+ if len(response.results) == 0: raise UnknownValueError()
|
|
|
|
+ transcript = ''
|
|
+ for result in response.results:
|
|
+ transcript += result.alternatives[0].transcript.strip() + ' '
|
|
return transcript
|
|
|
|
def recognize_wit(self, audio_data, key, show_all=False):
|