diff --git a/3.12.0.tar.gz b/3.12.0.tar.gz new file mode 100644 index 0000000..301c418 --- /dev/null +++ b/3.12.0.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d19bcb489386f494b64acb37299c94d75f31598d4ede7bf0339f001d7a9ca57a +size 125433150 diff --git a/3.8.1.tar.gz b/3.8.1.tar.gz deleted file mode 100644 index 86313e9..0000000 --- a/3.8.1.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:82d3313db383409ddaf3e42625fb0c3518231a1feb5e2ed5473b10b3d5ece7bd -size 125426650 diff --git a/406-google-cloud-speech.patch b/406-google-cloud-speech.patch deleted file mode 100644 index 11de0b7..0000000 --- a/406-google-cloud-speech.patch +++ /dev/null @@ -1,138 +0,0 @@ -From ca422f49dcd10c7f2d62972432be2a24a9cbd167 Mon Sep 17 00:00:00 2001 -From: Francis Tseng -Date: Tue, 12 Mar 2019 10:22:55 -0400 -Subject: [PATCH 1/2] switched to google-cloud-speech library (other one is - deprecated) - ---- - README.rst | 8 +--- - speech_recognition/__init__.py | 74 ++++++++++++++++++++--------------------- - 2 files changed, 40 insertions(+), 42 deletions(-) - ---- a/README.rst -+++ b/README.rst -@@ -128,16 +128,14 @@ Note that the versions available in most - - See `Notes on using PocketSphinx `__ for information about installing languages, compiling PocketSphinx, and building language packs from online resources. This document is also included under ``reference/pocketsphinx.rst``. - --Google API Client Library for Python (for Google Cloud Speech API users) -+Google Cloud Speech Library for Python (for Google Cloud Speech API users) - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - --`Google API Client Library for Python `__ is required if and only if you want to use the Google Cloud Speech API (``recognizer_instance.recognize_google_cloud``). 
-+`Google Cloud Speech library for Python `__ is required if and only if you want to use the Google Cloud Speech API (``recognizer_instance.recognize_google_cloud``). - - If not installed, everything in the library will still work, except calling ``recognizer_instance.recognize_google_cloud`` will raise an ``RequestError``. - --According to the `official installation instructions `__, the recommended way to install this is using `Pip `__: execute ``pip install google-api-python-client`` (replace ``pip`` with ``pip3`` if using Python 3). -- --Alternatively, you can perform the installation completely offline from the source archives under the ``./third-party/Source code for Google API Client Library for Python and its dependencies/`` directory. -+According to the `official installation instructions `__, the recommended way to install this is using `Pip `__: execute ``pip install google-cloud-speech`` (replace ``pip`` with ``pip3`` if using Python 3). - - FLAC (for some systems) - ~~~~~~~~~~~~~~~~~~~~~~~ ---- a/speech_recognition/__init__.py -+++ b/speech_recognition/__init__.py -@@ -821,6 +821,8 @@ class Recognizer(AudioSource): - """ - assert isinstance(audio_data, AudioData), "``audio_data`` must be audio data" - assert key is None or isinstance(key, str), "``key`` must be ``None`` or a string" -+ if credentials_json is None: -+ assert os.environ.get('GOOGLE_APPLICATION_CREDENTIALS') is not None - assert isinstance(language, str), "``language`` must be a string" - - flac_data = audio_data.get_flac_data( -@@ -887,58 +889,56 @@ class Recognizer(AudioSource): - assert isinstance(language, str), "``language`` must be a string" - assert preferred_phrases is None or all(isinstance(preferred_phrases, (type(""), type(u""))) for preferred_phrases in preferred_phrases), "``preferred_phrases`` must be a list of strings" - -- # See https://cloud.google.com/speech/reference/rest/v1/RecognitionConfig -+ try: -+ from google.cloud import speech -+ from google.cloud.speech import 
enums -+ from google.cloud.speech import types -+ from google.api_core.exceptions import GoogleAPICallError -+ except ImportError: -+ raise RequestError('missing google-cloud-speech module: ensure that google-cloud-speech is set up correctly.') -+ -+ if credentials_json is not None: -+ client = speech.SpeechClient.from_service_account_json(credentials_json) -+ else: -+ client = speech.SpeechClient() -+ - flac_data = audio_data.get_flac_data( - convert_rate=None if 8000 <= audio_data.sample_rate <= 48000 else max(8000, min(audio_data.sample_rate, 48000)), # audio sample rate must be between 8 kHz and 48 kHz inclusive - clamp sample rate into this range - convert_width=2 # audio samples must be 16-bit - ) -+ audio = types.RecognitionAudio(content=flac_data) - -- try: -- from oauth2client.client import GoogleCredentials -- from googleapiclient.discovery import build -- import googleapiclient.errors -- -- # cannot simply use 'http = httplib2.Http(timeout=self.operation_timeout)' -- # because discovery.build() says 'Arguments http and credentials are mutually exclusive' -- import socket -- import googleapiclient.http -- if self.operation_timeout and socket.getdefaulttimeout() is None: -- # override constant (used by googleapiclient.http.build_http()) -- googleapiclient.http.DEFAULT_HTTP_TIMEOUT_SEC = self.operation_timeout -- -- if credentials_json is None: -- api_credentials = GoogleCredentials.get_application_default() -- else: -- # the credentials can only be read from a file, so we'll make a temp file and write in the contents to work around that -- with PortableNamedTemporaryFile("w") as f: -- f.write(credentials_json) -- f.flush() -- api_credentials = GoogleCredentials.from_stream(f.name) -- -- speech_service = build("speech", "v1", credentials=api_credentials, cache_discovery=False) -- except ImportError: -- raise RequestError("missing google-api-python-client module: ensure that google-api-python-client is set up correctly.") -- -- speech_config = {"encoding": 
"FLAC", "sampleRateHertz": audio_data.sample_rate, "languageCode": language} -+ config = { -+ 'encoding': enums.RecognitionConfig.AudioEncoding.FLAC, -+ 'sample_rate_hertz': audio_data.sample_rate, -+ 'language_code': language -+ } - if preferred_phrases is not None: -- speech_config["speechContext"] = {"phrases": preferred_phrases} -+ config['speechContexts'] = [types.SpeechContext( -+ phrases=preferred_phrases -+ )] - if show_all: -- speech_config["enableWordTimeOffsets"] = True # some useful extra options for when we want all the output -- request = speech_service.speech().recognize(body={"audio": {"content": base64.b64encode(flac_data).decode("utf8")}, "config": speech_config}) -+ config['enableWordTimeOffsets'] = True # some useful extra options for when we want all the output -+ -+ opts = {} -+ if self.operation_timeout and socket.getdefaulttimeout() is None: -+ opts['timeout'] = self.operation_timeout -+ -+ config = types.RecognitionConfig(**config) - - try: -- response = request.execute() -- except googleapiclient.errors.HttpError as e: -+ response = client.recognize(config, audio, **opts) -+ except GoogleAPICallError as e: - raise RequestError(e) - except URLError as e: - raise RequestError("recognition connection failed: {0}".format(e.reason)) - - if show_all: return response -- if "results" not in response or len(response["results"]) == 0: raise UnknownValueError() -- transcript = "" -- for result in response["results"]: -- transcript += result["alternatives"][0]["transcript"].strip() + " " -+ if len(response.results) == 0: raise UnknownValueError() - -+ transcript = '' -+ for result in response.results: -+ transcript += result.alternatives[0].transcript.strip() + ' ' - return transcript - - def recognize_wit(self, audio_data, key, show_all=False): diff --git a/fix-readme.patch b/fix-readme.patch index ca15d0e..ad43a70 100644 --- a/fix-readme.patch +++ b/fix-readme.patch @@ -1,8 +1,8 @@ -Index: speech_recognition-3.8.1/README.rst +Index: 
speech_recognition-3.12.0/README.rst =================================================================== ---- speech_recognition-3.8.1.orig/README.rst -+++ speech_recognition-3.8.1/README.rst -@@ -270,9 +270,7 @@ To hack on this library, first make sure +--- speech_recognition-3.12.0.orig/README.rst ++++ speech_recognition-3.12.0/README.rst +@@ -283,9 +283,7 @@ To hack on this library, first make sure - Most of the library code lives in ``speech_recognition/__init__.py``. - Examples live under the ``examples/`` `directory `__, and the demo script lives in ``speech_recognition/__main__.py``. @@ -10,11 +10,11 @@ Index: speech_recognition-3.8.1/README.rst - Documentation can be found in the ``reference/`` `directory `__. -- Third-party libraries, utilities, and reference material are in the ``third-party/`` `directory `__. - To install/reinstall the library locally, run ``python setup.py install`` in the project `root directory `__. + To install/reinstall the library locally, run ``python -m pip install -e .[dev]`` in the project `root directory `__. -@@ -301,41 +299,6 @@ Testing is also done automatically by Tr - python -m flake8 --ignore=E501,E701 speech_recognition tests examples setup.py # ignore errors for long lines and multi-statement lines - python -m rstcheck README.rst reference/*.rst # ensure RST is well-formed +@@ -316,41 +314,6 @@ To ensure RST is well-formed: + + Testing is also done automatically by GitHub Actions, upon every push. -FLAC Executables -~~~~~~~~~~~~~~~~ @@ -54,7 +54,7 @@ Index: speech_recognition-3.8.1/README.rst Authors ------- -@@ -374,9 +337,3 @@ Copyright 2014-2017 `Anthony Zhang (Uber +@@ -389,9 +352,3 @@ Copyright 2014-2017 `Anthony Zhang (Uber SpeechRecognition is made available under the 3-clause BSD license. See ``LICENSE.txt`` in the project's `root directory `__ for more information. For convenience, all the official distributions of SpeechRecognition already include a copy of the necessary copyright notices and licenses. 
In your project, you can simply **say that licensing information for SpeechRecognition can be found within the SpeechRecognition README, and make sure SpeechRecognition is visible to users if they wish to see it**. diff --git a/python-SpeechRecognition.changes b/python-SpeechRecognition.changes index cd4352b..121462d 100644 --- a/python-SpeechRecognition.changes +++ b/python-SpeechRecognition.changes @@ -1,3 +1,38 @@ +------------------------------------------------------------------- +Thu Dec 12 04:26:36 UTC 2024 - Steve Kowalik + +- Update to 3.12.0: + * New features + + Groq Support + + Python 3.13 Support (experimental) + + Support Python 3.11 + + Support Whisper API + + Add recognize_tensorflow + + Add recognize_vosk + + Add recognize_amazon and recognize_assemblyai + + Add recognize_whisper + * Bugfixes & improvements + + Support Python maintenance: Add 3.12, Drop 3.8 + + Updated to the latest OpenAI API changes + + Replace with in-memory stream on recognize_whisper + + Remove prints that shouldn't be printed by default + + Update to speechContext formatting for recognize_google_cloud + + Fix for OSError: [Errno -9988] Stream closed Error + + Add parameter to change profanity filter level for Google Speech + Recognition + + Updating Wit API version (20160526 -> 20170307) + + Google cloud speech library + + Fix large cpu consumption in snowboy detect + + Replace Bing Speech API with Azure Speech API + + Removed duplicate code + + fix recognize_google_cloud + + Specify fp16 parameter for whisper +- Switch to pyproject macros. +- Run at least a portion of the testsuite. +- No more greedy globs in %files. +- Drop patch 406-google-cloud-speech.patch, included upstream. 
+- Refresh patch fix-readme.patch + ------------------------------------------------------------------- Thu Jul 14 09:04:00 UTC 2022 - Matej Cepl diff --git a/python-SpeechRecognition.spec b/python-SpeechRecognition.spec index 8c8ba49..bb64402 100644 --- a/python-SpeechRecognition.spec +++ b/python-SpeechRecognition.spec @@ -1,7 +1,7 @@ # # spec file for package python-SpeechRecognition # -# Copyright (c) 2022 SUSE LLC +# Copyright (c) 2024 SUSE LLC # # All modifications and additions to the file contributed by third parties # remain the property of their copyright owners, unless otherwise agreed @@ -16,10 +16,9 @@ # -%{?!python_module:%define python_module() python-%{**} python3-%{**}} %define justpython python Name: python-SpeechRecognition -Version: 3.8.1 +Version: 3.12.0 Release: 0 Summary: Library for performing speech recognition, with support for several engines # Note: The sources include third party code with different licenses. @@ -30,17 +29,27 @@ URL: https://github.com/Uberi/speech_recognition#readme Source: https://github.com/Uberi/speech_recognition/archive/%{version}.tar.gz # Remove information about unbundled libraries. 
Patch0: fix-readme.patch -# PATCH-FIX-UPSTREAM 406-google-cloud-speech.patch gh#Uberi/speech_recognition#406 mcepl@suse.com -# Switch dependency to google-cloud-speech from deprecated oauth2client and googleapiclient -Patch1: 406-google-cloud-speech.patch +BuildRequires: %{python_module audioop-lts if %python-base >= 3.13} +BuildRequires: %{python_module base >= 3.9} BuildRequires: %{python_module google-cloud-speech} +BuildRequires: %{python_module pip} +BuildRequires: %{python_module pytest} BuildRequires: %{python_module setuptools} +BuildRequires: %{python_module standard-aifc if %python-base >= 3.13} +BuildRequires: %{python_module typing-extensions} +BuildRequires: %{python_module wheel} BuildRequires: fdupes +BuildRequires: flac BuildRequires: python-rpm-macros Requires: %{justpython}-SpeechRecognition-common-en-US Requires: flac Requires: python-PyAudio Requires: python-google-cloud-speech +Requires: python-typing-extensions +%if 0%{?python_version_nodots} >= 313 +Requires: python-audioop-lts +Requires: python-standard-aifc +%endif Recommends: python-pocketsphinx-python BuildArch: noarch %python_subpackages @@ -74,19 +83,29 @@ rm speech_recognition/flac-* rm LICENSE-FLAC.txt %build -%python_build +%pyproject_wheel %install -%python_install +%pyproject_install +# Do not ship tests +%python_expand rm -r %{buildroot}%{$python_sitelib}/tests %python_expand %fdupes %{buildroot}%{$python_sitelib} mkdir -p %{buildroot}%{_datadir}/speech_recognition cp -Ra speech_recognition/pocketsphinx-data %{buildroot}%{_datadir}/speech_recognition/ %python_expand rm -Rf %{buildroot}%{$python_sitelib}/speech_recognition/pocketsphinx-data %python_expand ln -s %{_datadir}/speech_recognition/pocketsphinx-data %{buildroot}%{$python_sitelib}/speech_recognition/ +%check +# No internet access for OpenAI or Groq +ignore="--ignore tests/recognizers/test_groq.py --ignore tests/recognizers/test_openai.py" +ignore+=" --ignore tests/test_whisper_recognition.py" +# PocketSphinx is only built 
for primary Python +%pytest $ignore -k 'not test_sphinx_' + %files %{python_files} %license LICENSE.txt -%{python_sitelib}/* +%{python_sitelib}/speech_recognition +%{python_sitelib}/SpeechRecognition-%{version}.dist-info %dir %{_datadir}/speech_recognition/ %dir %{_datadir}/speech_recognition/pocketsphinx-data