From bfd23e3848d5cf3ad24ffe56be893bf295d48ce3a6ec9cb5c1a37e177794fd3d Mon Sep 17 00:00:00 2001 From: Steve Kowalik Date: Wed, 26 Mar 2025 00:52:50 +0000 Subject: [PATCH] - Lowercase metadata directory name. OBS-URL: https://build.opensuse.org/package/show/devel:languages:python/python-SpeechRecognition?expand=0&rev=16 --- .gitattributes | 23 ++++++ .gitignore | 1 + 3.12.0.tar.gz | 3 + 3.8.1.tar.gz | 3 + 406-google-cloud-speech.patch | 138 +++++++++++++++++++++++++++++++ fix-readme.patch | 66 +++++++++++++++ python-SpeechRecognition.changes | 120 +++++++++++++++++++++++++++ python-SpeechRecognition.spec | 115 ++++++++++++++++++++++++++ 8 files changed, 469 insertions(+) create mode 100644 .gitattributes create mode 100644 .gitignore create mode 100644 3.12.0.tar.gz create mode 100644 3.8.1.tar.gz create mode 100644 406-google-cloud-speech.patch create mode 100644 fix-readme.patch create mode 100644 python-SpeechRecognition.changes create mode 100644 python-SpeechRecognition.spec diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..9b03811 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,23 @@ +## Default LFS +*.7z filter=lfs diff=lfs merge=lfs -text +*.bsp filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.gem filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.jar filter=lfs diff=lfs merge=lfs -text +*.lz filter=lfs diff=lfs merge=lfs -text +*.lzma filter=lfs diff=lfs merge=lfs -text +*.obscpio filter=lfs diff=lfs merge=lfs -text +*.oxt filter=lfs diff=lfs merge=lfs -text +*.pdf filter=lfs diff=lfs merge=lfs -text +*.png filter=lfs diff=lfs merge=lfs -text +*.rpm filter=lfs diff=lfs merge=lfs -text +*.tbz filter=lfs diff=lfs merge=lfs -text +*.tbz2 filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.ttf filter=lfs diff=lfs merge=lfs -text +*.txz filter=lfs diff=lfs merge=lfs -text +*.whl filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs 
merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..57affb6 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +.osc diff --git a/3.12.0.tar.gz b/3.12.0.tar.gz new file mode 100644 index 0000000..301c418 --- /dev/null +++ b/3.12.0.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d19bcb489386f494b64acb37299c94d75f31598d4ede7bf0339f001d7a9ca57a +size 125433150 diff --git a/3.8.1.tar.gz b/3.8.1.tar.gz new file mode 100644 index 0000000..86313e9 --- /dev/null +++ b/3.8.1.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82d3313db383409ddaf3e42625fb0c3518231a1feb5e2ed5473b10b3d5ece7bd +size 125426650 diff --git a/406-google-cloud-speech.patch b/406-google-cloud-speech.patch new file mode 100644 index 0000000..11de0b7 --- /dev/null +++ b/406-google-cloud-speech.patch @@ -0,0 +1,138 @@ +From ca422f49dcd10c7f2d62972432be2a24a9cbd167 Mon Sep 17 00:00:00 2001 +From: Francis Tseng +Date: Tue, 12 Mar 2019 10:22:55 -0400 +Subject: [PATCH 1/2] switched to google-cloud-speech library (other one is + deprecated) + +--- + README.rst | 8 +--- + speech_recognition/__init__.py | 74 ++++++++++++++++++++--------------------- + 2 files changed, 40 insertions(+), 42 deletions(-) + +--- a/README.rst ++++ b/README.rst +@@ -128,16 +128,14 @@ Note that the versions available in most + + See `Notes on using PocketSphinx `__ for information about installing languages, compiling PocketSphinx, and building language packs from online resources. This document is also included under ``reference/pocketsphinx.rst``. 
+ +-Google API Client Library for Python (for Google Cloud Speech API users) ++Google Cloud Speech Library for Python (for Google Cloud Speech API users) + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +-`Google API Client Library for Python `__ is required if and only if you want to use the Google Cloud Speech API (``recognizer_instance.recognize_google_cloud``). ++`Google Cloud Speech library for Python `__ is required if and only if you want to use the Google Cloud Speech API (``recognizer_instance.recognize_google_cloud``). + + If not installed, everything in the library will still work, except calling ``recognizer_instance.recognize_google_cloud`` will raise an ``RequestError``. + +-According to the `official installation instructions `__, the recommended way to install this is using `Pip `__: execute ``pip install google-api-python-client`` (replace ``pip`` with ``pip3`` if using Python 3). +- +-Alternatively, you can perform the installation completely offline from the source archives under the ``./third-party/Source code for Google API Client Library for Python and its dependencies/`` directory. ++According to the `official installation instructions `__, the recommended way to install this is using `Pip `__: execute ``pip install google-cloud-speech`` (replace ``pip`` with ``pip3`` if using Python 3). 
+ + FLAC (for some systems) + ~~~~~~~~~~~~~~~~~~~~~~~ +--- a/speech_recognition/__init__.py ++++ b/speech_recognition/__init__.py +@@ -821,6 +821,8 @@ class Recognizer(AudioSource): + """ + assert isinstance(audio_data, AudioData), "``audio_data`` must be audio data" + assert key is None or isinstance(key, str), "``key`` must be ``None`` or a string" ++ if credentials_json is None: ++ assert os.environ.get('GOOGLE_APPLICATION_CREDENTIALS') is not None + assert isinstance(language, str), "``language`` must be a string" + + flac_data = audio_data.get_flac_data( +@@ -887,58 +889,56 @@ class Recognizer(AudioSource): + assert isinstance(language, str), "``language`` must be a string" + assert preferred_phrases is None or all(isinstance(preferred_phrases, (type(""), type(u""))) for preferred_phrases in preferred_phrases), "``preferred_phrases`` must be a list of strings" + +- # See https://cloud.google.com/speech/reference/rest/v1/RecognitionConfig ++ try: ++ from google.cloud import speech ++ from google.cloud.speech import enums ++ from google.cloud.speech import types ++ from google.api_core.exceptions import GoogleAPICallError ++ except ImportError: ++ raise RequestError('missing google-cloud-speech module: ensure that google-cloud-speech is set up correctly.') ++ ++ if credentials_json is not None: ++ client = speech.SpeechClient.from_service_account_json(credentials_json) ++ else: ++ client = speech.SpeechClient() ++ + flac_data = audio_data.get_flac_data( + convert_rate=None if 8000 <= audio_data.sample_rate <= 48000 else max(8000, min(audio_data.sample_rate, 48000)), # audio sample rate must be between 8 kHz and 48 kHz inclusive - clamp sample rate into this range + convert_width=2 # audio samples must be 16-bit + ) ++ audio = types.RecognitionAudio(content=flac_data) + +- try: +- from oauth2client.client import GoogleCredentials +- from googleapiclient.discovery import build +- import googleapiclient.errors +- +- # cannot simply use 'http = 
httplib2.Http(timeout=self.operation_timeout)' +- # because discovery.build() says 'Arguments http and credentials are mutually exclusive' +- import socket +- import googleapiclient.http +- if self.operation_timeout and socket.getdefaulttimeout() is None: +- # override constant (used by googleapiclient.http.build_http()) +- googleapiclient.http.DEFAULT_HTTP_TIMEOUT_SEC = self.operation_timeout +- +- if credentials_json is None: +- api_credentials = GoogleCredentials.get_application_default() +- else: +- # the credentials can only be read from a file, so we'll make a temp file and write in the contents to work around that +- with PortableNamedTemporaryFile("w") as f: +- f.write(credentials_json) +- f.flush() +- api_credentials = GoogleCredentials.from_stream(f.name) +- +- speech_service = build("speech", "v1", credentials=api_credentials, cache_discovery=False) +- except ImportError: +- raise RequestError("missing google-api-python-client module: ensure that google-api-python-client is set up correctly.") +- +- speech_config = {"encoding": "FLAC", "sampleRateHertz": audio_data.sample_rate, "languageCode": language} ++ config = { ++ 'encoding': enums.RecognitionConfig.AudioEncoding.FLAC, ++ 'sample_rate_hertz': audio_data.sample_rate, ++ 'language_code': language ++ } + if preferred_phrases is not None: +- speech_config["speechContext"] = {"phrases": preferred_phrases} ++ config['speechContexts'] = [types.SpeechContext( ++ phrases=preferred_phrases ++ )] + if show_all: +- speech_config["enableWordTimeOffsets"] = True # some useful extra options for when we want all the output +- request = speech_service.speech().recognize(body={"audio": {"content": base64.b64encode(flac_data).decode("utf8")}, "config": speech_config}) ++ config['enableWordTimeOffsets'] = True # some useful extra options for when we want all the output ++ ++ opts = {} ++ if self.operation_timeout and socket.getdefaulttimeout() is None: ++ opts['timeout'] = self.operation_timeout ++ ++ config = 
types.RecognitionConfig(**config) + + try: +- response = request.execute() +- except googleapiclient.errors.HttpError as e: ++ response = client.recognize(config, audio, **opts) ++ except GoogleAPICallError as e: + raise RequestError(e) + except URLError as e: + raise RequestError("recognition connection failed: {0}".format(e.reason)) + + if show_all: return response +- if "results" not in response or len(response["results"]) == 0: raise UnknownValueError() +- transcript = "" +- for result in response["results"]: +- transcript += result["alternatives"][0]["transcript"].strip() + " " ++ if len(response.results) == 0: raise UnknownValueError() + ++ transcript = '' ++ for result in response.results: ++ transcript += result.alternatives[0].transcript.strip() + ' ' + return transcript + + def recognize_wit(self, audio_data, key, show_all=False): diff --git a/fix-readme.patch b/fix-readme.patch new file mode 100644 index 0000000..ad43a70 --- /dev/null +++ b/fix-readme.patch @@ -0,0 +1,66 @@ +Index: speech_recognition-3.12.0/README.rst +=================================================================== +--- speech_recognition-3.12.0.orig/README.rst ++++ speech_recognition-3.12.0/README.rst +@@ -283,9 +283,7 @@ To hack on this library, first make sure + + - Most of the library code lives in ``speech_recognition/__init__.py``. + - Examples live under the ``examples/`` `directory `__, and the demo script lives in ``speech_recognition/__main__.py``. +-- The FLAC encoder binaries are in the ``speech_recognition/`` `directory `__. + - Documentation can be found in the ``reference/`` `directory `__. +-- Third-party libraries, utilities, and reference material are in the ``third-party/`` `directory `__. + + To install/reinstall the library locally, run ``python -m pip install -e .[dev]`` in the project `root directory `__. + +@@ -316,41 +314,6 @@ To ensure RST is well-formed: + + Testing is also done automatically by GitHub Actions, upon every push. 
+ +-FLAC Executables +-~~~~~~~~~~~~~~~~ +- +-The included ``flac-win32`` executable is the `official FLAC 1.3.2 32-bit Windows binary `__. +- +-The included ``flac-linux-x86`` and ``flac-linux-x86_64`` executables are built from the `FLAC 1.3.2 source code `__ with `Manylinux `__ to ensure that it's compatible with a wide variety of distributions. +- +-The built FLAC executables should be bit-for-bit reproducible. To rebuild them, run the following inside the project directory on a Debian-like system: +- +-.. code:: bash +- +- # download and extract the FLAC source code +- cd third-party +- sudo apt-get install --yes docker.io +- +- # build FLAC inside the Manylinux i686 Docker image +- tar xf flac-1.3.2.tar.xz +- sudo docker run --tty --interactive --rm --volume "$(pwd):/root" quay.io/pypa/manylinux1_i686:latest bash +- cd /root/flac-1.3.2 +- ./configure LDFLAGS=-static # compiler flags to make a static build +- make +- exit +- cp flac-1.3.2/src/flac/flac ../speech_recognition/flac-linux-x86 && sudo rm -rf flac-1.3.2/ +- +- # build FLAC inside the Manylinux x86_64 Docker image +- tar xf flac-1.3.2.tar.xz +- sudo docker run --tty --interactive --rm --volume "$(pwd):/root" quay.io/pypa/manylinux1_x86_64:latest bash +- cd /root/flac-1.3.2 +- ./configure LDFLAGS=-static # compiler flags to make a static build +- make +- exit +- cp flac-1.3.2/src/flac/flac ../speech_recognition/flac-linux-x86_64 && sudo rm -r flac-1.3.2/ +- +-The included ``flac-mac`` executable is extracted from `xACT 2.39 `__, which is a frontend for FLAC 1.3.2 that conveniently includes binaries for all of its encoders. Specifically, it is a copy of ``xACT 2.39/xACT.app/Contents/Resources/flac`` in ``xACT2.39.zip``. +- + Authors + ------- + +@@ -389,9 +352,3 @@ Copyright 2014-2017 `Anthony Zhang (Uber + SpeechRecognition is made available under the 3-clause BSD license. See ``LICENSE.txt`` in the project's `root directory `__ for more information. 
+ + For convenience, all the official distributions of SpeechRecognition already include a copy of the necessary copyright notices and licenses. In your project, you can simply **say that licensing information for SpeechRecognition can be found within the SpeechRecognition README, and make sure SpeechRecognition is visible to users if they wish to see it**. +- +-SpeechRecognition distributes source code, binaries, and language files from `CMU Sphinx `__. These files are BSD-licensed and redistributable as long as copyright notices are correctly retained. See ``speech_recognition/pocketsphinx-data/*/LICENSE*.txt`` and ``third-party/LICENSE-Sphinx.txt`` for license details for individual parts. +- +-SpeechRecognition distributes source code and binaries from `PyAudio `__. These files are MIT-licensed and redistributable as long as copyright notices are correctly retained. See ``third-party/LICENSE-PyAudio.txt`` for license details. +- +-SpeechRecognition distributes binaries from `FLAC `__ - ``speech_recognition/flac-win32.exe``, ``speech_recognition/flac-linux-x86``, and ``speech_recognition/flac-mac``. These files are GPLv2-licensed and redistributable, as long as the terms of the GPL are satisfied. The FLAC binaries are an `aggregate `__ of `separate programs `__, so these GPL restrictions do not apply to the library or your programs that use the library, only to FLAC itself. See ``LICENSE-FLAC.txt`` for license details. diff --git a/python-SpeechRecognition.changes b/python-SpeechRecognition.changes new file mode 100644 index 0000000..9ffb0ac --- /dev/null +++ b/python-SpeechRecognition.changes @@ -0,0 +1,120 @@ +------------------------------------------------------------------- +Wed Mar 26 00:52:33 UTC 2025 - Steve Kowalik + +- Lowercase metadata directory name. 
+ +------------------------------------------------------------------- +Thu Dec 12 04:26:36 UTC 2024 - Steve Kowalik + +- Update to 3.12.0: + * New features + + Groq Support + + Python 3.13 Support (experimental) + + Support Python 3.11 + + Support Whisper API + + Add recognize_tensorflow + + Add recognize_vosk + + Add recognize_amazon and recognize_assemblyai + + Add recognize_whisper + * Bugfixes & improvements + + Support Python maintenance: Add 3.12, Drop 3.8 + + Updated to the latest OpenAI API changes + + Replace with in-memory stream on recognize_whisper + + Remove prints that shouldn't be printed by default + + Update to speechContext formatting for recognize_google_cloud + + Fix for OSError: [Errno -9988] Stream closed Error + + Add parameter to change profanity filter level for Google Speech + Recognition + + Updating Wit API version (20160526 -> 20170307) + + Google cloud speech library + + Fix large cpu consumption in snowboy detect + + Replace Bing Speech API with Azure Speech API + + Removed duplicate code + + fix recognize_google_cloud + + Specify fp16 parameter for whisper +- Switch to pyproject macros. +- Run at least a portion of the testsuite. +- No more greedy globs in %files. +- Drop patch 406-google-cloud-speech.patch, included upstream. +- Refresh patch fix-readme.patch + +------------------------------------------------------------------- +Thu Jul 14 09:04:00 UTC 2022 - Matej Cepl + +- Actually do what you have promised in changelog. + +------------------------------------------------------------------- +Tue Jul 12 17:48:49 UTC 2022 - Matej Cepl + +- Add 406-google-cloud-speech.patch + (gh#Uberi/speech_recognition#406) to switch dependency from + oauth2client and googleclient to google-cloud-speech. 
+ +------------------------------------------------------------------- +Thu Nov 7 15:51:16 UTC 2019 - Matej Cepl + +- Run through spec-cleaner + +------------------------------------------------------------------- +Tue Dec 4 12:54:35 UTC 2018 - Matej Cepl + +- Remove superfluous devel dependency for noarch package + +------------------------------------------------------------------- +Thu May 3 15:38:29 UTC 2018 - alarrosa@suse.com + +- Run spec_cleaner + +------------------------------------------------------------------- +Thu May 3 09:48:57 UTC 2018 - alarrosa@suse.com + +- Add fix-readme.patch to keep clear that the flac binaries and third-party + source files are removed and are not used by the python-SpeechRecognition + package. + +------------------------------------------------------------------- +Fri Jan 5 09:38:06 UTC 2018 - alarrosa@suse.com + +- Update to version 3.8.1 + * Snowboy hotwords support for highly efficient, performant listening. + This is implemented as the snowboy_configuration parameter of + recognizer_instance.listen. + * Configurable Pocketsphinx models - you can now specify your own acoustic + parameters, language model, and phoneme dictionary, using the language + parameter of recognizer_instance.recognize_sphinx. + * audio_data_instance.get_segment(start_ms=None, end_ms=None) is a new method + that can be called on any AudioData instance to get a segment of the audio + starting at start_ms and ending at end_ms. This is really useful when you + want to get, say, only the first five seconds of some audio. + * The stopper function returned by listen_in_background now accepts one + parameter, wait_for_stop (defaulting to True for backwards compatibility), + which determines whether the function will wait for the background thread + to fully shutdown before returning. One advantage is that if wait_for_stop + is False, you can call the stopper function from any thread! 
+ * New example, demonstrating how to simultaneously listen to and recognize + speech with the threaded producer/consumer pattern: threaded_workers.py. + * Various improvements and bugfixes: + * Python 3 style type annotations in library documentation. + * recognize_google_cloud now uses the v1 rather than the beta API. + * recognize_google_cloud now returns timestamp info when the show_all + parameter is True. + * recognize_bing won't time out as often on credential requests, due to a + longer default timeout. + * recognize_google_cloud timeouts respect + recognizer_instance.operation_timeout now. + * Any recognizers using FLAC audio were broken inside Linux on Docker - + this is now fixed. + * Various documentation and lint fixes. + * Lots of small build system improvements. + +------------------------------------------------------------------- +Wed Nov 15 16:20:51 UTC 2017 - alarrosa@suse.com + +- Added python-SpeechRecognition-common-en-US package to install the + pocketsphinx-data directory in a common place for python2 and python3 + packages. + +------------------------------------------------------------------- +Mon Sep 4 17:54:27 UTC 2017 - alarrosa@suse.com + +- Initial release of python-SpeechRecognition 3.7.1 diff --git a/python-SpeechRecognition.spec b/python-SpeechRecognition.spec new file mode 100644 index 0000000..41021cd --- /dev/null +++ b/python-SpeechRecognition.spec @@ -0,0 +1,115 @@ +# +# spec file for package python-SpeechRecognition +# +# Copyright (c) 2025 SUSE LLC +# +# All modifications and additions to the file contributed by third parties +# remain the property of their copyright owners, unless otherwise agreed +# upon. The license for this file, and modifications and additions to the +# file, is the same license as for the pristine package itself (unless the +# license for the pristine package is not an Open Source License, in which +# case the license is the MIT License). 
An "Open Source License" is a +# license that conforms to the Open Source Definition (Version 1.9) +# published by the Open Source Initiative. + +# Please submit bugfixes or comments via https://bugs.opensuse.org/ +# + + +%define justpython python +Name: python-SpeechRecognition +Version: 3.12.0 +Release: 0 +Summary: Library for performing speech recognition, with support for several engines +# Note: The sources include third party code with different licenses. +# We remove all those before building so it's not installed in the +# generated packages. +License: BSD-3-Clause +URL: https://github.com/Uberi/speech_recognition#readme +Source: https://github.com/Uberi/speech_recognition/archive/%{version}.tar.gz +# Remove information about unbundled libraries. +Patch0: fix-readme.patch +BuildRequires: %{python_module audioop-lts if %python-base >= 3.13} +BuildRequires: %{python_module base >= 3.9} +BuildRequires: %{python_module google-cloud-speech} +BuildRequires: %{python_module pip} +BuildRequires: %{python_module pytest} +BuildRequires: %{python_module setuptools} +BuildRequires: %{python_module standard-aifc if %python-base >= 3.13} +BuildRequires: %{python_module typing-extensions} +BuildRequires: %{python_module wheel} +BuildRequires: fdupes +BuildRequires: flac +BuildRequires: python-rpm-macros +Requires: %{justpython}-SpeechRecognition-common-en-US +Requires: flac +Requires: python-PyAudio +Requires: python-google-cloud-speech +Requires: python-typing-extensions +%if 0%{?python_version_nodots} >= 313 +Requires: python-audioop-lts +Requires: python-standard-aifc +%endif +Recommends: python-pocketsphinx-python +BuildArch: noarch +%python_subpackages + +%description +SpeechRecognition Library for performing speech recognition, with support for several engines and APIs, online and offline. 
+ +The Speech recognition engine/API supports CMU Sphinx (works offline), Google Speech Recognition, +Google Cloud Speech API, Wit.ai, Microsoft Bing Voice Recognition, Houndify API and +IBM Speech to Text. + +%package -n python-SpeechRecognition-common-en-US +Summary: Common files for en-US language model support in python-speech_recognition + +%description -n python-SpeechRecognition-common-en-US +SpeechRecognition Library for performing speech recognition, with support for +several engines and APIs, online and offline. + +The Speech recognition engine/API supports CMU Sphinx (works offline), Google +Speech Recognition, Google Cloud Speech API, Wit.ai, Microsoft Bing Voice +Recognition, Houndify API and IBM Speech to Text. + +This package contains the data for the en-US language model to be used by +pocketsphinx from python-SpeechRecognition. + +%prep +%autosetup -p1 -n speech_recognition-%{version} + +rm -Rf third-party +rm speech_recognition/flac-* +rm LICENSE-FLAC.txt + +%build +%pyproject_wheel + +%install +%pyproject_install +# Do not ship tests +%python_expand rm -r %{buildroot}%{$python_sitelib}/tests +%python_expand %fdupes %{buildroot}%{$python_sitelib} +mkdir -p %{buildroot}%{_datadir}/speech_recognition +cp -Ra speech_recognition/pocketsphinx-data %{buildroot}%{_datadir}/speech_recognition/ +%python_expand rm -Rf %{buildroot}%{$python_sitelib}/speech_recognition/pocketsphinx-data +%python_expand ln -s %{_datadir}/speech_recognition/pocketsphinx-data %{buildroot}%{$python_sitelib}/speech_recognition/ + +%check +# No internet access for OpenAI or Groq +ignore="--ignore tests/recognizers/test_groq.py --ignore tests/recognizers/test_openai.py" +ignore+=" --ignore tests/test_whisper_recognition.py" +# PocketSphinx is only built for primary Python +%pytest $ignore -k 'not test_sphinx_' + +%files %{python_files} +%license LICENSE.txt +%{python_sitelib}/speech_recognition +%{python_sitelib}/speechrecognition-%{version}.dist-info +%dir 
%{_datadir}/speech_recognition/ +%dir %{_datadir}/speech_recognition/pocketsphinx-data + +%files -n python-SpeechRecognition-common-en-US +%{_datadir}/speech_recognition/pocketsphinx-data/en-US + +%changelog