- Lowercase metadata directory name.

OBS-URL: https://build.opensuse.org/package/show/devel:languages:python/python-SpeechRecognition?expand=0&rev=16
commit bfd23e3848
committed by Git OBS Bridge on 2025-03-26 00:52:50 +00:00
8 changed files with 469 additions and 0 deletions

.gitattributes vendored Normal file

@@ -0,0 +1,23 @@
## Default LFS
*.7z filter=lfs diff=lfs merge=lfs -text
*.bsp filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.gem filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.jar filter=lfs diff=lfs merge=lfs -text
*.lz filter=lfs diff=lfs merge=lfs -text
*.lzma filter=lfs diff=lfs merge=lfs -text
*.obscpio filter=lfs diff=lfs merge=lfs -text
*.oxt filter=lfs diff=lfs merge=lfs -text
*.pdf filter=lfs diff=lfs merge=lfs -text
*.png filter=lfs diff=lfs merge=lfs -text
*.rpm filter=lfs diff=lfs merge=lfs -text
*.tbz filter=lfs diff=lfs merge=lfs -text
*.tbz2 filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.ttf filter=lfs diff=lfs merge=lfs -text
*.txz filter=lfs diff=lfs merge=lfs -text
*.whl filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text

.gitignore vendored Normal file

@@ -0,0 +1 @@
.osc

3.12.0.tar.gz Normal file

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d19bcb489386f494b64acb37299c94d75f31598d4ede7bf0339f001d7a9ca57a
size 125433150

3.8.1.tar.gz Normal file

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:82d3313db383409ddaf3e42625fb0c3518231a1feb5e2ed5473b10b3d5ece7bd
size 125426650

406-google-cloud-speech.patch Normal file

@@ -0,0 +1,138 @@
From ca422f49dcd10c7f2d62972432be2a24a9cbd167 Mon Sep 17 00:00:00 2001
From: Francis Tseng <f+accounts@frnsys.com>
Date: Tue, 12 Mar 2019 10:22:55 -0400
Subject: [PATCH 1/2] switched to google-cloud-speech library (other one is
deprecated)
---
README.rst | 8 +---
speech_recognition/__init__.py | 74 ++++++++++++++++++++---------------------
2 files changed, 40 insertions(+), 42 deletions(-)
--- a/README.rst
+++ b/README.rst
@@ -128,16 +128,14 @@ Note that the versions available in most
See `Notes on using PocketSphinx <https://github.com/Uberi/speech_recognition/blob/master/reference/pocketsphinx.rst>`__ for information about installing languages, compiling PocketSphinx, and building language packs from online resources. This document is also included under ``reference/pocketsphinx.rst``.
-Google API Client Library for Python (for Google Cloud Speech API users)
+Google Cloud Speech Library for Python (for Google Cloud Speech API users)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-`Google API Client Library for Python <https://developers.google.com/api-client-library/python/>`__ is required if and only if you want to use the Google Cloud Speech API (``recognizer_instance.recognize_google_cloud``).
+`Google Cloud Speech library for Python <https://cloud.google.com/speech-to-text/docs/quickstart>`__ is required if and only if you want to use the Google Cloud Speech API (``recognizer_instance.recognize_google_cloud``).
If not installed, everything in the library will still work, except calling ``recognizer_instance.recognize_google_cloud`` will raise an ``RequestError``.
-According to the `official installation instructions <https://developers.google.com/api-client-library/python/start/installation>`__, the recommended way to install this is using `Pip <https://pip.readthedocs.org/>`__: execute ``pip install google-api-python-client`` (replace ``pip`` with ``pip3`` if using Python 3).
-
-Alternatively, you can perform the installation completely offline from the source archives under the ``./third-party/Source code for Google API Client Library for Python and its dependencies/`` directory.
+According to the `official installation instructions <https://cloud.google.com/speech-to-text/docs/quickstart>`__, the recommended way to install this is using `Pip <https://pip.readthedocs.org/>`__: execute ``pip install google-cloud-speech`` (replace ``pip`` with ``pip3`` if using Python 3).
FLAC (for some systems)
~~~~~~~~~~~~~~~~~~~~~~~
--- a/speech_recognition/__init__.py
+++ b/speech_recognition/__init__.py
@@ -821,6 +821,8 @@ class Recognizer(AudioSource):
"""
assert isinstance(audio_data, AudioData), "``audio_data`` must be audio data"
assert key is None or isinstance(key, str), "``key`` must be ``None`` or a string"
+ if credentials_json is None:
+ assert os.environ.get('GOOGLE_APPLICATION_CREDENTIALS') is not None
assert isinstance(language, str), "``language`` must be a string"
flac_data = audio_data.get_flac_data(
@@ -887,58 +889,56 @@ class Recognizer(AudioSource):
assert isinstance(language, str), "``language`` must be a string"
assert preferred_phrases is None or all(isinstance(preferred_phrases, (type(""), type(u""))) for preferred_phrases in preferred_phrases), "``preferred_phrases`` must be a list of strings"
- # See https://cloud.google.com/speech/reference/rest/v1/RecognitionConfig
+ try:
+ from google.cloud import speech
+ from google.cloud.speech import enums
+ from google.cloud.speech import types
+ from google.api_core.exceptions import GoogleAPICallError
+ except ImportError:
+ raise RequestError('missing google-cloud-speech module: ensure that google-cloud-speech is set up correctly.')
+
+ if credentials_json is not None:
+ client = speech.SpeechClient.from_service_account_json(credentials_json)
+ else:
+ client = speech.SpeechClient()
+
flac_data = audio_data.get_flac_data(
convert_rate=None if 8000 <= audio_data.sample_rate <= 48000 else max(8000, min(audio_data.sample_rate, 48000)), # audio sample rate must be between 8 kHz and 48 kHz inclusive - clamp sample rate into this range
convert_width=2 # audio samples must be 16-bit
)
+ audio = types.RecognitionAudio(content=flac_data)
- try:
- from oauth2client.client import GoogleCredentials
- from googleapiclient.discovery import build
- import googleapiclient.errors
-
- # cannot simply use 'http = httplib2.Http(timeout=self.operation_timeout)'
- # because discovery.build() says 'Arguments http and credentials are mutually exclusive'
- import socket
- import googleapiclient.http
- if self.operation_timeout and socket.getdefaulttimeout() is None:
- # override constant (used by googleapiclient.http.build_http())
- googleapiclient.http.DEFAULT_HTTP_TIMEOUT_SEC = self.operation_timeout
-
- if credentials_json is None:
- api_credentials = GoogleCredentials.get_application_default()
- else:
- # the credentials can only be read from a file, so we'll make a temp file and write in the contents to work around that
- with PortableNamedTemporaryFile("w") as f:
- f.write(credentials_json)
- f.flush()
- api_credentials = GoogleCredentials.from_stream(f.name)
-
- speech_service = build("speech", "v1", credentials=api_credentials, cache_discovery=False)
- except ImportError:
- raise RequestError("missing google-api-python-client module: ensure that google-api-python-client is set up correctly.")
-
- speech_config = {"encoding": "FLAC", "sampleRateHertz": audio_data.sample_rate, "languageCode": language}
+ config = {
+ 'encoding': enums.RecognitionConfig.AudioEncoding.FLAC,
+ 'sample_rate_hertz': audio_data.sample_rate,
+ 'language_code': language
+ }
if preferred_phrases is not None:
- speech_config["speechContext"] = {"phrases": preferred_phrases}
+ config['speechContexts'] = [types.SpeechContext(
+ phrases=preferred_phrases
+ )]
if show_all:
- speech_config["enableWordTimeOffsets"] = True # some useful extra options for when we want all the output
- request = speech_service.speech().recognize(body={"audio": {"content": base64.b64encode(flac_data).decode("utf8")}, "config": speech_config})
+ config['enableWordTimeOffsets'] = True # some useful extra options for when we want all the output
+
+ opts = {}
+ if self.operation_timeout and socket.getdefaulttimeout() is None:
+ opts['timeout'] = self.operation_timeout
+
+ config = types.RecognitionConfig(**config)
try:
- response = request.execute()
- except googleapiclient.errors.HttpError as e:
+ response = client.recognize(config, audio, **opts)
+ except GoogleAPICallError as e:
raise RequestError(e)
except URLError as e:
raise RequestError("recognition connection failed: {0}".format(e.reason))
if show_all: return response
- if "results" not in response or len(response["results"]) == 0: raise UnknownValueError()
- transcript = ""
- for result in response["results"]:
- transcript += result["alternatives"][0]["transcript"].strip() + " "
+ if len(response.results) == 0: raise UnknownValueError()
+ transcript = ''
+ for result in response.results:
+ transcript += result.alternatives[0].transcript.strip() + ' '
return transcript
def recognize_wit(self, audio_data, key, show_all=False):
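
For context, a minimal usage sketch of the recognizer after this patch is applied (an illustration only, not part of the patch; it assumes a hypothetical local example.wav and Google Cloud credentials exported via GOOGLE_APPLICATION_CREDENTIALS or passed as credentials_json):

    import speech_recognition as sr

    r = sr.Recognizer()
    with sr.AudioFile("example.wav") as source:  # hypothetical input file
        audio = r.record(source)  # read the whole file into an AudioData instance

    try:
        # goes through the google-cloud-speech client set up by the patched code above
        print(r.recognize_google_cloud(audio, language="en-US"))
    except sr.UnknownValueError:
        print("Google Cloud Speech could not understand the audio")
    except sr.RequestError as e:
        print("Google Cloud Speech request failed: {0}".format(e))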

fix-readme.patch Normal file

@@ -0,0 +1,66 @@
Index: speech_recognition-3.12.0/README.rst
===================================================================
--- speech_recognition-3.12.0.orig/README.rst
+++ speech_recognition-3.12.0/README.rst
@@ -283,9 +283,7 @@ To hack on this library, first make sure
- Most of the library code lives in ``speech_recognition/__init__.py``.
- Examples live under the ``examples/`` `directory <https://github.com/Uberi/speech_recognition/tree/master/examples>`__, and the demo script lives in ``speech_recognition/__main__.py``.
-- The FLAC encoder binaries are in the ``speech_recognition/`` `directory <https://github.com/Uberi/speech_recognition/tree/master/speech_recognition>`__.
- Documentation can be found in the ``reference/`` `directory <https://github.com/Uberi/speech_recognition/tree/master/reference>`__.
-- Third-party libraries, utilities, and reference material are in the ``third-party/`` `directory <https://github.com/Uberi/speech_recognition/tree/master/third-party>`__.
To install/reinstall the library locally, run ``python -m pip install -e .[dev]`` in the project `root directory <https://github.com/Uberi/speech_recognition>`__.
@@ -316,41 +314,6 @@ To ensure RST is well-formed:
Testing is also done automatically by GitHub Actions, upon every push.
-FLAC Executables
-~~~~~~~~~~~~~~~~
-
-The included ``flac-win32`` executable is the `official FLAC 1.3.2 32-bit Windows binary <http://downloads.xiph.org/releases/flac/flac-1.3.2-win.zip>`__.
-
-The included ``flac-linux-x86`` and ``flac-linux-x86_64`` executables are built from the `FLAC 1.3.2 source code <http://downloads.xiph.org/releases/flac/flac-1.3.2.tar.xz>`__ with `Manylinux <https://github.com/pypa/manylinux>`__ to ensure that it's compatible with a wide variety of distributions.
-
-The built FLAC executables should be bit-for-bit reproducible. To rebuild them, run the following inside the project directory on a Debian-like system:
-
-.. code:: bash
-
- # download and extract the FLAC source code
- cd third-party
- sudo apt-get install --yes docker.io
-
- # build FLAC inside the Manylinux i686 Docker image
- tar xf flac-1.3.2.tar.xz
- sudo docker run --tty --interactive --rm --volume "$(pwd):/root" quay.io/pypa/manylinux1_i686:latest bash
- cd /root/flac-1.3.2
- ./configure LDFLAGS=-static # compiler flags to make a static build
- make
- exit
- cp flac-1.3.2/src/flac/flac ../speech_recognition/flac-linux-x86 && sudo rm -rf flac-1.3.2/
-
- # build FLAC inside the Manylinux x86_64 Docker image
- tar xf flac-1.3.2.tar.xz
- sudo docker run --tty --interactive --rm --volume "$(pwd):/root" quay.io/pypa/manylinux1_x86_64:latest bash
- cd /root/flac-1.3.2
- ./configure LDFLAGS=-static # compiler flags to make a static build
- make
- exit
- cp flac-1.3.2/src/flac/flac ../speech_recognition/flac-linux-x86_64 && sudo rm -r flac-1.3.2/
-
-The included ``flac-mac`` executable is extracted from `xACT 2.39 <http://xact.scottcbrown.org/>`__, which is a frontend for FLAC 1.3.2 that conveniently includes binaries for all of its encoders. Specifically, it is a copy of ``xACT 2.39/xACT.app/Contents/Resources/flac`` in ``xACT2.39.zip``.
-
Authors
-------
@@ -389,9 +352,3 @@ Copyright 2014-2017 `Anthony Zhang (Uber
SpeechRecognition is made available under the 3-clause BSD license. See ``LICENSE.txt`` in the project's `root directory <https://github.com/Uberi/speech_recognition>`__ for more information.
For convenience, all the official distributions of SpeechRecognition already include a copy of the necessary copyright notices and licenses. In your project, you can simply **say that licensing information for SpeechRecognition can be found within the SpeechRecognition README, and make sure SpeechRecognition is visible to users if they wish to see it**.
-
-SpeechRecognition distributes source code, binaries, and language files from `CMU Sphinx <http://cmusphinx.sourceforge.net/>`__. These files are BSD-licensed and redistributable as long as copyright notices are correctly retained. See ``speech_recognition/pocketsphinx-data/*/LICENSE*.txt`` and ``third-party/LICENSE-Sphinx.txt`` for license details for individual parts.
-
-SpeechRecognition distributes source code and binaries from `PyAudio <http://people.csail.mit.edu/hubert/pyaudio/>`__. These files are MIT-licensed and redistributable as long as copyright notices are correctly retained. See ``third-party/LICENSE-PyAudio.txt`` for license details.
-
-SpeechRecognition distributes binaries from `FLAC <https://xiph.org/flac/>`__ - ``speech_recognition/flac-win32.exe``, ``speech_recognition/flac-linux-x86``, and ``speech_recognition/flac-mac``. These files are GPLv2-licensed and redistributable, as long as the terms of the GPL are satisfied. The FLAC binaries are an `aggregate <https://www.gnu.org/licenses/gpl-faq.html#MereAggregation>`__ of `separate programs <https://www.gnu.org/licenses/gpl-faq.html#NFUseGPLPlugins>`__, so these GPL restrictions do not apply to the library or your programs that use the library, only to FLAC itself. See ``LICENSE-FLAC.txt`` for license details.

python-SpeechRecognition.changes Normal file

@@ -0,0 +1,120 @@
-------------------------------------------------------------------
Wed Mar 26 00:52:33 UTC 2025 - Steve Kowalik <steven.kowalik@suse.com>
- Lowercase metadata directory name.
-------------------------------------------------------------------
Thu Dec 12 04:26:36 UTC 2024 - Steve Kowalik <steven.kowalik@suse.com>
- Update to 3.12.0:
* New features
+ Groq Support
+ Python 3.13 Support (experimental)
+ Support Python 3.11
+ Support Whisper API
+ Add recognize_tensorflow
+ Add recognize_vosk
+ Add recognize_amazon and recognize_assemblyai
+ Add recognize_whisper
* Bugfixes & improvements
+ Support Python maintenance: Add 3.12, Drop 3.8
+ Updated to the latest OpenAI API changes
+ Replace with in-memory stream on recognize_whisper
+ Remove prints that shouldn't be printed by default
+ Update to speechContext formatting for recognize_google_cloud
+ Fix for OSError: [Errno -9988] Stream closed Error
+ Add parameter to change profanity filter level for Google Speech
Recognition
+ Updating Wit API version (20160526 -> 20170307)
+ Google cloud speech library
+ Fix large cpu consumption in snowboy detect
+ Replace Bing Speech API with Azure Speech API
+ Removed duplicate code
+ fix recognize_google_cloud
+ Specify fp16 parameter for whisper
- Switch to pyproject macros.
- Run at least a portion of the testsuite.
- No more greedy globs in %files.
- Drop patch 406-google-cloud-speech.patch, included upstream.
- Refresh patch fix-readme.patch
-------------------------------------------------------------------
Thu Jul 14 09:04:00 UTC 2022 - Matej Cepl <mcepl@suse.com>
- Actually do what you have promised in changelog.
-------------------------------------------------------------------
Tue Jul 12 17:48:49 UTC 2022 - Matej Cepl <mcepl@suse.com>
- Add 406-google-cloud-speech.patch
(gh#Uberi/speech_recognition#406) to switch dependency from
oauth2client and googleclient to google-cloud-speech.
-------------------------------------------------------------------
Thu Nov 7 15:51:16 UTC 2019 - Matej Cepl <mcepl@suse.com>
- Run through spec-cleaner
-------------------------------------------------------------------
Tue Dec 4 12:54:35 UTC 2018 - Matej Cepl <mcepl@suse.com>
- Remove superfluous devel dependency for noarch package
-------------------------------------------------------------------
Thu May 3 15:38:29 UTC 2018 - alarrosa@suse.com
- Run spec_cleaner
-------------------------------------------------------------------
Thu May 3 09:48:57 UTC 2018 - alarrosa@suse.com
- Add fix-readme.patch to keep clear that the flac binaries and third-party
source files are removed and are not used by the python-SpeechRecognition
package.
-------------------------------------------------------------------
Fri Jan 5 09:38:06 UTC 2018 - alarrosa@suse.com
- Update to version 3.8.1
* Snowboy hotwords support for highly efficient, performant listening.
This is implemented as the snowboy_configuration parameter of
recognizer_instance.listen.
* Configurable Pocketsphinx models - you can now specify your own acoustic
parameters, language model, and phoneme dictionary, using the language
parameter of recognizer_instance.recognize_sphinx.
* audio_data_instance.get_segment(start_ms=None, end_ms=None) is a new method
that can be called on any AudioData instance to get a segment of the audio
starting at start_ms and ending at end_ms. This is really useful when you
want to get, say, only the first five seconds of some audio.
* The stopper function returned by listen_in_background now accepts one
parameter, wait_for_stop (defaulting to True for backwards compatibility),
which determines whether the function will wait for the background thread
to fully shutdown before returning. One advantage is that if wait_for_stop
is False, you can call the stopper function from any thread!
* New example, demonstrating how to simultaneously listen to and recognize
speech with the threaded producer/consumer pattern: threaded_workers.py.
* Various improvements and bugfixes:
* Python 3 style type annotations in library documentation.
* recognize_google_cloud now uses the v1 rather than the beta API.
* recognize_google_cloud now returns timestamp info when the show_all
parameter is True.
* recognize_bing won't time out as often on credential requests, due to a
longer default timeout.
* recognize_google_cloud timeouts respect
recognizer_instance.operation_timeout now.
* Any recognizers using FLAC audio were broken inside Linux on Docker -
this is now fixed.
* Various documentation and lint fixes.
* Lots of small build system improvements.
-------------------------------------------------------------------
Wed Nov 15 16:20:51 UTC 2017 - alarrosa@suse.com
- Added python-SpeechRecognition-common-en-US package to install the
pocketsphinx-data directory in a common place for python2 and python3
packages.
-------------------------------------------------------------------
Mon Sep 4 17:54:27 UTC 2017 - alarrosa@suse.com
- Initial release of python-SpeechRecognition 3.7.1

python-SpeechRecognition.spec Normal file

@@ -0,0 +1,115 @@
#
# spec file for package python-SpeechRecognition
#
# Copyright (c) 2025 SUSE LLC
#
# All modifications and additions to the file contributed by third parties
# remain the property of their copyright owners, unless otherwise agreed
# upon. The license for this file, and modifications and additions to the
# file, is the same license as for the pristine package itself (unless the
# license for the pristine package is not an Open Source License, in which
# case the license is the MIT License). An "Open Source License" is a
# license that conforms to the Open Source Definition (Version 1.9)
# published by the Open Source Initiative.
# Please submit bugfixes or comments via https://bugs.opensuse.org/
#
%define justpython python
Name: python-SpeechRecognition
Version: 3.12.0
Release: 0
Summary: Library for performing speech recognition, with support for several engines
# Note: The sources include third party code with different licenses.
# We remove all those before building so it's not installed in the
# generated packages.
License: BSD-3-Clause
URL: https://github.com/Uberi/speech_recognition#readme
Source: https://github.com/Uberi/speech_recognition/archive/%{version}.tar.gz
# Remove information about unbundled libraries.
Patch0: fix-readme.patch
BuildRequires: %{python_module audioop-lts if %python-base >= 3.13}
BuildRequires: %{python_module base >= 3.9}
BuildRequires: %{python_module google-cloud-speech}
BuildRequires: %{python_module pip}
BuildRequires: %{python_module pytest}
BuildRequires: %{python_module setuptools}
BuildRequires: %{python_module standard-aifc if %python-base >= 3.13}
BuildRequires: %{python_module typing-extensions}
BuildRequires: %{python_module wheel}
BuildRequires: fdupes
BuildRequires: flac
BuildRequires: python-rpm-macros
Requires: %{justpython}-SpeechRecognition-common-en-US
Requires: flac
Requires: python-PyAudio
Requires: python-google-cloud-speech
Requires: python-typing-extensions
%if 0%{?python_version_nodots} >= 313
Requires: python-audioop-lts
Requires: python-standard-aifc
%endif
Recommends: python-pocketsphinx-python
BuildArch: noarch
%python_subpackages
%description
SpeechRecognition Library for performing speech recognition, with support for several engines and APIs, online and offline.
The Speech recognition engine/API supports CMU Sphinx (works offline), Google Speech Recognition,
Google Cloud Speech API, Wit.ai, Microsoft Bing Voice Recognition, Houndify API and
IBM Speech to Text
%package -n python-SpeechRecognition-common-en-US
Summary: Common files for en-US language model support in python-speech_recognition
%description -n python-SpeechRecognition-common-en-US
SpeechRecognition Library for performing speech recognition, with support for
several engines and APIs, online and offline.
The Speech recognition engine/API supports CMU Sphinx (works offline), Google
Speech Recognition, Google Cloud Speech API, Wit.ai, Microsoft Bing Voice
Recognition, Houndify API and IBM Speech to Text.
This package contains the data for en-US language model to be used by
pocketsphinx from python-SpeechRecognition.
%prep
%autosetup -p1 -n speech_recognition-%{version}
rm -Rf third-party
rm speech_recognition/flac-*
rm LICENSE-FLAC.txt
%build
%pyproject_wheel
%install
%pyproject_install
# Do not ship tests
%python_expand rm -r %{buildroot}%{$python_sitelib}/tests
%python_expand %fdupes %{buildroot}%{$python_sitelib}
mkdir -p %{buildroot}%{_datadir}/speech_recognition
cp -Ra speech_recognition/pocketsphinx-data %{buildroot}%{_datadir}/speech_recognition/
%python_expand rm -Rf %{buildroot}%{$python_sitelib}/speech_recognition/pocketsphinx-data
%python_expand ln -s %{_datadir}/speech_recognition/pocketsphinx-data %{buildroot}%{$python_sitelib}/speech_recognition/
%check
# No internet access for OpenAI or Groq
ignore="--ignore tests/recognizers/test_groq.py --ignore tests/recognizers/test_openai.py"
ignore+=" --ignore tests/test_whisper_recognition.py"
# PocketSphinx is only built for primary Python
%pytest $ignore -k 'not test_sphinx_'
%files %{python_files}
%license LICENSE.txt
%{python_sitelib}/speech_recognition
%{python_sitelib}/speechrecognition-%{version}.dist-info
%dir %{_datadir}/speech_recognition/
%dir %{_datadir}/speech_recognition/pocketsphinx-data
%files -n python-SpeechRecognition-common-en-US
%{_datadir}/speech_recognition/pocketsphinx-data/en-US
%changelog
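
As a quick sanity check of the %install relocation above (a sketch under the assumption that the package is installed as built by this spec, not part of the package itself), the pocketsphinx-data directory should remain reachable through the symlink created in site-packages:

    import os
    import speech_recognition as sr

    # the spec moves the bundled data to /usr/share and leaves a symlink behind
    data_dir = os.path.join(os.path.dirname(sr.__file__), "pocketsphinx-data")
    print(os.path.realpath(data_dir))  # expected: /usr/share/speech_recognition/pocketsphinx-data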