From ba4b4d4ff7b8c88040392d3e0b6c106e2906169812a6f4e5066836a813d83948 Mon Sep 17 00:00:00 2001 From: Steve Kowalik Date: Tue, 7 Jan 2025 03:51:26 +0000 Subject: [PATCH] - Skip test cases that raise TesseractNotFoundError. - Switch to pyproject macros. - Correct Requires, we need packaging not pkg_resources. OBS-URL: https://build.opensuse.org/package/show/devel:languages:python/python-pytesseract?expand=0&rev=27 --- .gitattributes | 23 ++++++++ .gitignore | 1 + python-pytesseract.changes | 116 +++++++++++++++++++++++++++++++++++++ python-pytesseract.spec | 93 +++++++++++++++++++++++++++++ v0.3.13.tar.gz | 3 + 5 files changed, 236 insertions(+) create mode 100644 .gitattributes create mode 100644 .gitignore create mode 100644 python-pytesseract.changes create mode 100644 python-pytesseract.spec create mode 100644 v0.3.13.tar.gz diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..9b03811 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,23 @@ +## Default LFS +*.7z filter=lfs diff=lfs merge=lfs -text +*.bsp filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.gem filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.jar filter=lfs diff=lfs merge=lfs -text +*.lz filter=lfs diff=lfs merge=lfs -text +*.lzma filter=lfs diff=lfs merge=lfs -text +*.obscpio filter=lfs diff=lfs merge=lfs -text +*.oxt filter=lfs diff=lfs merge=lfs -text +*.pdf filter=lfs diff=lfs merge=lfs -text +*.png filter=lfs diff=lfs merge=lfs -text +*.rpm filter=lfs diff=lfs merge=lfs -text +*.tbz filter=lfs diff=lfs merge=lfs -text +*.tbz2 filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.ttf filter=lfs diff=lfs merge=lfs -text +*.txz filter=lfs diff=lfs merge=lfs -text +*.whl filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text diff --git a/.gitignore b/.gitignore new file mode 
100644 index 0000000..57affb6 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +.osc diff --git a/python-pytesseract.changes b/python-pytesseract.changes new file mode 100644 index 0000000..aaa5ad1 --- /dev/null +++ b/python-pytesseract.changes @@ -0,0 +1,116 @@ +------------------------------------------------------------------- +Tue Jan 7 03:50:15 UTC 2025 - Steve Kowalik + +- Skip test cases that raise TesseractNotFoundError. +- Switch to pyproject macros. +- Correct Requires, we need packaging not pkg_resources. + +------------------------------------------------------------------- +Thu Dec 14 20:54:57 UTC 2023 - Dirk Müller + +- update to 0.3.13: + * Add support for Python 3.12 +- update to 0.3.12: + * New functionality `run_and_get_multiple_output` #511 +- update to 0.3.11: + * Fix for config parsing on Windows (#356 and #501) + * Fixes for the default hocr and boxing configs (#106 and #454) + * get_tesseract_version caching is optional and disabled by + default for the user (#411) + * Various CI upgrades + +------------------------------------------------------------------- +Mon Feb 6 10:33:46 UTC 2023 - Daniel Garcia + +- Update to v0.3.10 + * Fix image_to_osd regression (reported by @klavdijS) +- v0.3.9 + * Support only python 3.7+ (3.6 is End of Life) + * Switch from deprecated distutils version parsing to packaging.version + * Add build system requirements in order for pip to properly install Pillow +- v0.3.8 + * Header handling in file_to_dict function by @igor-ma + * Proper raising behavior in case of timeout by @amenezes + * __version__ attribute support by @amenezes + * Couple of bug fixes + +------------------------------------------------------------------- +Mon Mar 1 02:53:00 UTC 2021 - John Vandenberg + +- Update license to Apache-2.0 +- Update to v0.3.7 + * no upstream changelog + +------------------------------------------------------------------- +Tue May 5 22:00:53 UTC 2020 - Matej Cepl + +- Update to 0.3.4: + - Support for WebP images + - 
Support for python 3.8 (CI testing) + - Improved cli error reporting +- Don't use %python3_only command, but properly use alternatives. + +------------------------------------------------------------------- +Mon Mar 23 09:25:20 UTC 2020 - pgajdos@suse.com + +- version update to 0.3.3 + * no upstream changelog + +------------------------------------------------------------------- +Tue Sep 10 11:35:38 UTC 2019 - Tomáš Chvátal + +- Update to 0.3.0: + * no upstream changelog + +------------------------------------------------------------------- +Mon Jul 22 13:31:45 UTC 2019 - Tomáš Chvátal + +- Update to 0.2.7: + * no upstream changelog + +------------------------------------------------------------------- +Tue May 14 21:19:51 UTC 2019 - John Jolly + +- Update to 0.2.6 + + No upstream changelog + +------------------------------------------------------------------- +Tue Dec 4 12:52:56 UTC 2018 - Matej Cepl + +- Remove superfluous devel dependency for noarch package + +------------------------------------------------------------------- +Sun Jul 29 12:17:05 UTC 2018 - jengelh@inai.de + +- Fix some grammar issues and replace future plans by current state. 
+ +------------------------------------------------------------------- +Thu May 24 17:39:43 UTC 2018 - toddrme2178@gmail.com + +- Update to 0.2.0 + * Convert image to RGB mode in order to save as PNG +- Update to 0.1.9 + * Preserve source image extension and metadata info + * Don't delete every file in current directory if the temp_name is not populated + * Remove enum dependency, fix bug with missing text in last line + * Support for different output types + * Added verbose option that returns detailed output from tesseract run +- Update to 0.1.8 + * Add initial support for numpy arrays/opencv images + * Improved method to discard alpha channel + * Add optional nice argument for running tesseract with different priority + * fix python 3 byte string bug +- spec file cleanups + +------------------------------------------------------------------- +Wed Oct 18 16:46:49 UTC 2017 - toddrme2178@gmail.com + +- Implement single-spec version +- Update to version 0.1.7 + * No changelog + +------------------------------------------------------------------- +Tue Jun 10 21:31:13 UTC 2014 - jnweiger@gmail.com + +- pull from pypi. needed by testipy + diff --git a/python-pytesseract.spec b/python-pytesseract.spec new file mode 100644 index 0000000..0855ceb --- /dev/null +++ b/python-pytesseract.spec @@ -0,0 +1,93 @@ +# +# spec file for package python-pytesseract +# +# Copyright (c) 2025 SUSE LLC +# +# All modifications and additions to the file contributed by third parties +# remain the property of their copyright owners, unless otherwise agreed +# upon. The license for this file, and modifications and additions to the +# file, is the same license as for the pristine package itself (unless the +# license for the pristine package is not an Open Source License, in which +# case the license is the MIT License). An "Open Source License" is a +# license that conforms to the Open Source Definition (Version 1.9) +# published by the Open Source Initiative. 
+ +# Please submit bugfixes or comments via https://bugs.opensuse.org/ +# + + +Name: python-pytesseract +Version: 0.3.13 +Release: 0 +Summary: Python wrapper for Google's Tesseract-OCR +License: Apache-2.0 +URL: https://github.com/madmaze/python-tesseract +# https://github.com/madmaze/pytesseract/issues/262 +Source: https://github.com/madmaze/pytesseract/archive/v%{version}.tar.gz +BuildRequires: %{python_module pip} +BuildRequires: %{python_module setuptools} +BuildRequires: %{python_module wheel} +BuildRequires: fdupes +BuildRequires: python-rpm-macros +Requires: python-Pillow +Requires: python-packaging +Requires: tesseract-ocr-traineddata-deu +Requires: tesseract-ocr-traineddata-eng +Requires: pkgconfig(tesseract) +BuildArch: noarch +# SECTION test requirements +BuildRequires: %{python_module Pillow} +BuildRequires: %{python_module pytest} +BuildRequires: tesseract-ocr-traineddata-deu +BuildRequires: tesseract-ocr-traineddata-eng +BuildRequires: tesseract-ocr-traineddata-fra +BuildRequires: tesseract-ocr-traineddata-orientation_and_script_detection +BuildRequires: pkgconfig(tesseract) +# /SECTION +Requires(post): update-alternatives +Requires(postun): update-alternatives +%python_subpackages + +%description +Python-tesseract is an optical character recognition (OCR) tool for Python, +that is, it will recognize and "read" the text embedded in images. + +Python-tesseract is a wrapper for Google's Tesseract-OCR Engine. It can be used +as a stand-alone invocation script to tesseract, as it can read all image types +supported by the Python Imaging Library, including JPEG, PNG, GIF, BMP, TIFF, +and others, whereas tesseract-ocr, by default, only supports TIFF and BMP. +Additionally, if used as a script, python-tesseract will print the recognized +text instead of writing it to a file. There is no support for confidence estimates, and +bounding box data is planned for future releases. 
+ +%prep +%setup -q -n pytesseract-%{version} +sed -i -e '/^#!\//, 1d' pytesseract/pytesseract.py + +%build +%pyproject_wheel + +%install +%pyproject_install +%python_clone -a %{buildroot}%{_bindir}/pytesseract +%python_expand %fdupes %{buildroot}%{$python_sitelib} + +%check +export TESSDATA_PREFIX=%{_datadir}/tessdata/ +# Raises TesseractNotFoundError +%pytest -k 'not test_get_languages' + +%post +%python_install_alternative pytesseract + +%postun +%python_uninstall_alternative pytesseract + +%files %{python_files} +%doc README.rst +%license LICENSE +%python_alternative %{_bindir}/pytesseract +%{python_sitelib}/pytesseract +%{python_sitelib}/pytesseract-%{version}.dist-info + +%changelog diff --git a/v0.3.13.tar.gz b/v0.3.13.tar.gz new file mode 100644 index 0000000..bd25711 --- /dev/null +++ b/v0.3.13.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:454ca16dc7fa59aa9c8ba42500992531773fbdc04caef1b39611755cae9f34bf +size 1099109