From 091bc7660a2356a35c9bb2df811770f4c312b04fb852802b13cfa6808920ef58 Mon Sep 17 00:00:00 2001
From: Matej Cepl <mcepl@suse.com>
Date: Wed, 6 Nov 2024 16:22:16 +0000
Subject: [PATCH] - Update to 16.6.0:   - Fixed an issue where damaged PDFs
 would fail with --redo-ocr.     :issue:`1403`   - Fixed an error that
 prevented JBIG2 optimization on Windows     if the image was optimized in an
 earlier step. :issue:`1396`   - Fixed an error detecting the version of
 unpaper 7.0.0.     :issue:`1409`   - Fixed a performance regression when
 scanning pages.     :issue:`1378`. Thanks @aliemjay.   - Fixed Alpine Docker
 image by enforcing Alpine 3.19. Alpine     3.20 includes a defective version
 of Tesseract OCR and so is     not usable.   - Upgraded Ubuntu Docker image
 to use Ubuntu 24.04.   - Build and test scripts/actions switched to uv.   -
 When running in a container, we now remind the user that     temporary
 folders are inside the container and may not be     accessible.   - Fixed
 Linux test coverage matrix, which was missing some key     versions. - Update
 to 16.5.0:   - Fixed issue with interpreting PDFs that have images with    
 array masks. :issue:`1377`   - Enabled testing on Python 3.13.   - Fixed a
 test that did not work correctly but still passed.     :issue:`1382`   -
 Improved "PDF/A conversion failed" warning message to better     describe
 implications.   - Updated documentation to better explain OCR_JSON_SETTINGS
 in     batch processing.   - Build backend changed from setuptools to
 hatchling.

OBS-URL: https://build.opensuse.org/package/show/devel:languages:python/python-ocrmypdf?expand=0&rev=2
---
 ocrmypdf-16.0.4.tar.gz  |   3 -
 ocrmypdf-16.6.0.tar.gz  |   3 +
 python-ocrmypdf.changes | 129 ++++++++++++++++++++++++++++++++++++++++
 python-ocrmypdf.spec    |  74 ++++++++++++-----------
 4 files changed, 170 insertions(+), 39 deletions(-)
 delete mode 100644 ocrmypdf-16.0.4.tar.gz
 create mode 100644 ocrmypdf-16.6.0.tar.gz

diff --git a/ocrmypdf-16.0.4.tar.gz b/ocrmypdf-16.0.4.tar.gz
deleted file mode 100644
index beacf48..0000000
--- a/ocrmypdf-16.0.4.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:beddf3156af9057e828004c5054dcc048a22dff4a02628558f3801d9b33fbe54
-size 6718994
diff --git a/ocrmypdf-16.6.0.tar.gz b/ocrmypdf-16.6.0.tar.gz
new file mode 100644
index 0000000..5af96e1
--- /dev/null
+++ b/ocrmypdf-16.6.0.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7b29eedf3c4859660b5ac78db8e7383ab1e7752194a7d99ce165e4365bba4232
+size 6689283
diff --git a/python-ocrmypdf.changes b/python-ocrmypdf.changes
index eb12f49..9c1d250 100644
--- a/python-ocrmypdf.changes
+++ b/python-ocrmypdf.changes
@@ -1,3 +1,132 @@
+-------------------------------------------------------------------
+Wed Nov  6 14:57:33 UTC 2024 - Matej Cepl <mcepl@cepl.eu>
+
+- Update to 16.6.0:
+  - Fixed an issue where damaged PDFs would fail with --redo-ocr.
+    :issue:`1403`
+  - Fixed an error that prevented JBIG2 optimization on Windows
+    if the image was optimized in an earlier step. :issue:`1396`
+  - Fixed an error detecting the version of unpaper 7.0.0.
+    :issue:`1409`
+  - Fixed a performance regression when scanning pages.
+    :issue:`1378`. Thanks @aliemjay.
+  - Fixed Alpine Docker image by enforcing Alpine 3.19. Alpine
+    3.20 includes a defective version of Tesseract OCR and so is
+    not usable.
+  - Upgraded Ubuntu Docker image to use Ubuntu 24.04.
+  - Build and test scripts/actions switched to uv.
+  - When running in a container, we now remind the user that
+    temporary folders are inside the container and may not be
+    accessible.
+  - Fixed Linux test coverage matrix, which was missing some key
+    versions.
+- Update to 16.5.0:
+  - Fixed issue with interpreting PDFs that have images with
+    array masks. :issue:`1377`
+  - Enabled testing on Python 3.13.
+  - Fixed a test that did not work correctly but still passed.
+    :issue:`1382`
+  - Improved "PDF/A conversion failed" warning message to better
+    describe implications.
+  - Updated documentation to better explain OCR_JSON_SETTINGS in
+    batch processing.
+  - Build backend changed from setuptools to hatchling.
+- Update to 16.4.3:
+  - Work around pdfminer.six issue where a token on the buffer
+    boundary is incorrectly parsed as two tokens. :issue:`1361`
+  - New rules are applied to stencil masks and explicit masks
+    when calculating the optimal page DPI for rendering.
+    :issue:`1362`
+  - Fixed attempts to use an incompatible jbig2.EXE provided by
+    TeX Live. :issue:`1363`
+- Update to 16.4.2:
+  - Fixed order of filenames passed to Ghostscript for PDF/A
+    generation. :issue:`1359`
+  - Suppressed missing jbig2dec warning message. :issue:`1358`
+  - Fixed calculation of image size when soft mask dimensions
+    don't match image dimension. :issue:`1351`
+  - Several fixes to documentation. Thanks to users Iris and
+    JoKalliauer who contributed these changes.
+  - Fixed error on processing PDFs that are missing certain image
+    metadata. :issue:`1315`
+- Update to 16.4.1:
+  - Fixed calculation of image printed area (used in finding
+    weighted DPI for OCR). :issue:`1334`
+  - Fixed "NotImplementedError: not sure how to get colorspace"
+    error messages in logs which simply record a failure
+    to optimize images with print production colorspaces.
+    :issue:`1315`
+- Update to 16.4.0:
+  - Selecting the osd and equ pseudo-languages with -l/--language
+    now exits with an error when using Tesseract OCR, because
+    these are not regular Tesseract languages but implementation
+    details. Using them can cause Tesseract to crash.
+  - The hOCR renderer is more tolerant of extra whitespace in
+    input files.
+  - watcher.py now changes the output file extension to .pdf when
+    the input is not .pdf.
+  - Improved handling of PDFs that contain circularly referenced
+    Form XObjects. :issue:`1321`
+  - Fixed Alpine Docker image for ARM64, which was not building
+    correctly.
+  - Docker images now use pikepdf 9.0.0.
+  - Prevent use of Tesseract OCR 5.4.0, a version with known
+    regressions.
+  - Disabled progressbar for "Linearizing" when --no-progress-bar
+    set.
+  - Fixed some tests that warn about missing JBIG2 decoding via
+    pikepdf, by installing the necessary libraries during tests.
+- Update to 16.3.1:
+  - Fixed a test suite failure with Ghostscript 10.03.0+.
+    :issue:`1316`
+  - Fixed an issue with the presentation of the "OCR" progress
+    bar. :issue:`1313`
+- Update to 16.3.0:
+  - Fixed progress bar not displaying for Ghostscript PDF/A
+    conversion. :issue:`1313`
+  - Added progress bar for linearization. :issue:`1313`
+  - If --rotate-pages-threshold issued without --rotate-pages we
+    now exit with an error since the user likely intended to use
+    --rotate-pages. :issue:`1309`
+  - If Tesseract hOCR gives an invalid line box, print an error
+    message instead of exiting with an error. :issue:`1312`
+- Update to 16.2.0:
+  - Fixed issue 'NoneType' object has no attribute 'get' when
+    optimizing certain PDFs. :issue:`1293,1271`
+  - Switched formatting from black to ruff.
+  - Added support for sending sidecar output to io.BytesIO.
+  - Added support for converting HEIF/HEIC images (the native
+    image of iPhones and some other devices) to PDFs, when the
+    appropriate pi-heif library is installed. This library is
+    marked as a dependency, but maintainers may opt out if
+    needed.
+  - We now default to downsampling large images that would
+    exceed Tesseract's internal limits, but only if they cause
+    processing to fail. Previously, this behavior only occurred
+    if specifically requested on command line. It can still be
+    configured and disabled. See the --tesseract command line
+    options.
+  - Added Macports install instructions. Thanks @akierig.
+  - Improved logging output when an unexpected error occurs while
+    trying to obtain the version of a third party program.
+- Update to 16.1.2:
+  - Fixed test suite failure when using Ghostscript 10.3.
+  - Other minor corrections.
+- Update to 16.1.1:
+  - Fixed PyPy 3.10 support.
+- Update to 16.1.0:
+  - Improved hOCR renderer is now default for left to right
+    languages.
+  - Improved handling of rotated pages. Previously, OCR text
+    might be missing for pages that were rotated with a /Rotate
+    tag on the page entry.
+  - Improved handling of cropped pages. Previously, in some
+    cases a page with a crop box would not have its OCR applied
+    correctly and misalignment between OCR text and visible text
+    could occur.
+  - Documentation improvements, especially installation
+    instructions for less common platforms.
+
 -------------------------------------------------------------------
 Mon Jan  8 15:26:44 UTC 2024 - ecsos <ecsos@opensuse.org>
 
diff --git a/python-ocrmypdf.spec b/python-ocrmypdf.spec
index 12c0373..2be055c 100644
--- a/python-ocrmypdf.spec
+++ b/python-ocrmypdf.spec
@@ -1,7 +1,7 @@
 #
 # spec file for package python-ocrmypdf
 #
-# Copyright (c) 2023 SUSE LLC
+# Copyright (c) 2024 SUSE LLC
 #
 # All modifications and additions to the file contributed by third parties
 # remain the property of their copyright owners, unless otherwise agreed
@@ -19,68 +19,68 @@
 %define skip_python39 1
 %{?sle15_python_module_pythons}
 Name:           python-ocrmypdf
-Version:        16.0.4
+Version:        16.6.0
 Release:        0
 Summary:        OCRmyPDF adds an OCR text layer to scanned PDF files
 License:        MPL-2.0
 Group:          Development/Languages/Python
 URL:            https://github.com/ocrmypdf/OCRmyPDF
 Source:         https://files.pythonhosted.org/packages/source/o/ocrmypdf/ocrmypdf-%{version}.tar.gz
-BuildRequires:  python-rpm-macros
-BuildRequires:  %{python_module pip}
 BuildRequires:  %{python_module base >= 3.10}
-BuildRequires:  %{python_module setuptools >= 61}
-BuildRequires:  %{python_module setuptools_scm >= 7.0.5}
+BuildRequires:  %{python_module hatch-vcs}
+BuildRequires:  %{python_module hatchling}
+BuildRequires:  %{python_module pip}
 BuildRequires:  %{python_module wheel}
+BuildRequires:  python-rpm-macros
 # SECTION test requirements
 BuildRequires:  %{python_module deprecation >= 2.1.0}
+BuildRequires:  %{python_module Pillow >= 10.0.1}
+BuildRequires:  %{python_module coverage >= 6.2}
+BuildRequires:  %{python_module hypothesis >= 6.36.0}
 BuildRequires:  %{python_module img2pdf >= 0.5}
 BuildRequires:  %{python_module packaging >= 20}
 BuildRequires:  %{python_module pdfminer.six >= 20220319}
 BuildRequires:  %{python_module pikepdf >= 8.10.1}
-BuildRequires:  %{python_module Pillow >= 10.0.1}
 BuildRequires:  %{python_module pluggy >= 1}
-BuildRequires:  %{python_module reportlab >= 3.6.8}
-BuildRequires:  %{python_module rich >= 13}
-BuildRequires:  %{python_module coverage >= 6.2}
-BuildRequires:  %{python_module hypothesis >= 6.36.0}
 BuildRequires:  %{python_module pytest >= 6.2.5}
 BuildRequires:  %{python_module pytest-cov >= 3.0.0}
 BuildRequires:  %{python_module pytest-xdist >= 2.5.0}
 BuildRequires:  %{python_module python-xmp-toolkit == 2.0.1}
-BuildRequires:  %{python_module types-humanfriendly}
+BuildRequires:  %{python_module reportlab >= 3.6.8}
+BuildRequires:  %{python_module rich >= 13}
 BuildRequires:  %{python_module types-Pillow}
+BuildRequires:  %{python_module types-humanfriendly}
 BuildRequires:  tesseract-ocr >= 5.3.2
 BuildRequires:  tesseract-ocr-traineddata-eng >= 4.1.0
 # upstream use BuildRequires:  ghostscript >= 10.02.1
 BuildRequires:  ghostscript >= 9.55
 # /SECTION
-BuildRequires:    fdupes
-Requires(post):   update-alternatives
+BuildRequires:  fdupes
+Requires(post): update-alternatives
 Requires(postun): update-alternatives
-Requires:         python-deprecation >= 2.1.0
-Requires:         python-img2pdf >= 0.5
-Requires:         python-packaging >= 20
-Requires:         python-pdfminer.six >= 20220319
-Requires:         python-pikepdf >= 8.10.1
-Requires:         python-Pillow >= 10.0.1
-Requires:         python-pluggy >= 1
-Requires:         python-reportlab >= 3.6.8
-Requires:         python-rich >= 13
-Requires:         tesseract-ocr >= 5.3.2
-Requires:         tesseract-ocr-traineddata-eng >= 4.1.0
-Requires:         tesseract-ocr-traineddata-deu >= 4.1.0
+Requires:       python-Pillow >= 10.0.1
+Requires:       python-deprecation >= 2.1.0
+Requires:       python-img2pdf >= 0.5
+Requires:       python-packaging >= 20
+Requires:       python-pdfminer.six >= 20220319
+Requires:       python-pikepdf >= 8.10.1
+Requires:       python-pluggy >= 1
+Requires:       python-reportlab >= 3.6.8
+Requires:       python-rich >= 13
+Requires:       tesseract-ocr >= 5.3.2
+Requires:       tesseract-ocr-traineddata-deu >= 4.1.0
+Requires:       tesseract-ocr-traineddata-eng >= 4.1.0
 # upstream use Requires:  ghostscript >= 10.01.2
-Requires:         ghostscript >= 9.55
-Suggests:         python-sphinx
-Suggests:         python-sphinx-issues
-Suggests:         python-sphinx-rtd-theme
-Suggests:         python-PyMuPDF >= 1.19.1
-Suggests:         python-watchdog >= 1.0.2
-Suggests:         python-typer
-Suggests:         python-python-dotenv
-Suggests:         python-Flask >= 2.0.1
-BuildArch:        noarch
+Requires:       ghostscript >= 9.55
+Suggests:       python-sphinx
+Suggests:       python-sphinx-issues
+Suggests:       python-sphinx-rtd-theme
+Suggests:       python-PyMuPDF >= 1.19.1
+Suggests:       python-watchdog >= 1.0.2
+Suggests:       python-typer
+Suggests:       python-python-dotenv
+Suggests:       python-Flask >= 2.0.1
+BuildArch:      noarch
 %python_subpackages
 
 %description
@@ -137,6 +137,8 @@ donttest+=" or test_tesseract_oem"
 donttest+=" or test_tesseract_thresholding"
 donttest+=" or test_user_words_ocr"
 donttest+=" or test_very_high_dpi"
+# gh#ocrmypdf/OCRmyPDF#1423
+donttest+=" or test_malformed_docinfo"
 %pytest -k "not ($donttest)"
 
 %post