Accepting request 1132937 from home:jonapap

- Update to 20221105
- Option to disable boxes flow layout analysis when using pdf2txt
- Add support for PDF 2.0 (ISO 32000-2) AES-256 encryption
- Support for Paeth PNG filter compression (predictor value = 4)
- Type annotations
- Export type annotations from pypi package per PEP561
- Support for identity cmap's
- Add support for PDF page labels
- Installation of Pillow as an optional extra dependency
- Exporting images without any specific encoding
- Output converter for the hOCR format
- Font name aliases for Arial, Courier New and Times New Roman
- Documentation on why special characters can sometimes not be extracted
- Remove patch python-pdfminer.six-remove-nose.patch
- Update dependencies

OBS-URL: https://build.opensuse.org/request/show/1132937
OBS-URL: https://build.opensuse.org/package/show/devel:languages:python/python-pdfminer.six?expand=0&rev=12
2023-12-14 09:40:54 +00:00
parent f4809546ff
commit aa03b1dd03
6 changed files with 44 additions and 980 deletions
--- a/import-from-non-pythonpath-files.patch
+++ b/import-from-non-pythonpath-files.patch
@@ -1,36 +1,34 @@
-Index: pdfminer.six-20200726/tests/test_tools_dumppdf.py
-===================================================================
--- pdfminer.six-20200726.orig/tests/test_tools_dumppdf.py
-+++ pdfminer.six-20200726/tests/test_tools_dumppdf.py
-@@ -5,8 +5,11 @@ from tempfile import NamedTemporaryFile
+diff '--color=auto' -rub pdfminer.six-20221105.orig/tests/test_tools_dumppdf.py pdfminer.six-20221105/tests/test_tools_dumppdf.py
+--- pdfminer.six-20221105.orig/tests/test_tools_dumppdf.py	2022-11-05 12:22:08.000000000 -0400
+++ pdfminer.six-20221105/tests/test_tools_dumppdf.py	2023-12-11 12:12:06.044210731 -0500
+@@ -4,7 +4,11 @@
 
 from helpers import absolute_sample_path
- from pdfminer.pdfdocument import PDFNoValidXRefWarning
+ from tempfilepath import TemporaryFilePath
 -from tools import dumppdf
- 
+ 
 +import importlib.util
 +spec = importlib.util.spec_from_file_location("dumppdf", "tools/dumppdf.py")
 +dumppdf = importlib.util.module_from_spec(spec)
 +spec.loader.exec_module(dumppdf)
 
+ 
 def run(filename, options=None):
-     absolute_path = absolute_sample_path(filename)
-Index: pdfminer.six-20200726/tests/test_tools_pdf2txt.py
-===================================================================
--- pdfminer.six-20200726.orig/tests/test_tools_pdf2txt.py
-+++ pdfminer.six-20200726/tests/test_tools_pdf2txt.py
-@@ -4,9 +4,13 @@ import os
- from shutil import rmtree
- from tempfile import NamedTemporaryFile, mkdtemp
+diff '--color=auto' -rub pdfminer.six-20221105.orig/tests/test_tools_pdf2txt.py pdfminer.six-20221105/tests/test_tools_pdf2txt.py
+--- pdfminer.six-20221105.orig/tests/test_tools_pdf2txt.py	2022-11-05 12:22:08.000000000 -0400
+++ pdfminer.six-20221105/tests/test_tools_pdf2txt.py	2023-12-11 12:12:40.848031179 -0500
+@@ -3,10 +3,13 @@
+ from tempfile import mkdtemp
+ import filecmp
 
 -import tools.pdf2txt as pdf2txt
 from helpers import absolute_sample_path
+ from tempfilepath import TemporaryFilePath
 
 +import importlib.util
 +spec = importlib.util.spec_from_file_location("pdf2txt", "tools/pdf2txt.py")
 +pdf2txt = importlib.util.module_from_spec(spec)
 +spec.loader.exec_module(pdf2txt)
-+
 
 def run(sample_path, options=None):
     absolute_path = absolute_sample_path(sample_path)
--- a/pdfminer.six-20200726.tar.gz
+++ b/pdfminer.six-20200726.tar.gz
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:77f0b6953f36aeeeb45ab959fabd8dfc964b7926676d5df3ac2f949cd4d524a3
-size 10260419
--- a/pdfminer.six-20221105.tar.gz
+++ b/pdfminer.six-20221105.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:820242f661589edb1ec8e110423a7cd06d776c54d9a0efdef17d3a4e61c01fa7
+size 10857730
--- a/python-pdfminer.six-remove-nose.patch
+++ b/python-pdfminer.six-remove-nose.patch
@@ -1,951 +0,0 @@
-Index: pdfminer.six-20200726/Makefile
-===================================================================
--- pdfminer.six-20200726.orig/Makefile	2020-07-26 15:14:15.000000000 +0200
-+++ pdfminer.six-20200726/Makefile	2020-09-08 17:23:52.811565562 +0200
-@@ -55,4 +55,4 @@ $(CMAPDST)/to-unicode-Adobe-Korea1.pickl
- 		$(CMAPDST) Adobe-Korea1 $(CMAPSRC)/cid2code_Adobe_Korea1.txt
- 
- test: cmap
-	nosetests
-+	pytest
-Index: pdfminer.six-20200726/setup.py
-===================================================================
--- pdfminer.six-20200726.orig/setup.py	2020-07-26 15:14:15.000000000 +0200
-+++ pdfminer.six-20200726/setup.py	2020-09-08 17:24:16.315707408 +0200
-@@ -18,7 +18,7 @@ setup(
-         'sortedcontainers',
-     ],
-     extras_require={
-        "dev": ["nose", "tox"],
-+        "dev": ["pytest", "tox"],
-         "docs": ["sphinx", "sphinx-argparse"],
-     },
-     description='PDF parser and analyzer',
-Index: pdfminer.six-20200726/tests/test_converter.py
-===================================================================
--- pdfminer.six-20200726.orig/tests/test_converter.py	2020-07-26 15:14:15.000000000 +0200
-+++ pdfminer.six-20200726/tests/test_converter.py	2020-09-08 17:57:40.627803775 +0200
-@@ -1,24 +1,24 @@
-from nose.tools import assert_equal
-+import unittest
- 
- from pdfminer.converter import PDFLayoutAnalyzer
- from pdfminer.layout import LTContainer
- from pdfminer.pdfinterp import PDFGraphicState
- 
- 
-class TestPaintPath():
-+class TestPaintPath(unittest.TestCase):
-     def test_paint_path(self):
-         path = [('m', 6, 7), ('l', 7, 7)]
-         analyzer = self._get_analyzer()
-         analyzer.cur_item = LTContainer([0, 100, 0, 100])
-         analyzer.paint_path(PDFGraphicState(), False, False, False, path)
-        assert_equal(len(analyzer.cur_item._objs), 1)
-+        self.assertEqual(len(analyzer.cur_item._objs), 1)
- 
-     def test_paint_path_mlllh(self):
-         path = [('m', 6, 7), ('l', 7, 7), ('l', 7, 91),  ('l', 6, 91), ('h',)]
-         analyzer = self._get_analyzer()
-         analyzer.cur_item = LTContainer([0, 100, 0, 100])
-         analyzer.paint_path(PDFGraphicState(), False, False, False, path)
-        assert_equal(len(analyzer.cur_item), 1)
-+        self.assertEqual(len(analyzer.cur_item), 1)
- 
-     def test_paint_path_multiple_mlllh(self):
-         """Path from samples/contrib/issue-00369-excel.pdf"""
-@@ -30,7 +30,7 @@ class TestPaintPath():
-         analyzer = self._get_analyzer()
-         analyzer.cur_item = LTContainer([0, 100, 0, 100])
-         analyzer.paint_path(PDFGraphicState(), False, False, False, path)
-        assert_equal(len(analyzer.cur_item._objs), 3)
-+        self.assertEqual(len(analyzer.cur_item._objs), 3)
- 
-     def _get_analyzer(self):
-         analyzer = PDFLayoutAnalyzer(None)
-Index: pdfminer.six-20200726/tests/test_encodingdb.py
-===================================================================
--- pdfminer.six-20200726.orig/tests/test_encodingdb.py	2020-07-26 15:14:15.000000000 +0200
-+++ pdfminer.six-20200726/tests/test_encodingdb.py	2020-09-08 17:58:10.595984640 +0200
-@@ -4,154 +4,154 @@ See: https://github.com/adobe-type-tools
- While not in the specification, lowercase unicode often occurs in pdf's.
- Therefore lowercase unittest variants are added.
- """
-from nose.tools import assert_raises
-+import unittest
- 
- from pdfminer.encodingdb import name2unicode, EncodingDB
- from pdfminer.psparser import PSLiteral
- 
-
-def test_name2unicode_name_in_agl():
-    """The name "Lcommaaccent" has a single component,
-    which is mapped to the string U+013B by AGL"""
-    assert '\u013B' == name2unicode('Lcommaaccent')
-
-
-def test_name2unicode_uni():
-    """The components "Lcommaaccent," "uni013B," and "u013B"
-    all map to the string U+013B"""
-    assert '\u013B' == name2unicode('uni013B')
-
-
-def test_name2unicode_uni_lowercase():
-    """The components "Lcommaaccent," "uni013B," and "u013B"
-    all map to the string U+013B"""
-    assert '\u013B' == name2unicode('uni013b')
-
-
-def test_name2unicode_uni_with_sequence_of_digits():
-    """The name "uni20AC0308" has a single component,
-    which is mapped to the string U+20AC U+0308"""
-    assert '\u20AC\u0308' == name2unicode('uni20AC0308')
-
-
-def test_name2unicode_uni_with_sequence_of_digits_lowercase():
-    """The name "uni20AC0308" has a single component,
-    which is mapped to the string U+20AC U+0308"""
-    assert '\u20AC\u0308' == name2unicode('uni20ac0308')
-
-
-def test_name2unicode_uni_empty_string():
-    """The name "uni20ac" has a single component,
-    which is mapped to a euro-sign.
-
-    According to the specification this should be mapped to an empty string,
-    but we also want to support lowercase hexadecimals"""
-    assert '\u20ac' == name2unicode('uni20ac')
-
-
-def test_name2unicode_uni_empty_string_long():
-    """The name "uniD801DC0C" has a single component,
-    which is mapped to an empty string
-
-    Neither D801 nor DC0C are in the appropriate set.
-    This form cannot be used to map to the character which is
-    expressed as D801 DC0C in UTF-16, specifically U+1040C.
-    This character can be correctly mapped by using the
-    glyph name "u1040C.
-    """
-    assert_raises(KeyError, name2unicode, 'uniD801DC0C')
-
-
-def test_name2unicode_uni_empty_string_long_lowercase():
-    """The name "uniD801DC0C" has a single component,
-    which is mapped to an empty string
-
-    Neither D801 nor DC0C are in the appropriate set.
-    This form cannot be used to map to the character which is
-    expressed as D801 DC0C in UTF-16, specifically U+1040C.
-    This character can be correctly mapped by using the
-    glyph name "u1040C."""
-    assert_raises(KeyError, name2unicode, 'uniD801DC0C')
-
-
-def test_name2unicode_uni_pua():
-    """"Ogoneksmall" and "uniF6FB" both map to the string that corresponds to
-     U+F6FB."""
-    assert '\uF6FB' == name2unicode('uniF6FB')
-
-
-def test_name2unicode_uni_pua_lowercase():
-    """"Ogoneksmall" and "uniF6FB" both map to the string that corresponds to
-     U+F6FB."""
-    assert '\uF6FB' == name2unicode('unif6fb')
-
-
-def test_name2unicode_u_with_4_digits():
-    """The components "Lcommaaccent," "uni013B," and "u013B" all map to the
-    string U+013B"""
-    assert '\u013B' == name2unicode('u013B')
-
-
-def test_name2unicode_u_with_4_digits_lowercase():
-    """The components "Lcommaaccent," "uni013B," and "u013B" all map to the
-    string U+013B"""
-    assert '\u013B' == name2unicode('u013b')
-
-
-def test_name2unicode_u_with_5_digits():
-    """The name "u1040C" has a single component, which is mapped to the string
-     U+1040C"""
-    assert '\U0001040C' == name2unicode('u1040C')
-
-
-def test_name2unicode_u_with_5_digits_lowercase():
-    """The name "u1040C" has a single component, which is mapped to the string
-     U+1040C"""
-    assert '\U0001040C' == name2unicode('u1040c')
-
-
-def test_name2unicode_multiple_components():
-    """The name "Lcommaaccent_uni20AC0308_u1040C.alternate" is mapped to the
-    string U+013B U+20AC U+0308 U+1040C"""
-    assert '\u013B\u20AC\u0308\U0001040C' == \
-           name2unicode('Lcommaaccent_uni20AC0308_u1040C.alternate')
-
-
-def test_name2unicode_multiple_components_lowercase():
-    """The name "Lcommaaccent_uni20AC0308_u1040C.alternate" is mapped to the
-     string U+013B U+20AC U+0308 U+1040C"""
-    assert '\u013B\u20AC\u0308\U0001040C' == \
-           name2unicode('Lcommaaccent_uni20ac0308_u1040c.alternate')
-
-
-def test_name2unicode_foo():
-    """The name 'foo' maps to an empty string,
-    because 'foo' is not in AGL,
-    and because it does not start with a 'u.'"""
-    assert_raises(KeyError, name2unicode, 'foo')
-
-
-def test_name2unicode_notdef():
-    """The name ".notdef" is reduced to an empty string (step 1)
-    and mapped to an empty string (step 3)"""
-    assert_raises(KeyError, name2unicode, '.notdef')
-
-
-def test_name2unicode_pua_ogoneksmall():
-    """"
-    Ogoneksmall" and "uniF6FB" both map to the string
-    that corresponds to U+F6FB."""
-    assert '\uF6FB' == name2unicode('Ogoneksmall')
-
-
-def test_name2unicode_overflow_error():
-    assert_raises(KeyError, name2unicode, '226215240241240240240240')
-
-
-def test_get_encoding_with_invalid_differences():
-    """Invalid differences should be silently ignored
-
-    Regression test for https://github.com/pdfminer/pdfminer.six/issues/385
-    """
-    invalid_differences = [PSLiteral('ubuntu'), PSLiteral('1234')]
-    EncodingDB.get_encoding('StandardEncoding', invalid_differences)
-+class TestEncodingDB(unittest.TestCase):
-+    def test_name2unicode_name_in_agl(self):
-+        """The name "Lcommaaccent" has a single component,
-+        which is mapped to the string U+013B by AGL"""
-+        assert '\u013B' == name2unicode('Lcommaaccent')
-+
-+
-+    def test_name2unicode_uni(self):
-+        """The components "Lcommaaccent," "uni013B," and "u013B"
-+        all map to the string U+013B"""
-+        assert '\u013B' == name2unicode('uni013B')
-+
-+
-+    def test_name2unicode_uni_lowercase(self):
-+        """The components "Lcommaaccent," "uni013B," and "u013B"
-+        all map to the string U+013B"""
-+        assert '\u013B' == name2unicode('uni013b')
-+
-+
-+    def test_name2unicode_uni_with_sequence_of_digits(self):
-+        """The name "uni20AC0308" has a single component,
-+        which is mapped to the string U+20AC U+0308"""
-+        assert '\u20AC\u0308' == name2unicode('uni20AC0308')
-+
-+
-+    def test_name2unicode_uni_with_sequence_of_digits_lowercase(self):
-+        """The name "uni20AC0308" has a single component,
-+        which is mapped to the string U+20AC U+0308"""
-+        assert '\u20AC\u0308' == name2unicode('uni20ac0308')
-+
-+
-+    def test_name2unicode_uni_empty_string(self):
-+        """The name "uni20ac" has a single component,
-+        which is mapped to a euro-sign.
-+
-+        According to the specification this should be mapped to an empty string,
-+        but we also want to support lowercase hexadecimals"""
-+        assert '\u20ac' == name2unicode('uni20ac')
-+
-+
-+    def test_name2unicode_uni_empty_string_long(self):
-+        """The name "uniD801DC0C" has a single component,
-+        which is mapped to an empty string
-+
-+        Neither D801 nor DC0C are in the appropriate set.
-+        This form cannot be used to map to the character which is
-+        expressed as D801 DC0C in UTF-16, specifically U+1040C.
-+        This character can be correctly mapped by using the
-+        glyph name "u1040C.
-+        """
-+        self.assertRaises(KeyError, name2unicode, 'uniD801DC0C')
-+
-+
-+    def test_name2unicode_uni_empty_string_long_lowercase(self):
-+        """The name "uniD801DC0C" has a single component,
-+        which is mapped to an empty string
-+
-+        Neither D801 nor DC0C are in the appropriate set.
-+        This form cannot be used to map to the character which is
-+        expressed as D801 DC0C in UTF-16, specifically U+1040C.
-+        This character can be correctly mapped by using the
-+        glyph name "u1040C."""
-+        self.assertRaises(KeyError, name2unicode, 'uniD801DC0C')
-+
-+
-+    def test_name2unicode_uni_pua(self):
-+        """"Ogoneksmall" and "uniF6FB" both map to the string that corresponds to
-+         U+F6FB."""
-+        assert '\uF6FB' == name2unicode('uniF6FB')
-+
-+
-+    def test_name2unicode_uni_pua_lowercase(self):
-+        """"Ogoneksmall" and "uniF6FB" both map to the string that corresponds to
-+         U+F6FB."""
-+        assert '\uF6FB' == name2unicode('unif6fb')
-+
-+
-+    def test_name2unicode_u_with_4_digits(self):
-+        """The components "Lcommaaccent," "uni013B," and "u013B" all map to the
-+        string U+013B"""
-+        assert '\u013B' == name2unicode('u013B')
-+
-+
-+    def test_name2unicode_u_with_4_digits_lowercase(self):
-+        """The components "Lcommaaccent," "uni013B," and "u013B" all map to the
-+        string U+013B"""
-+        assert '\u013B' == name2unicode('u013b')
-+
-+
-+    def test_name2unicode_u_with_5_digits(self):
-+        """The name "u1040C" has a single component, which is mapped to the string
-+         U+1040C"""
-+        assert '\U0001040C' == name2unicode('u1040C')
-+
-+
-+    def test_name2unicode_u_with_5_digits_lowercase(self):
-+        """The name "u1040C" has a single component, which is mapped to the string
-+         U+1040C"""
-+        assert '\U0001040C' == name2unicode('u1040c')
-+
-+
-+    def test_name2unicode_multiple_components(self):
-+        """The name "Lcommaaccent_uni20AC0308_u1040C.alternate" is mapped to the
-+        string U+013B U+20AC U+0308 U+1040C"""
-+        assert '\u013B\u20AC\u0308\U0001040C' == \
-+               name2unicode('Lcommaaccent_uni20AC0308_u1040C.alternate')
-+
-+
-+    def test_name2unicode_multiple_components_lowercase(self):
-+        """The name "Lcommaaccent_uni20AC0308_u1040C.alternate" is mapped to the
-+         string U+013B U+20AC U+0308 U+1040C"""
-+        assert '\u013B\u20AC\u0308\U0001040C' == \
-+               name2unicode('Lcommaaccent_uni20ac0308_u1040c.alternate')
-+
-+
-+    def test_name2unicode_foo(self):
-+        """The name 'foo' maps to an empty string,
-+        because 'foo' is not in AGL,
-+        and because it does not start with a 'u.'"""
-+        self.assertRaises(KeyError, name2unicode, 'foo')
-+
-+
-+    def test_name2unicode_notdef(self):
-+        """The name ".notdef" is reduced to an empty string (step 1)
-+        and mapped to an empty string (step 3)"""
-+        self.assertRaises(KeyError, name2unicode, '.notdef')
-+
-+
-+    def test_name2unicode_pua_ogoneksmall(self):
-+        """"
-+        Ogoneksmall" and "uniF6FB" both map to the string
-+        that corresponds to U+F6FB."""
-+        assert '\uF6FB' == name2unicode('Ogoneksmall')
-+
-+
-+    def test_name2unicode_overflow_error(self):
-+        self.assertRaises(KeyError, name2unicode, '226215240241240240240240')
-+
-+
-+    def test_get_encoding_with_invalid_differences(self):
-+        """Invalid differences should be silently ignored
-+
-+        Regression test for https://github.com/pdfminer/pdfminer.six/issues/385
-+        """
-+        invalid_differences = [PSLiteral('ubuntu'), PSLiteral('1234')]
-+        EncodingDB.get_encoding('StandardEncoding', invalid_differences)
-Index: pdfminer.six-20200726/tests/test_font_size.py
-===================================================================
--- pdfminer.six-20200726.orig/tests/test_font_size.py	2020-07-26 15:14:15.000000000 +0200
-+++ pdfminer.six-20200726/tests/test_font_size.py	2020-09-08 17:32:43.314767223 +0200
-@@ -1,22 +1,25 @@
-+import unittest
-+
- from helpers import absolute_sample_path
- from pdfminer.high_level import extract_pages
- from pdfminer.layout import LTChar, LTTextBox
- 
- 
-def test_font_size():
-    path = absolute_sample_path('font-size-test.pdf')
-    for page in extract_pages(path):
-        for text_box in page:
-            if isinstance(text_box, LTTextBox):
-                for line in text_box:
-                    possible_number = line.get_text().strip()
-                    if possible_number.isdigit():
-                        expected_size = int(possible_number)
-
-                        for char in line:
-                            if isinstance(char, LTChar):
-                                actual_size = int(round(char.size))
-                                print(char, actual_size, expected_size)
-                                assert expected_size == actual_size
-                    else:
-                        print(repr(line.get_text()))
-+class TestFontSize(unittest.TestCase):
-+    def test_font_size(self):
-+        path = absolute_sample_path('font-size-test.pdf')
-+        for page in extract_pages(path):
-+            for text_box in page:
-+                if isinstance(text_box, LTTextBox):
-+                    for line in text_box:
-+                        possible_number = line.get_text().strip()
-+                        if possible_number.isdigit():
-+                            expected_size = int(possible_number)
-+
-+                            for char in line:
-+                                if isinstance(char, LTChar):
-+                                    actual_size = int(round(char.size))
-+                                    print(char, actual_size, expected_size)
-+                                    assert expected_size == actual_size
-+                        else:
-+                            print(repr(line.get_text()))
-Index: pdfminer.six-20200726/tests/test_pdfdocument.py
-===================================================================
--- pdfminer.six-20200726.orig/tests/test_pdfdocument.py	2020-07-26 15:14:15.000000000 +0200
-+++ pdfminer.six-20200726/tests/test_pdfdocument.py	2020-09-08 18:03:35.153943407 +0200
-@@ -1,4 +1,4 @@
-from nose.tools import raises
-+import unittest
- 
- from helpers import absolute_sample_path
- from pdfminer.pdfdocument import PDFDocument
-@@ -6,11 +6,10 @@ from pdfminer.pdfparser import PDFParser
- from pdfminer.pdftypes import PDFObjectNotFound
- 
- 
-class TestPdfDocument(object):
-+class TestPdfDocument(unittest.TestCase):
- 
-    @raises(PDFObjectNotFound)
-     def test_get_zero_objid_raises_pdfobjectnotfound(self):
-         with open(absolute_sample_path('simple1.pdf'), 'rb') as in_file:
-             parser = PDFParser(in_file)
-             doc = PDFDocument(parser)
-            doc.getobj(0)
-+            self.assertRaises(PDFObjectNotFound, doc.getobj, 0)
-Index: pdfminer.six-20200726/tests/test_pdfencoding.py
-===================================================================
--- pdfminer.six-20200726.orig/tests/test_pdfencoding.py	2020-07-26 15:14:15.000000000 +0200
-+++ pdfminer.six-20200726/tests/test_pdfencoding.py	2020-09-08 17:39:04.149065629 +0200
-@@ -2,7 +2,7 @@
- 
- # -*- coding: utf-8 -*-
- 
-import nose
-+import unittest
- 
- from pdfminer.cmapdb import IdentityCMap, CMap, IdentityCMapByte
- from pdfminer.pdffont import PDFCIDFont
-@@ -10,7 +10,7 @@ from pdfminer.pdftypes import PDFStream
- from pdfminer.psparser import PSLiteral
- 
- 
-class TestPDFEncoding():
-+class TestPDFEncoding(unittest.TestCase):
- 
-     def test_cmapname_onebyteidentityV(self):
-         stream = PDFStream({'CMapName': PSLiteral('OneByteIdentityV')}, '')
-@@ -109,5 +109,3 @@ class TestPDFEncoding():
-         assert isinstance(font.cmap, CMap)
- 
- 
-if __name__ == '__main__':
-    nose.runmodule()
-Index: pdfminer.six-20200726/tests/test_pdffont.py
-===================================================================
--- pdfminer.six-20200726.orig/tests/test_pdffont.py	2020-07-26 15:14:15.000000000 +0200
-+++ pdfminer.six-20200726/tests/test_pdffont.py	2020-09-08 17:40:43.329664206 +0200
-@@ -1,21 +1,22 @@
-from nose.tools import assert_equal, assert_greater
-+import unittest
- 
- from pdfminer.pdffont import PDFCIDFont
- from pdfminer.pdfinterp import PDFResourceManager
- from pdfminer.psparser import PSLiteral
- 
- 
-def test_get_cmap_from_pickle():
-    """Test if cmap file is read from pdfminer/cmap
-
-    Regression test for https://github.com/pdfminer/pdfminer.six/issues/391
-    """
-    cmap_name = 'UniGB-UCS2-H'
-    spec = {'Encoding': PSLiteral(cmap_name)}
-    resource_manager = PDFResourceManager()
-    font = PDFCIDFont(resource_manager, spec)
-
-    cmap = font.get_cmap_from_spec(spec, False)
-
-    assert_equal(cmap.attrs.get('CMapName'), cmap_name)
-    assert_greater(len(cmap.code2cid), 0)
-+class TestPdfFont(unittest.TestCase):
-+    def test_get_cmap_from_pickle(self):
-+        """Test if cmap file is read from pdfminer/cmap
-+
-+        Regression test for https://github.com/pdfminer/pdfminer.six/issues/391
-+        """
-+        cmap_name = 'UniGB-UCS2-H'
-+        spec = {'Encoding': PSLiteral(cmap_name)}
-+        resource_manager = PDFResourceManager()
-+        font = PDFCIDFont(resource_manager, spec)
-+
-+        cmap = font.get_cmap_from_spec(spec, False)
-+
-+        self.assertEqual(cmap.attrs.get('CMapName'), cmap_name)
-+        self.assertGreater(len(cmap.code2cid), 0)
-Index: pdfminer.six-20200726/tests/test_pdfminer_ccitt.py
-===================================================================
--- pdfminer.six-20200726.orig/tests/test_pdfminer_ccitt.py	2020-07-26 15:14:15.000000000 +0200
-+++ pdfminer.six-20200726/tests/test_pdfminer_ccitt.py	2020-09-08 17:49:56.349001777 +0200
-@@ -1,9 +1,9 @@
-from nose.tools import assert_equal
-+import unittest
- 
- from pdfminer.ccitt import CCITTG4Parser
- 
- 
-class TestCCITTG4Parser():
-+class TestCCITTG4Parser(unittest.TestCase):
-     def get_parser(self, bits):
-         parser = CCITTG4Parser(len(bits))
-         parser._curline = [int(c) for c in bits]
-@@ -13,98 +13,98 @@ class TestCCITTG4Parser():
-     def test_b1(self):
-         parser = self.get_parser('00000')
-         parser._do_vertical(0)
-        assert_equal(parser._curpos, 0)
-+        self.assertEqual(parser._curpos, 0)
-         return
- 
-     def test_b2(self):
-         parser = self.get_parser('10000')
-         parser._do_vertical(-1)
-        assert_equal(parser._curpos, 0)
-+        self.assertEqual(parser._curpos, 0)
-         return
- 
-     def test_b3(self):
-         parser = self.get_parser('000111')
-         parser._do_pass()
-        assert_equal(parser._curpos, 3)
-        assert_equal(parser._get_bits(), '111')
-+        self.assertEqual(parser._curpos, 3)
-+        self.assertEqual(parser._get_bits(), '111')
-         return
- 
-     def test_b4(self):
-         parser = self.get_parser('00000')
-         parser._do_vertical(+2)
-        assert_equal(parser._curpos, 2)
-        assert_equal(parser._get_bits(), '11')
-+        self.assertEqual(parser._curpos, 2)
-+        self.assertEqual(parser._get_bits(), '11')
-         return
- 
-     def test_b5(self):
-         parser = self.get_parser('11111111100')
-         parser._do_horizontal(0, 3)
-        assert_equal(parser._curpos, 3)
-+        self.assertEqual(parser._curpos, 3)
-         parser._do_vertical(1)
-        assert_equal(parser._curpos, 10)
-        assert_equal(parser._get_bits(), '0001111111')
-+        self.assertEqual(parser._curpos, 10)
-+        self.assertEqual(parser._get_bits(), '0001111111')
-         return
- 
-     def test_e1(self):
-         parser = self.get_parser('10000')
-         parser._do_vertical(0)
-        assert_equal(parser._curpos, 1)
-+        self.assertEqual(parser._curpos, 1)
-         parser._do_vertical(0)
-        assert_equal(parser._curpos, 5)
-        assert_equal(parser._get_bits(), '10000')
-+        self.assertEqual(parser._curpos, 5)
-+        self.assertEqual(parser._get_bits(), '10000')
-         return
- 
-     def test_e2(self):
-         parser = self.get_parser('10011')
-         parser._do_vertical(0)
-        assert_equal(parser._curpos, 1)
-+        self.assertEqual(parser._curpos, 1)
-         parser._do_vertical(2)
-        assert_equal(parser._curpos, 5)
-        assert_equal(parser._get_bits(), '10000')
-+        self.assertEqual(parser._curpos, 5)
-+        self.assertEqual(parser._get_bits(), '10000')
-         return
- 
-     def test_e3(self):
-         parser = self.get_parser('011111')
-         parser._color = 0
-         parser._do_vertical(0)
-        assert_equal(parser._color, 1)
-        assert_equal(parser._curpos, 1)
-+        self.assertEqual(parser._color, 1)
-+        self.assertEqual(parser._curpos, 1)
-         parser._do_vertical(-2)
-        assert_equal(parser._color, 0)
-        assert_equal(parser._curpos, 4)
-+        self.assertEqual(parser._color, 0)
-+        self.assertEqual(parser._curpos, 4)
-         parser._do_vertical(0)
-        assert_equal(parser._curpos, 6)
-        assert_equal(parser._get_bits(), '011100')
-+        self.assertEqual(parser._curpos, 6)
-+        self.assertEqual(parser._get_bits(), '011100')
-         return
- 
-     def test_e4(self):
-         parser = self.get_parser('10000')
-         parser._do_vertical(0)
-        assert_equal(parser._curpos, 1)
-+        self.assertEqual(parser._curpos, 1)
-         parser._do_vertical(-2)
-        assert_equal(parser._curpos, 3)
-+        self.assertEqual(parser._curpos, 3)
-         parser._do_vertical(0)
-        assert_equal(parser._curpos, 5)
-        assert_equal(parser._get_bits(), '10011')
-+        self.assertEqual(parser._curpos, 5)
-+        self.assertEqual(parser._get_bits(), '10011')
-         return
- 
-     def test_e5(self):
-         parser = self.get_parser('011000')
-         parser._color = 0
-         parser._do_vertical(0)
-        assert_equal(parser._curpos, 1)
-+        self.assertEqual(parser._curpos, 1)
-         parser._do_vertical(3)
-        assert_equal(parser._curpos, 6)
-        assert_equal(parser._get_bits(), '011111')
-+        self.assertEqual(parser._curpos, 6)
-+        self.assertEqual(parser._get_bits(), '011111')
-         return
- 
-     def test_e6(self):
-         parser = self.get_parser('11001')
-         parser._do_pass()
-        assert_equal(parser._curpos, 4)
-+        self.assertEqual(parser._curpos, 4)
-         parser._do_vertical(0)
-        assert_equal(parser._curpos, 5)
-        assert_equal(parser._get_bits(), '11111')
-+        self.assertEqual(parser._curpos, 5)
-+        self.assertEqual(parser._get_bits(), '11111')
-         return
- 
-     def test_e7(self):
-@@ -112,8 +112,8 @@ class TestCCITTG4Parser():
-         parser._curpos = 2
-         parser._color = 1
-         parser._do_horizontal(2, 6)
-        assert_equal(parser._curpos, 10)
-        assert_equal(parser._get_bits(), '1111000000')
-+        self.assertEqual(parser._curpos, 10)
-+        self.assertEqual(parser._get_bits(), '1111000000')
-         return
- 
-     def test_e8(self):
-@@ -121,19 +121,19 @@ class TestCCITTG4Parser():
-         parser._curpos = 1
-         parser._color = 0
-         parser._do_vertical(0)
-        assert_equal(parser._curpos, 2)
-+        self.assertEqual(parser._curpos, 2)
-         parser._do_horizontal(7, 0)
-        assert_equal(parser._curpos, 9)
-        assert_equal(parser._get_bits(), '101111111')
-+        self.assertEqual(parser._curpos, 9)
-+        self.assertEqual(parser._get_bits(), '101111111')
-         return
- 
-     def test_m1(self):
-         parser = self.get_parser('10101')
-         parser._do_pass()
-        assert_equal(parser._curpos, 2)
-+        self.assertEqual(parser._curpos, 2)
-         parser._do_pass()
-        assert_equal(parser._curpos, 4)
-        assert_equal(parser._get_bits(), '1111')
-+        self.assertEqual(parser._curpos, 4)
-+        self.assertEqual(parser._get_bits(), '1111')
-         return
- 
-     def test_m2(self):
-@@ -142,7 +142,7 @@ class TestCCITTG4Parser():
-         parser._do_vertical(-1)
-         parser._do_vertical(1)
-         parser._do_horizontal(1, 1)
-        assert_equal(parser._get_bits(), '011101')
-+        self.assertEqual(parser._get_bits(), '011101')
-         return
- 
-     def test_m3(self):
-@@ -151,5 +151,5 @@ class TestCCITTG4Parser():
-         parser._do_pass()
-         parser._do_vertical(1)
-         parser._do_vertical(1)
-        assert_equal(parser._get_bits(), '00000001')
-+        self.assertEqual(parser._get_bits(), '00000001')
-         return
-Index: pdfminer.six-20200726/tests/test_pdfminer_crypto.py
-===================================================================
--- pdfminer.six-20200726.orig/tests/test_pdfminer_crypto.py	2020-07-26 15:14:15.000000000 +0200
-+++ pdfminer.six-20200726/tests/test_pdfminer_crypto.py	2020-09-08 17:48:43.188560241 +0200
-@@ -1,7 +1,7 @@
- """Test of various compression/encoding modules (previously in doctests)
- """
- import binascii
-from nose.tools import assert_equal
-+import unittest
- 
- from pdfminer.arcfour import Arcfour
- from pdfminer.ascii85 import asciihexdecode, ascii85decode
-@@ -20,49 +20,49 @@ def dehex(b):
-     return binascii.unhexlify(b)
- 
- 
-class TestAscii85():
-+class TestAscii85(unittest.TestCase):
-     def test_ascii85decode(self):
-         """The sample string is taken from:
-         http://en.wikipedia.org/w/index.php?title=Ascii85"""
-        assert_equal(ascii85decode(b'9jqo^BlbD-BleB1DJ+*+F(f,q'),
-+        self.assertEqual(ascii85decode(b'9jqo^BlbD-BleB1DJ+*+F(f,q'),
-                      b'Man is distinguished')
-        assert_equal(ascii85decode(b'E,9)oF*2M7/c~>'),
-+        self.assertEqual(ascii85decode(b'E,9)oF*2M7/c~>'),
-                      b'pleasure.')
- 
-     def test_asciihexdecode(self):
-        assert_equal(asciihexdecode(b'61 62 2e6364   65'),
-+        self.assertEqual(asciihexdecode(b'61 62 2e6364   65'),
-                      b'ab.cde')
-        assert_equal(asciihexdecode(b'61 62 2e6364   657>'),
-+        self.assertEqual(asciihexdecode(b'61 62 2e6364   657>'),
-                      b'ab.cdep')
-        assert_equal(asciihexdecode(b'7>'),
-+        self.assertEqual(asciihexdecode(b'7>'),
-                      b'p')
- 
- 
-class TestArcfour():
-+class TestArcfour(unittest.TestCase):
-     def test(self):
-        assert_equal(hex(Arcfour(b'Key').process(b'Plaintext')),
-+        self.assertEqual(hex(Arcfour(b'Key').process(b'Plaintext')),
-                      b'bbf316e8d940af0ad3')
-        assert_equal(hex(Arcfour(b'Wiki').process(b'pedia')),
-+        self.assertEqual(hex(Arcfour(b'Wiki').process(b'pedia')),
-                      b'1021bf0420')
-        assert_equal(hex(Arcfour(b'Secret').process(b'Attack at dawn')),
-+        self.assertEqual(hex(Arcfour(b'Secret').process(b'Attack at dawn')),
-                      b'45a01f645fc35b383552544b9bf5')
- 
- 
-class TestLzw():
-+class TestLzw(unittest.TestCase):
-     def test_lzwdecode(self):
-        assert_equal(lzwdecode(b'\x80\x0b\x60\x50\x22\x0c\x0c\x85\x01'),
-+        self.assertEqual(lzwdecode(b'\x80\x0b\x60\x50\x22\x0c\x0c\x85\x01'),
-                      b'\x2d\x2d\x2d\x2d\x2d\x41\x2d\x2d\x2d\x42')
- 
- 
-class TestRunlength():
-+class TestRunlength(unittest.TestCase):
-     def test_rldecode(self):
-        assert_equal(rldecode(b'\x05123456\xfa7\x04abcde\x80junk'),
-+        self.assertEqual(rldecode(b'\x05123456\xfa7\x04abcde\x80junk'),
-                      b'1234567777777abcde')
- 
- 
-class TestRijndaelEncryptor():
-+class TestRijndaelEncryptor(unittest.TestCase):
-     def test_RijndaelEncryptor(self):
-         key = dehex(b'00010203050607080a0b0c0d0f101112')
-         plaintext = dehex(b'506812a45f08c889b97f5980038b8359')
-        assert_equal(hex(RijndaelEncryptor(key, 128).encrypt(plaintext)),
-+        self.assertEqual(hex(RijndaelEncryptor(key, 128).encrypt(plaintext)),
-                      b'd8f532538289ef7d06b506a4fd5be9c9')
-Index: pdfminer.six-20200726/tests/test_pdfminer_psparser.py
-===================================================================
--- pdfminer.six-20200726.orig/tests/test_pdfminer_psparser.py	2020-07-26 15:14:15.000000000 +0200
-+++ pdfminer.six-20200726/tests/test_pdfminer_psparser.py	2020-09-08 17:47:06.423976246 +0200
-@@ -1,13 +1,13 @@
- import logging
- 
-from nose.tools import assert_equal
-+import unittest
- 
- from pdfminer.psparser import KWD, LIT, PSBaseParser, PSStackParser, PSEOF
- 
- logger = logging.getLogger(__name__)
- 
- 
-class TestPSBaseParser:
-+class TestPSBaseParser(unittest.TestCase):
-     """Simplistic Test cases"""
- 
-     TESTDATA = br'''%!PS
-@@ -92,11 +92,11 @@ func/a/b{(c)do*}def
-     def test_1(self):
-         tokens = self.get_tokens(self.TESTDATA)
-         logger.info(tokens)
-        assert_equal(tokens, self.TOKENS)
-+        self.assertEqual(tokens, self.TOKENS)
-         return
- 
-     def test_2(self):
-         objs = self.get_objects(self.TESTDATA)
-         logger.info(objs)
-        assert_equal(objs, self.OBJS)
-+        self.assertEqual(objs, self.OBJS)
-         return
-Index: pdfminer.six-20200726/tests/test_tools_dumppdf.py
-===================================================================
--- pdfminer.six-20200726.orig/tests/test_tools_dumppdf.py	2020-07-26 15:14:15.000000000 +0200
-+++ pdfminer.six-20200726/tests/test_tools_dumppdf.py	2020-09-08 17:45:02.647229236 +0200
-@@ -1,3 +1,5 @@
-+import unittest
-+
- import warnings
- from tempfile import NamedTemporaryFile
- 
-@@ -17,7 +19,7 @@ def run(filename, options=None):
-         dumppdf.main(s.split(' ')[1:])
- 
- 
-class TestDumpPDF():
-+class TestDumpPDF(unittest.TestCase):
-     def test_simple1(self):
-         """dumppdf.py simple1.pdf raises a warning because it has no xref"""
-         with warnings.catch_warnings(record=True) as ws:
-Index: pdfminer.six-20200726/tests/test_tools_pdf2txt.py
-===================================================================
--- pdfminer.six-20200726.orig/tests/test_tools_pdf2txt.py	2020-07-26 15:14:15.000000000 +0200
-+++ pdfminer.six-20200726/tests/test_tools_pdf2txt.py	2020-09-08 17:44:20.946977562 +0200
-@@ -1,3 +1,5 @@
-+import unittest
-+
- import os
- from shutil import rmtree
- from tempfile import NamedTemporaryFile, mkdtemp
-@@ -17,7 +19,7 @@ def run(sample_path, options=None):
-         pdf2txt.main(s.split(' ')[1:])
- 
- 
-class TestPdf2Txt():
-+class TestPdf2Txt(unittest.TestCase):
-     def test_jo(self):
-         run('jo.pdf')
- 
-@@ -104,7 +106,7 @@ class TestPdf2Txt():
-         run('encryption/rc4-128.pdf', '-P foo')
- 
- 
-class TestDumpImages:
-+class TestDumpImages(unittest.TestCase):
- 
-     @staticmethod
-     def extract_images(input_file):
-Index: pdfminer.six-20200726/tests/test_utils.py
-===================================================================
--- pdfminer.six-20200726.orig/tests/test_utils.py	2020-07-26 15:14:15.000000000 +0200
-+++ pdfminer.six-20200726/tests/test_utils.py	2020-09-08 17:43:28.830663039 +0200
-@@ -1,37 +1,37 @@
-from nose.tools import assert_equal
-+import unittest
- 
- from pdfminer.layout import LTComponent
- from pdfminer.utils import Plane, shorten_str
- 
- 
-class TestPlane:
-+class TestPlane(unittest.TestCase):
-     def test_find_nothing_in_empty_bbox(self):
-         plane, _ = self.given_plane_with_one_object()
-         result = list(plane.find((50, 50, 100, 100)))
-        assert_equal(result, [])
-+        self.assertEqual(result, [])
- 
-     def test_find_nothing_after_removing(self):
-         plane, obj = self.given_plane_with_one_object()
-         plane.remove(obj)
-         result = list(plane.find((0, 0, 100, 100)))
-        assert_equal(result, [])
-+        self.assertEqual(result, [])
- 
-     def test_find_object_in_whole_plane(self):
-         plane, obj = self.given_plane_with_one_object()
-         result = list(plane.find((0, 0, 100, 100)))
-        assert_equal(result, [obj])
-+        self.assertEqual(result, [obj])
- 
-     def test_find_if_object_is_smaller_than_gridsize(self):
-         plane, obj = self.given_plane_with_one_object(object_size=1,
-                                                       gridsize=100)
-         result = list(plane.find((0, 0, 100, 100)))
-        assert_equal(result, [obj])
-+        self.assertEqual(result, [obj])
- 
-     def test_find_object_if_much_larger_than_gridsize(self):
-         plane, obj = self.given_plane_with_one_object(object_size=100,
-                                                       gridsize=10)
-         result = list(plane.find((0, 0, 100, 100)))
-        assert_equal(result, [obj])
-+        self.assertEqual(result, [obj])
- 
-     @staticmethod
-     def given_plane_with_one_object(object_size=50, gridsize=50):
-@@ -42,14 +42,14 @@ class TestPlane:
-         return plane, obj
- 
- 
-class TestFunctions(object):
-+class TestFunctions(unittest.TestCase):
-     def test_shorten_str(self):
-         s = shorten_str('Hello there World', 15)
-        assert_equal(s, 'Hello ... World')
-+        self.assertEqual(s, 'Hello ... World')
- 
-     def test_shorten_short_str_is_same(self):
-         s = 'Hello World'
-        assert_equal(s, shorten_str(s, 50))
-+        self.assertEqual(s, shorten_str(s, 50))
- 
-     def test_shorten_to_really_short(self):
-        assert_equal('Hello', shorten_str('Hello World', 5))
-+        self.assertEqual('Hello', shorten_str('Hello World', 5))
-Index: pdfminer.six-20200726/tox.ini
-===================================================================
--- pdfminer.six-20200726.orig/tox.ini	2020-07-26 15:14:15.000000000 +0200
-+++ pdfminer.six-20200726/tox.ini	2020-09-08 17:27:56.365035449 +0200
-@@ -13,6 +13,6 @@ whitelist_externals =
-     flake8
- commands =
-     flake8 pdfminer/ tools/ tests/ --count --statistics
-    nosetests --nologcapture
-+    pytest
-     python -m sphinx -b html docs/source docs/build/html
-     python -m sphinx -b doctest docs/source docs/build/doctest
--- a/python-pdfminer.six.changes
+++ b/python-pdfminer.six.changes
@@ -1,3 +1,22 @@
+-------------------------------------------------------------------
+Mon Dec 11 17:24:21 UTC 2023 - Jonathan Papineau <jonathan@jontech.app>
+
+- Update to 20221105
+  - Option to disable boxes flow layout analysis when using pdf2txt
+  - Add support for PDF 2.0 (ISO 32000-2) AES-256 encryption
+  - Support for Paeth PNG filter compression (predictor value = 4)
+  - Type annotations
+  - Export type annotations from pypi package per PEP561
+  - Support for identity cmaps
+  - Add support for PDF page labels
+  - Installation of Pillow as an optional extra dependency
+  - Exporting images without any specific encoding
+  - Output converter for the hOCR format
+  - Font name aliases for Arial, Courier New and Times New Roman
+  - Documentation on why special characters can sometimes not be extracted
+- Remove patch python-pdfminer.six-remove-nose.patch
+- Update dependencies
+
 -------------------------------------------------------------------
 Fri Aug 25 14:07:07 UTC 2023 - ecsos <ecsos@opensuse.org>

--- a/python-pdfminer.six.spec
+++ b/python-pdfminer.six.spec
@@ -18,25 +18,22 @@

 %{?sle15_python_module_pythons}
 Name:           python-pdfminer.six
-Version:        20200726
+Version:        20221105
 Release:        0
 Summary:        PDF parser and analyzer
 License:        MIT
 URL:            https://github.com/pdfminer/pdfminer.six
 Source:         https://github.com/pdfminer/pdfminer.six/archive/%{version}.tar.gz#/pdfminer.six-%{version}.tar.gz
 # https://github.com/pdfminer/pdfminer.six/pull/489
-Patch0:         python-pdfminer.six-remove-nose.patch
-Patch1:         import-from-non-pythonpath-files.patch
-BuildRequires:  %{python_module chardet}
-BuildRequires:  %{python_module cryptography}
+Patch0:         import-from-non-pythonpath-files.patch
+BuildRequires:  %{python_module charset-normalizer >= 2.0.0}
+BuildRequires:  %{python_module cryptography >= 36.0.0}
 BuildRequires:  %{python_module pytest}
 BuildRequires:  %{python_module setuptools}
-BuildRequires:  %{python_module sortedcontainers}
 BuildRequires:  fdupes
 BuildRequires:  python-rpm-macros
-Requires:       python-chardet
-Requires:       python-cryptography
-Requires:       python-sortedcontainers
+Requires:       python-charset-normalizer >= 2.0.0
+Requires:       python-cryptography >= 36.0.0
 Requires(post): update-alternatives
 Requires(postun):update-alternatives
 Provides:       python-pdfminer3k = %{version}
@@ -56,6 +53,7 @@ the exact location, font or color of the text.
 %autopatch -p1
 sed -i -e '/^#!\//, 1d' pdfminer/psparser.py
 sed -i '1i #!%{_bindir}/python3' tools/dumppdf.py tools/pdf2txt.py
+sed -i "s/__VERSION__/%{version}/g" pdfminer/__init__.py

 %build
 %python_build