diff --git a/libxml2212-tests.patch b/libxml2212-tests.patch new file mode 100644 index 0000000..baa342b --- /dev/null +++ b/libxml2212-tests.patch @@ -0,0 +1,41 @@ +From 2a6770566ab57d601abc7c2f49a8051b9d97b64c Mon Sep 17 00:00:00 2001 +From: Nick Wellnhofer +Date: Tue, 31 Oct 2023 12:36:02 +0100 +Subject: [PATCH] Make Unicode recovery test work with libxml2 2.12 (GH-383) + +When encountering encoding errors, libxml2 no longer switches to ISO-8859-1 since version 2.12. +--- + src/lxml/parser.pxi | 2 +- + src/lxml/tests/test_unicode.py | 6 +++++- + 2 files changed, 6 insertions(+), 2 deletions(-) + +diff --git a/src/lxml/parser.pxi b/src/lxml/parser.pxi +index 4b7b52065..8ceec7d25 100644 +--- a/src/lxml/parser.pxi ++++ b/src/lxml/parser.pxi +@@ -693,7 +693,7 @@ cdef xmlDoc* _handleParseResult(_ParserContext context, + # An encoding error occurred and libxml2 switched from UTF-8 + # input to (undecoded) Latin-1, at some arbitrary point in the + # document. Better raise an error than allowing for a broken +- # tree with mixed encodings. ++ # tree with mixed encodings. This is fixed in libxml2 2.12. + well_formed = 0 + elif recover or (c_ctxt.wellFormed and + c_ctxt.lastError.level < xmlerror.XML_ERR_ERROR): +diff --git a/src/lxml/tests/test_unicode.py b/src/lxml/tests/test_unicode.py +index 6d4ee9c0f..3636539b2 100644 +--- a/src/lxml/tests/test_unicode.py ++++ b/src/lxml/tests/test_unicode.py +@@ -167,7 +167,11 @@ def test_illegal_utf8(self): + def test_illegal_utf8_recover(self): + data = _bytes('\x80\x80\x80', encoding='iso8859-1') + parser = etree.XMLParser(recover=True) +- self.assertRaises(etree.XMLSyntaxError, etree.fromstring, data, parser) ++ if etree.LIBXML_VERSION >= (2, 12, 0): ++ tree = etree.fromstring(data, parser) ++ self.assertEqual('\ufffd\ufffd\ufffd', tree.text) ++ else: ++ self.assertRaises(etree.XMLSyntaxError, etree.fromstring, data, parser) + + def _test_encoding(self, encoding, xml_encoding_name=None): + foo = """\n""" % ( diff --git a/python-lxml.changes b/python-lxml.changes index 9e545f8..f5f7de8 100644 --- a/python-lxml.changes +++ b/python-lxml.changes @@ -1,3 +1,8 @@ +------------------------------------------------------------------- +Mon Nov 27 12:36:51 UTC 2023 - Markéta Machová + +- Add libxml2212-tests.patch to fix tests with new libxml2 + ------------------------------------------------------------------- Wed Sep 6 20:04:29 UTC 2023 - Dirk Müller diff --git a/python-lxml.spec b/python-lxml.spec index 701e648..00bb493 100644 --- a/python-lxml.spec +++ b/python-lxml.spec @@ -42,6 +42,9 @@ Patch3: ISO-Schematron-schema-optional.patch # PATCH-FIX-UPSTREAM remove-ISO-Schematron-schema.patch gl#fedora/legal/fedora-license-data/-#154 mcepl@suse.com # Actually remove the schema Patch4: remove-ISO-Schematron-schema.patch +# PATCH-FIX-UPSTREAM libxml2212-tests.patch https://github.com/lxml/lxml/pull/383 +# Make Unicode recovery test work with upcoming libxml2 2.12 +Patch5: libxml2212-tests.patch BuildRequires: %{python_module Cython >= 0.29.7 with %python-Cython < 3} BuildRequires: %{python_module base} BuildRequires: %{python_module cssselect >= 0.9.1}