Sync from SUSE:SLFO:Main python-xmltodict revision 18a79c0db02678e1e0113fd9ed7d26ad

2025-05-17 16:14:55 +02:00
6 changed files with 35 additions and 391 deletions
--- a/CVE-2025-9375.patch
+++ b/CVE-2025-9375.patch
@@ -1,222 +0,0 @@
-From 6df43e8d0133f700c328e3da4028315fb9f873da Mon Sep 17 00:00:00 2001
-From: Martin Blech <78768+martinblech@users.noreply.github.com>
-Date: Thu, 4 Sep 2025 17:25:39 -0700
-Subject: [PATCH 2/2] Merge the following three upstream commits as a combined
- fix for CVE-2025-9375
-
- Prevent XML injection: reject '<'/'>' in element/attr names (incl. @xmlns) (ecd456a)
- Enhance unparse() XML name validation with stricter rules and tests (f98c90f)
- Harden XML name validation: reject quotes and '='; add tests (8860b0e)
---
- tests/test_dicttoxml.py | 106 ++++++++++++++++++++++++++++++++++++++++
- xmltodict.py            |  56 ++++++++++++++++++++-
- 2 files changed, 160 insertions(+), 2 deletions(-)
-
-diff --git a/tests/test_dicttoxml.py b/tests/test_dicttoxml.py
-index 7fc2171..9830e2a 100644
--- a/tests/test_dicttoxml.py
-+++ b/tests/test_dicttoxml.py
-@@ -213,3 +213,109 @@ xmlns:b="http://b.com/"><x a:attr="val">1</x><a:y>2</a:y><b:z>3</b:z></root>'''
-         expected_xml = '<?xml version="1.0" encoding="utf-8"?>\n<x>false</x>'
-         xml = unparse(dict(x=False))
-         self.assertEqual(xml, expected_xml)
-+
-+    def test_rejects_tag_name_with_angle_brackets(self):
-+        # Minimal guard: disallow '<' or '>' to prevent breaking tag context
-+        with self.assertRaises(ValueError):
-+            unparse({"m><tag>content</tag": "unsafe"}, full_document=False)
-+
-+    def test_rejects_attribute_name_with_angle_brackets(self):
-+        # Now we expect bad attribute names to be rejected
-+        with self.assertRaises(ValueError):
-+            unparse(
-+                {"a": {"@m><tag>content</tag": "unsafe", "#text": "x"}},
-+                full_document=False,
-+            )
-+
-+    def test_rejects_malicious_xmlns_prefix(self):
-+        # xmlns prefixes go under @xmlns mapping; reject angle brackets in prefix
-+        with self.assertRaises(ValueError):
-+            unparse(
-+                {
-+                    "a": {
-+                        "@xmlns": {"m><bad": "http://example.com/"},
-+                        "#text": "x",
-+                    }
-+                },
-+                full_document=False,
-+            )
-+
-+    def test_attribute_values_with_angle_brackets_are_escaped(self):
-+        # Attribute values should be escaped by XMLGenerator
-+        xml = unparse({"a": {"@attr": "1<middle>2", "#text": "x"}}, full_document=False)
-+        # The generated XML should contain escaped '<' and '>' within the attribute value
-+        self.assertIn('attr="1&lt;middle&gt;2"', xml)
-+
-+    def test_rejects_tag_name_starting_with_question(self):
-+        with self.assertRaises(ValueError):
-+            unparse({"?pi": "data"}, full_document=False)
-+
-+    def test_rejects_tag_name_starting_with_bang(self):
-+        with self.assertRaises(ValueError):
-+            unparse({"!decl": "data"}, full_document=False)
-+
-+    def test_rejects_attribute_name_starting_with_question(self):
-+        with self.assertRaises(ValueError):
-+            unparse({"a": {"@?weird": "x"}}, full_document=False)
-+
-+    def test_rejects_attribute_name_starting_with_bang(self):
-+        with self.assertRaises(ValueError):
-+            unparse({"a": {"@!weird": "x"}}, full_document=False)
-+
-+    def test_rejects_xmlns_prefix_starting_with_question_or_bang(self):
-+        with self.assertRaises(ValueError):
-+            unparse({"a": {"@xmlns": {"?p": "http://e/"}}}, full_document=False)
-+        with self.assertRaises(ValueError):
-+            unparse({"a": {"@xmlns": {"!p": "http://e/"}}}, full_document=False)
-+
-+    def test_rejects_non_string_names(self):
-+        class Weird:
-+            def __str__(self):
-+                return "bad>name"
-+
-+        # Non-string element key
-+        with self.assertRaises(ValueError):
-+            unparse({Weird(): "x"}, full_document=False)
-+        # Non-string attribute key
-+        with self.assertRaises(ValueError):
-+            unparse({"a": {Weird(): "x"}}, full_document=False)
-+
-+    def test_rejects_tag_name_with_slash(self):
-+        with self.assertRaises(ValueError):
-+            unparse({"bad/name": "x"}, full_document=False)
-+
-+    def test_rejects_tag_name_with_whitespace(self):
-+        for name in ["bad name", "bad\tname", "bad\nname"]:
-+            with self.assertRaises(ValueError):
-+                unparse({name: "x"}, full_document=False)
-+
-+    def test_rejects_attribute_name_with_slash(self):
-+        with self.assertRaises(ValueError):
-+            unparse({"a": {"@bad/name": "x"}}, full_document=False)
-+
-+    def test_rejects_attribute_name_with_whitespace(self):
-+        for name in ["@bad name", "@bad\tname", "@bad\nname"]:
-+            with self.assertRaises(ValueError):
-+                unparse({"a": {name: "x"}}, full_document=False)
-+
-+    def test_rejects_xmlns_prefix_with_slash_or_whitespace(self):
-+        # Slash
-+        with self.assertRaises(ValueError):
-+            unparse({"a": {"@xmlns": {"bad/prefix": "http://e/"}}}, full_document=False)
-+        # Whitespace
-+        with self.assertRaises(ValueError):
-+            unparse({"a": {"@xmlns": {"bad prefix": "http://e/"}}}, full_document=False)
-+
-+    def test_rejects_names_with_quotes_and_equals(self):
-+        # Element names
-+        for name in ['a"b', "a'b", "a=b"]:
-+            with self.assertRaises(ValueError):
-+                unparse({name: "x"}, full_document=False)
-+        # Attribute names
-+        for name in ['@a"b', "@a'b", "@a=b"]:
-+            with self.assertRaises(ValueError):
-+                unparse({"a": {name: "x"}}, full_document=False)
-+        # xmlns prefixes
-+        for prefix in ['a"b', "a'b", "a=b"]:
-+            with self.assertRaises(ValueError):
-+                unparse({"a": {"@xmlns": {prefix: "http://e/"}}}, full_document=False)
-diff --git a/xmltodict.py b/xmltodict.py
-index a0ba0de..67c248a 100755
--- a/xmltodict.py
-+++ b/xmltodict.py
-@@ -369,7 +369,54 @@ def parse(xml_input, encoding=None, expat=expat, process_namespaces=False,
-     return handler.item
- 
- 
-+def _has_angle_brackets(value):
-+    """Return True if value (a str) contains '<' or '>'.
-+
-+    Non-string values return False. Uses fast substring checks implemented in C.
-+    """
-+    return isinstance(value, str) and ("<" in value or ">" in value)
-+
-+
-+def _has_invalid_name_chars(value):
-+    """Return True if value (a str) contains any disallowed name characters.
-+
-+    Disallowed: '<', '>', '/', or any whitespace character.
-+    Non-string values return False.
-+    """
-+    if not isinstance(value, str):
-+        return False
-+    if "<" in value or ">" in value or "/" in value:
-+        return True
-+    # Check for any whitespace (spaces, tabs, newlines, etc.)
-+    return any(ch.isspace() for ch in value)
-+
-+
-+def _validate_name(value, kind):
-+    """Validate an element/attribute name for XML safety.
-+
-+    Raises ValueError with a specific reason when invalid.
-+
-+    kind: 'element' or 'attribute' (used in error messages)
-+    """
-+    if not isinstance(value, str):
-+        raise ValueError(f"{kind} name must be a string")
-+    if value.startswith("?") or value.startswith("!"):
-+        raise ValueError(f'Invalid {kind} name: cannot start with "?" or "!"')
-+    if "<" in value or ">" in value:
-+        raise ValueError(f'Invalid {kind} name: "<" or ">" not allowed')
-+    if "/" in value:
-+        raise ValueError(f'Invalid {kind} name: "/" not allowed')
-+    if '"' in value or "'" in value:
-+        raise ValueError(f"Invalid {kind} name: quotes not allowed")
-+    if "=" in value:
-+        raise ValueError(f'Invalid {kind} name: "=" not allowed')
-+    if any(ch.isspace() for ch in value):
-+        raise ValueError(f"Invalid {kind} name: whitespace not allowed")
-+
-+
- def _process_namespace(name, namespaces, ns_sep=':', attr_prefix='@'):
-+    if not isinstance(name, str):
-+        return name
-     if not namespaces:
-         return name
-     try:
-@@ -402,6 +449,8 @@ def _emit(key, value, content_handler,
-         if result is None:
-             return
-         key, value = result
-+    # Minimal validation to avoid breaking out of tag context
-+    _validate_name(key, "element")
-     if not hasattr(value, '__iter__') or isinstance(value, (str, dict)):
-         value = [value]
-     for index, v in enumerate(value):
-@@ -425,17 +474,20 @@ def _emit(key, value, content_handler,
-             if ik == cdata_key:
-                 cdata = iv
-                 continue
-            if ik.startswith(attr_prefix):
-+            if isinstance(ik, str) and ik.startswith(attr_prefix):
-                 ik = _process_namespace(ik, namespaces, namespace_separator,
-                                         attr_prefix)
-                 if ik == '@xmlns' and isinstance(iv, dict):
-                     for k, v in iv.items():
-+                        _validate_name(k, "attribute")
-                         attr = 'xmlns{}'.format(f':{k}' if k else '')
-                         attrs[attr] = str(v)
-                     continue
-                 if not isinstance(iv, str):
-                     iv = str(iv)
-                attrs[ik[len(attr_prefix):]] = iv
-+                attr_name = ik[len(attr_prefix) :]
-+                _validate_name(attr_name, "attribute")
-+                attrs[attr_name] = iv
-                 continue
-             children.append((ik, iv))
-         if pretty:
-- 
-2.51.0
-
--- a/python-xmltodict.changes
+++ b/python-xmltodict.changes
@@ -1,9 +1,29 @@
 -------------------------------------------------------------------
-Tue Sep  9 08:21:40 UTC 2025 - John Paul Adrian Glaubitz <adrian.glaubitz@suse.com>
+Tue Oct 29 16:11:13 UTC 2024 - Martin Hauke <mardnh@gmx.de>

- Cherry-pick security-fix-prereqs.patch to allow backport of CVE fix
- Cherry-pick CVE-2025-9375.patch to fix multiple XML Injection
-  vulnerabilities in XML parser (bsc#1249036, CVE-2025-9375)
+- Update to version 0.14.2
+  * Revert "Ensure significant whitespace is not trimmed"
+    This change was backwards incompatible and caused downstream
+    issues.
+- Update to version 0.14.1
+  * Drop support for Python older than 3.6
+  * Additional ruff/Pyflakes/codespell fixes.
+- Update to version 0.14.0
+  * Drop old Python 2 support leftover code and apply several RUFF
+    code health fixes.
+  * Add Python 3.11, 3.12 and 3.13 support and tests.
+  * Remove defusedexpat import.
+  * Replace deprecated BadZipfile with BadZipFile.
+  * Support indent using integer format,
+    enable python -m unittest tests/*.py.
+  * Ensure significant whitespace is not trimmed
+  * added conda installation command
+  * fix attributes not appearing in streaming mode
+
+-------------------------------------------------------------------
+Mon Feb 12 15:02:30 UTC 2024 - Matej Cepl <mcepl@cepl.eu>
+
+- Clean up the SPEC file.

 -------------------------------------------------------------------
 Tue May  9 13:41:52 UTC 2023 - Johannes Kastl <kastl@b1-systems.de>
--- a/python-xmltodict.spec
+++ b/python-xmltodict.spec
@@ -1,7 +1,7 @@
 #
 # spec file for package python-xmltodict
 #
-# Copyright (c) 2023 SUSE LLC
+# Copyright (c) 2024 SUSE LLC
 #
 # All modifications and additions to the file contributed by third parties
 # remain the property of their copyright owners, unless otherwise agreed
@@ -18,22 +18,22 @@

 %{?sle15_python_module_pythons}
 Name:           python-xmltodict
-Version:        0.13.0
+Version:        0.14.2
 Release:        0
 Summary:        Module to make XML working resemble JSON
 License:        MIT
 URL:            https://github.com/martinblech/xmltodict
 Source:         https://files.pythonhosted.org/packages/source/x/xmltodict/xmltodict-%{version}.tar.gz
 Patch0:         skip-tests-expat-245.patch
-Patch1:         security-fix-prereqs.patch
-Patch2:         CVE-2025-9375.patch
+BuildRequires:  %{python_module pip}
+BuildRequires:  %{python_module pytest}
 BuildRequires:  %{python_module setuptools}
+BuildRequires:  %{python_module wheel}
 BuildRequires:  %{python_module xml}
 BuildRequires:  fdupes
 BuildRequires:  python-rpm-macros
 Requires:       python-xml
 BuildArch:      noarch
-BuildRequires:  %{python_module pytest}
 %python_subpackages

 %description
@@ -47,11 +47,11 @@ sed -i '1{\@^#!%{_bindir}/env python@d}' xmltodict.py

 %build
 export LANG=en_US.UTF-8
-%python_build
+%pyproject_wheel

 %install
 export LANG=en_US.UTF-8
-%python_install
+%pyproject_install
 %python_expand %fdupes %{buildroot}%{$python_sitelib}

 %check
@@ -63,6 +63,6 @@ export LANG=en_US.UTF-8
 %doc README.md
 %{python_sitelib}/xmltodict.py*
 %pycache_only %{python_sitelib}/__pycache__/xmltodict.*.py*
-%{python_sitelib}/xmltodict-%{version}-py*.egg-info
+%{python_sitelib}/xmltodict-%{version}*-info

 %changelog
--- a/security-fix-prereqs.patch
+++ b/security-fix-prereqs.patch
@@ -1,154 +0,0 @@
-From 239ef35e545ef6333c0b5dff6484659ffd62c9b0 Mon Sep 17 00:00:00 2001
-From: Dimitri Papadopoulos
- <3234522+DimitriPapadopoulos@users.noreply.github.com>
-Date: Sun, 17 Jul 2022 09:28:21 +0200
-Subject: [PATCH 1/2] Merge the following four upstream commits required for
- CVE-2025-9375
-
- Merge `isinstance` calls (9c3ec3c)
- Apply ruff/pyupgrade rule UP031 (d4a50f5)
- Apply ruff/pyupgrade rule UP032 (3df6d23)
- Get rid of Python 2 basestring and unicode (#346) (5b1b511)
---
- ez_setup.py             |  7 +++----
- tests/test_xmltodict.py |  4 ++--
- xmltodict.py            | 39 ++++++++++++---------------------------
- 3 files changed, 17 insertions(+), 33 deletions(-)
-
-diff --git a/ez_setup.py b/ez_setup.py
-index 800c31e..b5cc352 100644
--- a/ez_setup.py
-+++ b/ez_setup.py
-@@ -131,7 +131,7 @@ def archive_context(filename):
- 
- def _do_download(version, download_base, to_dir, download_delay):
-     """Download Setuptools."""
-    py_desig = 'py{sys.version_info[0]}.{sys.version_info[1]}'.format(sys=sys)
-+    py_desig = f'py{sys.version_info[0]}.{sys.version_info[1]}'
-     tp = 'setuptools-{version}-{py_desig}.egg'
-     egg = os.path.join(to_dir, tp.format(**locals()))
-     if not os.path.exists(egg):
-@@ -245,8 +245,7 @@ def download_file_powershell(url, target):
-     ps_cmd = (
-         "[System.Net.WebRequest]::DefaultWebProxy.Credentials = "
-         "[System.Net.CredentialCache]::DefaultCredentials; "
-        '(new-object System.Net.WebClient).DownloadFile("%(url)s", "%(target)s")'
-        % locals()
-+        '(new-object System.Net.WebClient).DownloadFile("{url}", "{target}")'.format(**locals())
-     )
-     cmd = [
-         'powershell',
-@@ -346,7 +345,7 @@ def download_setuptools(
-     """
-     # making sure we use the absolute path
-     to_dir = os.path.abspath(to_dir)
-    zip_name = "setuptools-%s.zip" % version
-+    zip_name = f"setuptools-{version}.zip"
-     url = download_base + zip_name
-     saveto = os.path.join(to_dir, zip_name)
-     if not os.path.exists(saveto):  # Avoid repeated downloads
-diff --git a/tests/test_xmltodict.py b/tests/test_xmltodict.py
-index 04137f9..aa7a1a4 100644
--- a/tests/test_xmltodict.py
-+++ b/tests/test_xmltodict.py
-@@ -168,14 +168,14 @@ class XMLToDictTestCase(unittest.TestCase):
-         except NameError:
-             value = chr(39321)
-         self.assertEqual({'a': value},
-                         parse('<a>%s</a>' % value))
-+                         parse(f'<a>{value}</a>'))
- 
-     def test_encoded_string(self):
-         try:
-             value = unichr(39321)
-         except NameError:
-             value = chr(39321)
-        xml = '<a>%s</a>' % value
-+        xml = f'<a>{value}</a>'
-         self.assertEqual(parse(xml),
-                          parse(xml.encode('utf-8')))
- 
-diff --git a/xmltodict.py b/xmltodict.py
-index ca760aa..a0ba0de 100755
--- a/xmltodict.py
-+++ b/xmltodict.py
-@@ -22,15 +22,6 @@ if tuple(map(int, platform.python_version_tuple()[:2])) < (3, 7):
- 
- from inspect import isgenerator
- 
-try:  # pragma no cover
-    _basestring = basestring
-except NameError:  # pragma no cover
-    _basestring = str
-try:  # pragma no cover
-    _unicode = unicode
-except NameError:  # pragma no cover
-    _unicode = str
-
- __author__ = 'Martin Blech'
- __version__ = '0.13.0'
- __license__ = 'MIT'
-@@ -335,9 +326,8 @@ def parse(xml_input, encoding=None, expat=expat, process_namespaces=False,
-     """
-     handler = _DictSAXHandler(namespace_separator=namespace_separator,
-                               **kwargs)
-    if isinstance(xml_input, _unicode):
-        if not encoding:
-            encoding = 'utf-8'
-+    if isinstance(xml_input, str):
-+        encoding = encoding or 'utf-8'
-         xml_input = xml_input.encode(encoding)
-     if not process_namespaces:
-         namespace_separator = None
-@@ -412,9 +402,7 @@ def _emit(key, value, content_handler,
-         if result is None:
-             return
-         key, value = result
-    if (not hasattr(value, '__iter__')
-            or isinstance(value, _basestring)
-            or isinstance(value, dict)):
-+    if not hasattr(value, '__iter__') or isinstance(value, (str, dict)):
-         value = [value]
-     for index, v in enumerate(value):
-         if full_document and depth == 0 and index > 0:
-@@ -422,16 +410,13 @@ def _emit(key, value, content_handler,
-         if v is None:
-             v = _dict()
-         elif isinstance(v, bool):
-            if v:
-                v = _unicode('true')
-            else:
-                v = _unicode('false')
-        elif not isinstance(v, dict):
-            if expand_iter and hasattr(v, '__iter__') and not isinstance(v, _basestring):
-+            v = 'true' if v else 'false'
-+        elif not isinstance(v, (dict, str)):
-+            if expand_iter and hasattr(v, '__iter__'):
-                 v = _dict(((expand_iter, v),))
-             else:
-                v = _unicode(v)
-        if isinstance(v, _basestring):
-+                v = str(v)
-+        if isinstance(v, str):
-             v = _dict(((cdata_key, v),))
-         cdata = None
-         attrs = _dict()
-@@ -445,11 +430,11 @@ def _emit(key, value, content_handler,
-                                         attr_prefix)
-                 if ik == '@xmlns' and isinstance(iv, dict):
-                     for k, v in iv.items():
-                        attr = 'xmlns{}'.format(':{}'.format(k) if k else '')
-                        attrs[attr] = _unicode(v)
-+                        attr = 'xmlns{}'.format(f':{k}' if k else '')
-+                        attrs[attr] = str(v)
-                     continue
-                if not isinstance(iv, _unicode):
-                    iv = _unicode(iv)
-+                if not isinstance(iv, str):
-+                    iv = str(iv)
-                 attrs[ik[len(attr_prefix):]] = iv
-                 continue
-             children.append((ik, iv))
-- 
-2.51.0
-
--- a/xmltodict-0.13.0.tar.gz
+++ b/xmltodict-0.13.0.tar.gz
--- a/xmltodict-0.14.2.tar.gz
+++ b/xmltodict-0.14.2.tar.gz