Backport fix for CVE-2025-9375

- Cherry-pick CVE-2025-9375.patch to fix multiple XML Injection vulnerabilities in XML parser (bsc#1249036, CVE-2025-9375)
Accepting request 1219092 from devel:languages:python
2025-11-18 11:28:55 +01:00 · 2024-10-30 16:32:51 +00:00 · 2024-10-29 17:03:36 +00:00
5 changed files with 255 additions and 4 deletions
--- a/CVE-2025-9375.patch
+++ b/CVE-2025-9375.patch
@@ -0,0 +1,222 @@
 From 065c500169f9c4f5109df1902027f778d2ee8730 Mon Sep 17 00:00:00 2001
 From: Martin Blech <78768+martinblech@users.noreply.github.com>
 Date: Thu, 4 Sep 2025 17:25:39 -0700
 Subject: [PATCH] Merge the following three upstream commits as a combined fix
 for CVE-2025-9375
 - Prevent XML injection: reject '<'/'>' in element/attr names (incl. @xmlns) (ecd456a)
 - Enhance unparse() XML name validation with stricter rules and tests (f98c90f)
 - Harden XML name validation: reject quotes and '='; add tests (8860b0e)
 ---
 tests/test_dicttoxml.py | 106 ++++++++++++++++++++++++++++++++++++++++
 xmltodict.py            |  56 ++++++++++++++++++++-
 2 files changed, 160 insertions(+), 2 deletions(-)
 diff --git a/tests/test_dicttoxml.py b/tests/test_dicttoxml.py
 index 470aca9..1fa5ba7 100644
 --- a/tests/test_dicttoxml.py
 +++ b/tests/test_dicttoxml.py
@@ -231,3 +231,109 @@ xmlns:b="http://b.com/"><x a:attr="val">1</x><a:y>2</a:y><b:z>3</b:z></root>'''
         expected_xml = '<?xml version="1.0" encoding="utf-8"?>\n<x>false</x>'
         xml = unparse(dict(x=False))
         self.assertEqual(xml, expected_xml)
 +
 +    def test_rejects_tag_name_with_angle_brackets(self):
 +        # Minimal guard: disallow '<' or '>' to prevent breaking tag context
 +        with self.assertRaises(ValueError):
 +            unparse({"m><tag>content</tag": "unsafe"}, full_document=False)
 +
 +    def test_rejects_attribute_name_with_angle_brackets(self):
 +        # Now we expect bad attribute names to be rejected
 +        with self.assertRaises(ValueError):
 +            unparse(
 +                {"a": {"@m><tag>content</tag": "unsafe", "#text": "x"}},
 +                full_document=False,
 +            )
 +
 +    def test_rejects_malicious_xmlns_prefix(self):
 +        # xmlns prefixes go under @xmlns mapping; reject angle brackets in prefix
 +        with self.assertRaises(ValueError):
 +            unparse(
 +                {
 +                    "a": {
 +                        "@xmlns": {"m><bad": "http://example.com/"},
 +                        "#text": "x",
 +                    }
 +                },
 +                full_document=False,
 +            )
 +
 +    def test_attribute_values_with_angle_brackets_are_escaped(self):
 +        # Attribute values should be escaped by XMLGenerator
 +        xml = unparse({"a": {"@attr": "1<middle>2", "#text": "x"}}, full_document=False)
 +        # The generated XML should contain escaped '<' and '>' within the attribute value
 +        self.assertIn('attr="1&lt;middle&gt;2"', xml)
 +
 +    def test_rejects_tag_name_starting_with_question(self):
 +        with self.assertRaises(ValueError):
 +            unparse({"?pi": "data"}, full_document=False)
 +
 +    def test_rejects_tag_name_starting_with_bang(self):
 +        with self.assertRaises(ValueError):
 +            unparse({"!decl": "data"}, full_document=False)
 +
 +    def test_rejects_attribute_name_starting_with_question(self):
 +        with self.assertRaises(ValueError):
 +            unparse({"a": {"@?weird": "x"}}, full_document=False)
 +
 +    def test_rejects_attribute_name_starting_with_bang(self):
 +        with self.assertRaises(ValueError):
 +            unparse({"a": {"@!weird": "x"}}, full_document=False)
 +
 +    def test_rejects_xmlns_prefix_starting_with_question_or_bang(self):
 +        with self.assertRaises(ValueError):
 +            unparse({"a": {"@xmlns": {"?p": "http://e/"}}}, full_document=False)
 +        with self.assertRaises(ValueError):
 +            unparse({"a": {"@xmlns": {"!p": "http://e/"}}}, full_document=False)
 +
 +    def test_rejects_non_string_names(self):
 +        class Weird:
 +            def __str__(self):
 +                return "bad>name"
 +
 +        # Non-string element key
 +        with self.assertRaises(ValueError):
 +            unparse({Weird(): "x"}, full_document=False)
 +        # Non-string attribute key
 +        with self.assertRaises(ValueError):
 +            unparse({"a": {Weird(): "x"}}, full_document=False)
 +
 +    def test_rejects_tag_name_with_slash(self):
 +        with self.assertRaises(ValueError):
 +            unparse({"bad/name": "x"}, full_document=False)
 +
 +    def test_rejects_tag_name_with_whitespace(self):
 +        for name in ["bad name", "bad\tname", "bad\nname"]:
 +            with self.assertRaises(ValueError):
 +                unparse({name: "x"}, full_document=False)
 +
 +    def test_rejects_attribute_name_with_slash(self):
 +        with self.assertRaises(ValueError):
 +            unparse({"a": {"@bad/name": "x"}}, full_document=False)
 +
 +    def test_rejects_attribute_name_with_whitespace(self):
 +        for name in ["@bad name", "@bad\tname", "@bad\nname"]:
 +            with self.assertRaises(ValueError):
 +                unparse({"a": {name: "x"}}, full_document=False)
 +
 +    def test_rejects_xmlns_prefix_with_slash_or_whitespace(self):
 +        # Slash
 +        with self.assertRaises(ValueError):
 +            unparse({"a": {"@xmlns": {"bad/prefix": "http://e/"}}}, full_document=False)
 +        # Whitespace
 +        with self.assertRaises(ValueError):
 +            unparse({"a": {"@xmlns": {"bad prefix": "http://e/"}}}, full_document=False)
 +
 +    def test_rejects_names_with_quotes_and_equals(self):
 +        # Element names
 +        for name in ['a"b', "a'b", "a=b"]:
 +            with self.assertRaises(ValueError):
 +                unparse({name: "x"}, full_document=False)
 +        # Attribute names
 +        for name in ['@a"b', "@a'b", "@a=b"]:
 +            with self.assertRaises(ValueError):
 +                unparse({"a": {name: "x"}}, full_document=False)
 +        # xmlns prefixes
 +        for prefix in ['a"b', "a'b", "a=b"]:
 +            with self.assertRaises(ValueError):
 +                unparse({"a": {"@xmlns": {prefix: "http://e/"}}}, full_document=False)
 diff --git a/xmltodict.py b/xmltodict.py
 index 098f627..15e2748 100755
 --- a/xmltodict.py
 +++ b/xmltodict.py
@@ -360,7 +360,54 @@ def parse(xml_input, encoding=None, expat=expat, process_namespaces=False,
     return handler.item
 +def _has_angle_brackets(value):
 +    """Return True if value (a str) contains '<' or '>'.
 +
 +    Non-string values return False. Uses fast substring checks implemented in C.
 +    """
 +    return isinstance(value, str) and ("<" in value or ">" in value)
 +
 +
 +def _has_invalid_name_chars(value):
 +    """Return True if value (a str) contains any disallowed name characters.
 +
 +    Disallowed: '<', '>', '/', or any whitespace character.
 +    Non-string values return False.
 +    """
 +    if not isinstance(value, str):
 +        return False
 +    if "<" in value or ">" in value or "/" in value:
 +        return True
 +    # Check for any whitespace (spaces, tabs, newlines, etc.)
 +    return any(ch.isspace() for ch in value)
 +
 +
 +def _validate_name(value, kind):
 +    """Validate an element/attribute name for XML safety.
 +
 +    Raises ValueError with a specific reason when invalid.
 +
 +    kind: 'element' or 'attribute' (used in error messages)
 +    """
 +    if not isinstance(value, str):
 +        raise ValueError(f"{kind} name must be a string")
 +    if value.startswith("?") or value.startswith("!"):
 +        raise ValueError(f'Invalid {kind} name: cannot start with "?" or "!"')
 +    if "<" in value or ">" in value:
 +        raise ValueError(f'Invalid {kind} name: "<" or ">" not allowed')
 +    if "/" in value:
 +        raise ValueError(f'Invalid {kind} name: "/" not allowed')
 +    if '"' in value or "'" in value:
 +        raise ValueError(f"Invalid {kind} name: quotes not allowed")
 +    if "=" in value:
 +        raise ValueError(f'Invalid {kind} name: "=" not allowed')
 +    if any(ch.isspace() for ch in value):
 +        raise ValueError(f"Invalid {kind} name: whitespace not allowed")
 +
 +
 def _process_namespace(name, namespaces, ns_sep=':', attr_prefix='@'):
 +    if not isinstance(name, str):
 +        return name
     if not namespaces:
         return name
     try:
@@ -393,6 +440,8 @@ def _emit(key, value, content_handler,
         if result is None:
             return
         key, value = result
 +    # Minimal validation to avoid breaking out of tag context
 +    _validate_name(key, "element")
     if not hasattr(value, '__iter__') or isinstance(value, (str, dict)):
         value = [value]
     for index, v in enumerate(value):
@@ -416,17 +465,20 @@ def _emit(key, value, content_handler,
             if ik == cdata_key:
                 cdata = iv
                 continue
 -            if ik.startswith(attr_prefix):
 +            if isinstance(ik, str) and ik.startswith(attr_prefix):
                 ik = _process_namespace(ik, namespaces, namespace_separator,
                                         attr_prefix)
                 if ik == '@xmlns' and isinstance(iv, dict):
                     for k, v in iv.items():
 +                        _validate_name(k, "attribute")
                         attr = 'xmlns{}'.format(f':{k}' if k else '')
                         attrs[attr] = str(v)
                     continue
                 if not isinstance(iv, str):
                     iv = str(iv)
 -                attrs[ik[len(attr_prefix):]] = iv
 +                attr_name = ik[len(attr_prefix) :]
 +                _validate_name(attr_name, "attribute")
 +                attrs[attr_name] = iv
                 continue
             children.append((ik, iv))
         if isinstance(indent, int):
 -- 
 2.51.0
--- a/python-xmltodict.changes
+++ b/python-xmltodict.changes
@@ -1,3 +1,31 @@
 -------------------------------------------------------------------
 Tue Sep  9 08:38:44 UTC 2025 - John Paul Adrian Glaubitz <adrian.glaubitz@suse.com>
 - Cherry-pick CVE-2025-9375.patch to fix multiple XML Injection
  vulnerabilities in XML parser (bsc#1249036, CVE-2025-9375)
 -------------------------------------------------------------------
 Tue Oct 29 16:11:13 UTC 2024 - Martin Hauke <mardnh@gmx.de>
 - Update to version 0.14.2
  * Revert "Ensure significant whitespace is not trimmed"
    This change was backwards incompatible and caused downstream
    issues.
 - Update to version 0.14.1
  * Drop support for Python older than 3.6
  * Additional ruff/Pyflakes/codespell fixes.
 - Update to version 0.14.0
  * Drop old Python 2 support leftover code and apply several RUFF
    code health fixes.
  * Add Python 3.11, 3.12 and 3.13 support and tests.
  * Remove defusedexpat import.
  * Replace deprecated BadZipfile with BadZipFile.
  * Support indent using integer format,
    enable python -m unittest tests/*.py.
  * Ensure significant whitespace is not trimmed
  * added conda installation command
  * fix attributes not appearing in streaming mode
 -------------------------------------------------------------------
 Mon Feb 12 15:02:30 UTC 2024 - Matej Cepl <mcepl@cepl.eu>
--- a/python-xmltodict.spec
+++ b/python-xmltodict.spec
@@ -18,13 +18,14 @@
 %{?sle15_python_module_pythons}
 Name:           python-xmltodict
-Version:        0.13.0
+Version:        0.14.2
 Release:        0
 Summary:        Module to make XML working resemble JSON
 License:        MIT
 URL:            https://github.com/martinblech/xmltodict
 Source:         https://files.pythonhosted.org/packages/source/x/xmltodict/xmltodict-%{version}.tar.gz
 Patch0:         skip-tests-expat-245.patch
 Patch1:         CVE-2025-9375.patch
 BuildRequires:  %{python_module pip}
 BuildRequires:  %{python_module pytest}
 BuildRequires:  %{python_module setuptools}
--- a/xmltodict-0.13.0.tar.gz
+++ b/xmltodict-0.13.0.tar.gz
--- a/xmltodict-0.14.2.tar.gz
+++ b/xmltodict-0.14.2.tar.gz