Compare commits

1 Commits
1.1 ... main

6 changed files with 35 additions and 391 deletions

View File

@@ -1,222 +0,0 @@
From 6df43e8d0133f700c328e3da4028315fb9f873da Mon Sep 17 00:00:00 2001
From: Martin Blech <78768+martinblech@users.noreply.github.com>
Date: Thu, 4 Sep 2025 17:25:39 -0700
Subject: [PATCH 2/2] Merge the following three upstream commits as a combined
fix for CVE-2025-9375
- Prevent XML injection: reject '<'/'>' in element/attr names (incl. @xmlns) (ecd456a)
- Enhance unparse() XML name validation with stricter rules and tests (f98c90f)
- Harden XML name validation: reject quotes and '='; add tests (8860b0e)
---
tests/test_dicttoxml.py | 106 ++++++++++++++++++++++++++++++++++++++++
xmltodict.py | 56 ++++++++++++++++++++-
2 files changed, 160 insertions(+), 2 deletions(-)
diff --git a/tests/test_dicttoxml.py b/tests/test_dicttoxml.py
index 7fc2171..9830e2a 100644
--- a/tests/test_dicttoxml.py
+++ b/tests/test_dicttoxml.py
@@ -213,3 +213,109 @@ xmlns:b="http://b.com/"><x a:attr="val">1</x><a:y>2</a:y><b:z>3</b:z></root>'''
expected_xml = '<?xml version="1.0" encoding="utf-8"?>\n<x>false</x>'
xml = unparse(dict(x=False))
self.assertEqual(xml, expected_xml)
+
+ def test_rejects_tag_name_with_angle_brackets(self):
+ # Minimal guard: disallow '<' or '>' to prevent breaking tag context
+ with self.assertRaises(ValueError):
+ unparse({"m><tag>content</tag": "unsafe"}, full_document=False)
+
+ def test_rejects_attribute_name_with_angle_brackets(self):
+ # Now we expect bad attribute names to be rejected
+ with self.assertRaises(ValueError):
+ unparse(
+ {"a": {"@m><tag>content</tag": "unsafe", "#text": "x"}},
+ full_document=False,
+ )
+
+ def test_rejects_malicious_xmlns_prefix(self):
+ # xmlns prefixes go under @xmlns mapping; reject angle brackets in prefix
+ with self.assertRaises(ValueError):
+ unparse(
+ {
+ "a": {
+ "@xmlns": {"m><bad": "http://example.com/"},
+ "#text": "x",
+ }
+ },
+ full_document=False,
+ )
+
+ def test_attribute_values_with_angle_brackets_are_escaped(self):
+ # Attribute values should be escaped by XMLGenerator
+ xml = unparse({"a": {"@attr": "1<middle>2", "#text": "x"}}, full_document=False)
+ # The generated XML should contain escaped '<' and '>' within the attribute value
+ self.assertIn('attr="1&lt;middle&gt;2"', xml)
+
+ def test_rejects_tag_name_starting_with_question(self):
+ with self.assertRaises(ValueError):
+ unparse({"?pi": "data"}, full_document=False)
+
+ def test_rejects_tag_name_starting_with_bang(self):
+ with self.assertRaises(ValueError):
+ unparse({"!decl": "data"}, full_document=False)
+
+ def test_rejects_attribute_name_starting_with_question(self):
+ with self.assertRaises(ValueError):
+ unparse({"a": {"@?weird": "x"}}, full_document=False)
+
+ def test_rejects_attribute_name_starting_with_bang(self):
+ with self.assertRaises(ValueError):
+ unparse({"a": {"@!weird": "x"}}, full_document=False)
+
+ def test_rejects_xmlns_prefix_starting_with_question_or_bang(self):
+ with self.assertRaises(ValueError):
+ unparse({"a": {"@xmlns": {"?p": "http://e/"}}}, full_document=False)
+ with self.assertRaises(ValueError):
+ unparse({"a": {"@xmlns": {"!p": "http://e/"}}}, full_document=False)
+
+ def test_rejects_non_string_names(self):
+ class Weird:
+ def __str__(self):
+ return "bad>name"
+
+ # Non-string element key
+ with self.assertRaises(ValueError):
+ unparse({Weird(): "x"}, full_document=False)
+ # Non-string attribute key
+ with self.assertRaises(ValueError):
+ unparse({"a": {Weird(): "x"}}, full_document=False)
+
+ def test_rejects_tag_name_with_slash(self):
+ with self.assertRaises(ValueError):
+ unparse({"bad/name": "x"}, full_document=False)
+
+ def test_rejects_tag_name_with_whitespace(self):
+ for name in ["bad name", "bad\tname", "bad\nname"]:
+ with self.assertRaises(ValueError):
+ unparse({name: "x"}, full_document=False)
+
+ def test_rejects_attribute_name_with_slash(self):
+ with self.assertRaises(ValueError):
+ unparse({"a": {"@bad/name": "x"}}, full_document=False)
+
+ def test_rejects_attribute_name_with_whitespace(self):
+ for name in ["@bad name", "@bad\tname", "@bad\nname"]:
+ with self.assertRaises(ValueError):
+ unparse({"a": {name: "x"}}, full_document=False)
+
+ def test_rejects_xmlns_prefix_with_slash_or_whitespace(self):
+ # Slash
+ with self.assertRaises(ValueError):
+ unparse({"a": {"@xmlns": {"bad/prefix": "http://e/"}}}, full_document=False)
+ # Whitespace
+ with self.assertRaises(ValueError):
+ unparse({"a": {"@xmlns": {"bad prefix": "http://e/"}}}, full_document=False)
+
+ def test_rejects_names_with_quotes_and_equals(self):
+ # Element names
+ for name in ['a"b', "a'b", "a=b"]:
+ with self.assertRaises(ValueError):
+ unparse({name: "x"}, full_document=False)
+ # Attribute names
+ for name in ['@a"b', "@a'b", "@a=b"]:
+ with self.assertRaises(ValueError):
+ unparse({"a": {name: "x"}}, full_document=False)
+ # xmlns prefixes
+ for prefix in ['a"b', "a'b", "a=b"]:
+ with self.assertRaises(ValueError):
+ unparse({"a": {"@xmlns": {prefix: "http://e/"}}}, full_document=False)
diff --git a/xmltodict.py b/xmltodict.py
index a0ba0de..67c248a 100755
--- a/xmltodict.py
+++ b/xmltodict.py
@@ -369,7 +369,54 @@ def parse(xml_input, encoding=None, expat=expat, process_namespaces=False,
return handler.item
+def _has_angle_brackets(value):
+ """Return True if value (a str) contains '<' or '>'.
+
+ Non-string values return False. Uses fast substring checks implemented in C.
+ """
+ return isinstance(value, str) and ("<" in value or ">" in value)
+
+
+def _has_invalid_name_chars(value):
+ """Return True if value (a str) contains any disallowed name characters.
+
+ Disallowed: '<', '>', '/', or any whitespace character.
+ Non-string values return False.
+ """
+ if not isinstance(value, str):
+ return False
+ if "<" in value or ">" in value or "/" in value:
+ return True
+ # Check for any whitespace (spaces, tabs, newlines, etc.)
+ return any(ch.isspace() for ch in value)
+
+
+def _validate_name(value, kind):
+ """Validate an element/attribute name for XML safety.
+
+ Raises ValueError with a specific reason when invalid.
+
+ kind: 'element' or 'attribute' (used in error messages)
+ """
+ if not isinstance(value, str):
+ raise ValueError(f"{kind} name must be a string")
+ if value.startswith("?") or value.startswith("!"):
+ raise ValueError(f'Invalid {kind} name: cannot start with "?" or "!"')
+ if "<" in value or ">" in value:
+ raise ValueError(f'Invalid {kind} name: "<" or ">" not allowed')
+ if "/" in value:
+ raise ValueError(f'Invalid {kind} name: "/" not allowed')
+ if '"' in value or "'" in value:
+ raise ValueError(f"Invalid {kind} name: quotes not allowed")
+ if "=" in value:
+ raise ValueError(f'Invalid {kind} name: "=" not allowed')
+ if any(ch.isspace() for ch in value):
+ raise ValueError(f"Invalid {kind} name: whitespace not allowed")
+
+
def _process_namespace(name, namespaces, ns_sep=':', attr_prefix='@'):
+ if not isinstance(name, str):
+ return name
if not namespaces:
return name
try:
@@ -402,6 +449,8 @@ def _emit(key, value, content_handler,
if result is None:
return
key, value = result
+ # Minimal validation to avoid breaking out of tag context
+ _validate_name(key, "element")
if not hasattr(value, '__iter__') or isinstance(value, (str, dict)):
value = [value]
for index, v in enumerate(value):
@@ -425,17 +474,20 @@ def _emit(key, value, content_handler,
if ik == cdata_key:
cdata = iv
continue
- if ik.startswith(attr_prefix):
+ if isinstance(ik, str) and ik.startswith(attr_prefix):
ik = _process_namespace(ik, namespaces, namespace_separator,
attr_prefix)
if ik == '@xmlns' and isinstance(iv, dict):
for k, v in iv.items():
+ _validate_name(k, "attribute")
attr = 'xmlns{}'.format(f':{k}' if k else '')
attrs[attr] = str(v)
continue
if not isinstance(iv, str):
iv = str(iv)
- attrs[ik[len(attr_prefix):]] = iv
+ attr_name = ik[len(attr_prefix) :]
+ _validate_name(attr_name, "attribute")
+ attrs[attr_name] = iv
continue
children.append((ik, iv))
if pretty:
--
2.51.0

View File

@@ -1,9 +1,29 @@
-------------------------------------------------------------------
Tue Sep 9 08:21:40 UTC 2025 - John Paul Adrian Glaubitz <adrian.glaubitz@suse.com>
Tue Oct 29 16:11:13 UTC 2024 - Martin Hauke <mardnh@gmx.de>
- Cherry-pick security-fix-prereqs.patch to allow backport of CVE fix
- Cherry-pick CVE-2025-9375.patch to fix multiple XML Injection
vulnerabilities in XML parser (bsc#1249036, CVE-2025-9375)
- Update to version 0.14.2
* Revert "Ensure significant whitespace is not trimmed"
This change was backwards incompatible and caused downstream
issues.
- Update to version 0.14.1
* Drop support for Python older than 3.6
* Additional ruff/Pyflakes/codespell fixes.
- Update to version 0.14.0
* Drop old Python 2 support leftover code and apply several RUFF
code health fixes.
* Add Python 3.11, 3.12 and 3.13 support and tests.
* Remove defusedexpat import.
* Replace deprecated BadZipfile with BadZipFile.
* Support indent using integer format,
enable python -m unittest tests/*.py.
* Ensure significant whitespace is not trimmed
* added conda installation command
* fix attributes not appearing in streaming mode
-------------------------------------------------------------------
Mon Feb 12 15:02:30 UTC 2024 - Matej Cepl <mcepl@cepl.eu>
- Clean up the SPEC file.
-------------------------------------------------------------------
Tue May 9 13:41:52 UTC 2023 - Johannes Kastl <kastl@b1-systems.de>

View File

@@ -1,7 +1,7 @@
#
# spec file for package python-xmltodict
#
# Copyright (c) 2023 SUSE LLC
# Copyright (c) 2024 SUSE LLC
#
# All modifications and additions to the file contributed by third parties
# remain the property of their copyright owners, unless otherwise agreed
@@ -18,22 +18,22 @@
%{?sle15_python_module_pythons}
Name: python-xmltodict
Version: 0.13.0
Version: 0.14.2
Release: 0
Summary: Module to make XML working resemble JSON
License: MIT
URL: https://github.com/martinblech/xmltodict
Source: https://files.pythonhosted.org/packages/source/x/xmltodict/xmltodict-%{version}.tar.gz
Patch0: skip-tests-expat-245.patch
Patch1: security-fix-prereqs.patch
Patch2: CVE-2025-9375.patch
BuildRequires: %{python_module pip}
BuildRequires: %{python_module pytest}
BuildRequires: %{python_module setuptools}
BuildRequires: %{python_module wheel}
BuildRequires: %{python_module xml}
BuildRequires: fdupes
BuildRequires: python-rpm-macros
Requires: python-xml
BuildArch: noarch
BuildRequires: %{python_module pytest}
%python_subpackages
%description
@@ -47,11 +47,11 @@ sed -i '1{\@^#!%{_bindir}/env python@d}' xmltodict.py
%build
export LANG=en_US.UTF-8
%python_build
%pyproject_wheel
%install
export LANG=en_US.UTF-8
%python_install
%pyproject_install
%python_expand %fdupes %{buildroot}%{$python_sitelib}
%check
@@ -63,6 +63,6 @@ export LANG=en_US.UTF-8
%doc README.md
%{python_sitelib}/xmltodict.py*
%pycache_only %{python_sitelib}/__pycache__/xmltodict.*.py*
%{python_sitelib}/xmltodict-%{version}-py*.egg-info
%{python_sitelib}/xmltodict-%{version}*-info
%changelog

View File

@@ -1,154 +0,0 @@
From 239ef35e545ef6333c0b5dff6484659ffd62c9b0 Mon Sep 17 00:00:00 2001
From: Dimitri Papadopoulos
<3234522+DimitriPapadopoulos@users.noreply.github.com>
Date: Sun, 17 Jul 2022 09:28:21 +0200
Subject: [PATCH 1/2] Merge the following four upstream commits required for
CVE-2025-9375
- Merge `isinstance` calls (9c3ec3c)
- Apply ruff/pyupgrade rule UP031 (d4a50f5)
- Apply ruff/pyupgrade rule UP032 (3df6d23)
- Get rid of Python 2 basestring and unicode (#346) (5b1b511)
---
ez_setup.py | 7 +++----
tests/test_xmltodict.py | 4 ++--
xmltodict.py | 39 ++++++++++++---------------------------
3 files changed, 17 insertions(+), 33 deletions(-)
diff --git a/ez_setup.py b/ez_setup.py
index 800c31e..b5cc352 100644
--- a/ez_setup.py
+++ b/ez_setup.py
@@ -131,7 +131,7 @@ def archive_context(filename):
def _do_download(version, download_base, to_dir, download_delay):
"""Download Setuptools."""
- py_desig = 'py{sys.version_info[0]}.{sys.version_info[1]}'.format(sys=sys)
+ py_desig = f'py{sys.version_info[0]}.{sys.version_info[1]}'
tp = 'setuptools-{version}-{py_desig}.egg'
egg = os.path.join(to_dir, tp.format(**locals()))
if not os.path.exists(egg):
@@ -245,8 +245,7 @@ def download_file_powershell(url, target):
ps_cmd = (
"[System.Net.WebRequest]::DefaultWebProxy.Credentials = "
"[System.Net.CredentialCache]::DefaultCredentials; "
- '(new-object System.Net.WebClient).DownloadFile("%(url)s", "%(target)s")'
- % locals()
+ '(new-object System.Net.WebClient).DownloadFile("{url}", "{target}")'.format(**locals())
)
cmd = [
'powershell',
@@ -346,7 +345,7 @@ def download_setuptools(
"""
# making sure we use the absolute path
to_dir = os.path.abspath(to_dir)
- zip_name = "setuptools-%s.zip" % version
+ zip_name = f"setuptools-{version}.zip"
url = download_base + zip_name
saveto = os.path.join(to_dir, zip_name)
if not os.path.exists(saveto): # Avoid repeated downloads
diff --git a/tests/test_xmltodict.py b/tests/test_xmltodict.py
index 04137f9..aa7a1a4 100644
--- a/tests/test_xmltodict.py
+++ b/tests/test_xmltodict.py
@@ -168,14 +168,14 @@ class XMLToDictTestCase(unittest.TestCase):
except NameError:
value = chr(39321)
self.assertEqual({'a': value},
- parse('<a>%s</a>' % value))
+ parse(f'<a>{value}</a>'))
def test_encoded_string(self):
try:
value = unichr(39321)
except NameError:
value = chr(39321)
- xml = '<a>%s</a>' % value
+ xml = f'<a>{value}</a>'
self.assertEqual(parse(xml),
parse(xml.encode('utf-8')))
diff --git a/xmltodict.py b/xmltodict.py
index ca760aa..a0ba0de 100755
--- a/xmltodict.py
+++ b/xmltodict.py
@@ -22,15 +22,6 @@ if tuple(map(int, platform.python_version_tuple()[:2])) < (3, 7):
from inspect import isgenerator
-try: # pragma no cover
- _basestring = basestring
-except NameError: # pragma no cover
- _basestring = str
-try: # pragma no cover
- _unicode = unicode
-except NameError: # pragma no cover
- _unicode = str
-
__author__ = 'Martin Blech'
__version__ = '0.13.0'
__license__ = 'MIT'
@@ -335,9 +326,8 @@ def parse(xml_input, encoding=None, expat=expat, process_namespaces=False,
"""
handler = _DictSAXHandler(namespace_separator=namespace_separator,
**kwargs)
- if isinstance(xml_input, _unicode):
- if not encoding:
- encoding = 'utf-8'
+ if isinstance(xml_input, str):
+ encoding = encoding or 'utf-8'
xml_input = xml_input.encode(encoding)
if not process_namespaces:
namespace_separator = None
@@ -412,9 +402,7 @@ def _emit(key, value, content_handler,
if result is None:
return
key, value = result
- if (not hasattr(value, '__iter__')
- or isinstance(value, _basestring)
- or isinstance(value, dict)):
+ if not hasattr(value, '__iter__') or isinstance(value, (str, dict)):
value = [value]
for index, v in enumerate(value):
if full_document and depth == 0 and index > 0:
@@ -422,16 +410,13 @@ def _emit(key, value, content_handler,
if v is None:
v = _dict()
elif isinstance(v, bool):
- if v:
- v = _unicode('true')
- else:
- v = _unicode('false')
- elif not isinstance(v, dict):
- if expand_iter and hasattr(v, '__iter__') and not isinstance(v, _basestring):
+ v = 'true' if v else 'false'
+ elif not isinstance(v, (dict, str)):
+ if expand_iter and hasattr(v, '__iter__'):
v = _dict(((expand_iter, v),))
else:
- v = _unicode(v)
- if isinstance(v, _basestring):
+ v = str(v)
+ if isinstance(v, str):
v = _dict(((cdata_key, v),))
cdata = None
attrs = _dict()
@@ -445,11 +430,11 @@ def _emit(key, value, content_handler,
attr_prefix)
if ik == '@xmlns' and isinstance(iv, dict):
for k, v in iv.items():
- attr = 'xmlns{}'.format(':{}'.format(k) if k else '')
- attrs[attr] = _unicode(v)
+ attr = 'xmlns{}'.format(f':{k}' if k else '')
+ attrs[attr] = str(v)
continue
- if not isinstance(iv, _unicode):
- iv = _unicode(iv)
+ if not isinstance(iv, str):
+ iv = str(iv)
attrs[ik[len(attr_prefix):]] = iv
continue
children.append((ik, iv))
--
2.51.0

Binary file not shown.

BIN
xmltodict-0.14.2.tar.gz LFS Normal file

Binary file not shown.