forked from pool/python-Django
200 lines
7.4 KiB
Diff
200 lines
7.4 KiB
Diff
From 99e7d22f55497278d0bcb2e15e72ef532e62a31d Mon Sep 17 00:00:00 2001
|
|
From: Shai Berger <shai@platonix.com>
|
|
Date: Sat, 11 Oct 2025 21:42:56 +0300
|
|
Subject: [PATCH] [5.2.x] Fixed CVE-2025-64460 -- Corrected quadratic inner
|
|
text accumulation in XML serializer.
|
|
|
|
Previously, `getInnerText()` recursively used `list.extend()` on strings,
|
|
which added each character from child nodes as a separate list element.
|
|
On deeply nested XML content, this caused the overall deserialization
|
|
work to grow quadratically with input size, potentially allowing
|
|
disproportionate CPU consumption for crafted XML.
|
|
|
|
The fix separates collection of inner texts from joining them, so that
|
|
each subtree is joined only once, reducing the complexity to linear in
|
|
the size of the input. These changes also include a mitigation for an
|
|
xml.dom.minidom performance issue.
|
|
|
|
Thanks Seokchan Yoon (https://ch4n3.kr/) for the report.
|
|
|
|
Co-authored-by: Jacob Walls <jacobtylerwalls@gmail.com>
|
|
Co-authored-by: Natalia <124304+nessita@users.noreply.github.com>
|
|
|
|
Backport of 50efb718b31333051bc2dcb06911b8fa1358c98c from main.
|
|
---
|
|
django/core/serializers/xml_serializer.py | 39 +++++++++++++---
|
|
docs/releases/4.2.27.txt | 10 +++++
|
|
docs/releases/5.1.15.txt | 10 +++++
|
|
docs/releases/5.2.9.txt | 10 +++++
|
|
docs/topics/serialization.txt | 2 +
|
|
tests/serializers/test_deserialization.py | 54 +++++++++++++++++++++++
|
|
6 files changed, 119 insertions(+), 6 deletions(-)
|
|
|
|
diff --git a/django/core/serializers/xml_serializer.py b/django/core/serializers/xml_serializer.py
|
|
index 360d5309d853..0fa48acf06e5 100644
|
|
--- a/django/core/serializers/xml_serializer.py
|
|
+++ b/django/core/serializers/xml_serializer.py
|
|
@@ -3,7 +3,8 @@
|
|
"""
|
|
|
|
import json
|
|
-from xml.dom import pulldom
|
|
+from contextlib import contextmanager
|
|
+from xml.dom import minidom, pulldom
|
|
from xml.sax import handler
|
|
from xml.sax.expatreader import ExpatParser as _ExpatParser
|
|
|
|
@@ -15,6 +16,25 @@
|
|
from django.utils.xmlutils import SimplerXMLGenerator, UnserializableContentError
|
|
|
|
|
|
+@contextmanager
|
|
+def fast_cache_clearing():
|
|
+ """Workaround for performance issues in minidom document checks.
|
|
+
|
|
+ Speeds up repeated DOM operations by skipping unnecessary full traversal
|
|
+ of the DOM tree.
|
|
+ """
|
|
+ module_helper_was_lambda = False
|
|
+ if original_fn := getattr(minidom, "_in_document", None):
|
|
+ module_helper_was_lambda = original_fn.__name__ == "<lambda>"
|
|
+ if not module_helper_was_lambda:
|
|
+ minidom._in_document = lambda node: bool(node.ownerDocument)
|
|
+ try:
|
|
+ yield
|
|
+ finally:
|
|
+ if original_fn and not module_helper_was_lambda:
|
|
+ minidom._in_document = original_fn
|
|
+
|
|
+
|
|
class Serializer(base.Serializer):
|
|
"""Serialize a QuerySet to XML."""
|
|
|
|
@@ -210,7 +230,8 @@ def _make_parser(self):
|
|
def __next__(self):
|
|
for event, node in self.event_stream:
|
|
if event == "START_ELEMENT" and node.nodeName == "object":
|
|
- self.event_stream.expandNode(node)
|
|
+ with fast_cache_clearing():
|
|
+ self.event_stream.expandNode(node)
|
|
return self._handle_object(node)
|
|
raise StopIteration
|
|
|
|
@@ -394,19 +415,25 @@ def _get_model_from_node(self, node, attr):
|
|
|
|
def getInnerText(node):
|
|
"""Get all the inner text of a DOM node (recursively)."""
|
|
+ inner_text_list = getInnerTextList(node)
|
|
+ return "".join(inner_text_list)
|
|
+
|
|
+
|
|
+def getInnerTextList(node):
|
|
+ """Return a list of the inner texts of a DOM node (recursively)."""
|
|
# inspired by https://mail.python.org/pipermail/xml-sig/2005-March/011022.html
|
|
- inner_text = []
|
|
+ result = []
|
|
for child in node.childNodes:
|
|
if (
|
|
child.nodeType == child.TEXT_NODE
|
|
or child.nodeType == child.CDATA_SECTION_NODE
|
|
):
|
|
- inner_text.append(child.data)
|
|
+ result.append(child.data)
|
|
elif child.nodeType == child.ELEMENT_NODE:
|
|
- inner_text.extend(getInnerText(child))
|
|
+ result.extend(getInnerTextList(child))
|
|
else:
|
|
pass
|
|
- return "".join(inner_text)
|
|
+ return result
|
|
|
|
|
|
# Below code based on Christian Heimes' defusedxml
|
|
diff --git a/docs/topics/serialization.txt b/docs/topics/serialization.txt
|
|
index 1e573e6e1d53..e9523e2ac133 100644
|
|
--- a/docs/topics/serialization.txt
|
|
+++ b/docs/topics/serialization.txt
|
|
@@ -173,6 +173,8 @@ Identifier Information
|
|
.. _jsonl: https://jsonlines.org/
|
|
.. _PyYAML: https://pyyaml.org/
|
|
|
|
+.. _serialization-formats-xml:
|
|
+
|
|
XML
|
|
---
|
|
|
|
diff --git a/tests/serializers/test_deserialization.py b/tests/serializers/test_deserialization.py
|
|
index 0bbb46b7ce1c..a718a990385a 100644
|
|
--- a/tests/serializers/test_deserialization.py
|
|
+++ b/tests/serializers/test_deserialization.py
|
|
@@ -1,11 +1,15 @@
|
|
import json
|
|
+import time
|
|
import unittest
|
|
|
|
from django.core.serializers.base import DeserializationError, DeserializedObject
|
|
from django.core.serializers.json import Deserializer as JsonDeserializer
|
|
from django.core.serializers.jsonl import Deserializer as JsonlDeserializer
|
|
from django.core.serializers.python import Deserializer
|
|
+from django.core.serializers.xml_serializer import Deserializer as XMLDeserializer
|
|
+from django.db import models
|
|
from django.test import SimpleTestCase
|
|
+from django.test.utils import garbage_collect
|
|
|
|
from .models import Author
|
|
|
|
@@ -133,3 +137,53 @@ def test_yaml_bytes_input(self):
|
|
|
|
self.assertEqual(first_item.object, self.jane)
|
|
self.assertEqual(second_item.object, self.joe)
|
|
+
|
|
+ def test_crafted_xml_performance(self):
|
|
+ """The time to process invalid inputs is not quadratic."""
|
|
+
|
|
+ def build_crafted_xml(depth, leaf_text_len):
|
|
+ nested_open = "<nested>" * depth
|
|
+ nested_close = "</nested>" * depth
|
|
+ leaf = "x" * leaf_text_len
|
|
+ field_content = f"{nested_open}{leaf}{nested_close}"
|
|
+ return f"""
|
|
+ <django-objects version="1.0">
|
|
+ <object model="contenttypes.contenttype" pk="1">
|
|
+ <field name="app_label">{field_content}</field>
|
|
+ <field name="model">m</field>
|
|
+ </object>
|
|
+ </django-objects>
|
|
+ """
|
|
+
|
|
+ def deserialize(crafted_xml):
|
|
+ iterator = XMLDeserializer(crafted_xml)
|
|
+ garbage_collect()
|
|
+
|
|
+ start_time = time.perf_counter()
|
|
+ result = list(iterator)
|
|
+ end_time = time.perf_counter()
|
|
+
|
|
+ self.assertEqual(len(result), 1)
|
|
+ self.assertIsInstance(result[0].object, models.Model)
|
|
+ return end_time - start_time
|
|
+
|
|
+ def assertFactor(label, params, factor=2):
|
|
+ factors = []
|
|
+ prev_time = None
|
|
+ for depth, length in params:
|
|
+ crafted_xml = build_crafted_xml(depth, length)
|
|
+ elapsed = deserialize(crafted_xml)
|
|
+ if prev_time is not None:
|
|
+ factors.append(elapsed / prev_time)
|
|
+ prev_time = elapsed
|
|
+
|
|
+ with self.subTest(label):
|
|
+ # Assert based on the average factor to reduce test flakiness.
|
|
+ self.assertLessEqual(sum(factors) / len(factors), factor)
|
|
+
|
|
+ assertFactor(
|
|
+ "varying depth, varying length",
|
|
+ [(50, 2000), (100, 4000), (200, 8000), (400, 16000), (800, 32000)],
|
|
+ 2,
|
|
+ )
|
|
+ assertFactor("constant depth, varying length", [(100, 1), (100, 1000)], 2)
|