python-Django/CVE-2024-38875.patch

From 79f368764295df109a37192f6182fb6f361d85b5 Mon Sep 17 00:00:00 2001
From: Adam Johnson <me@adamj.eu>
Date: Mon, 24 Jun 2024 15:30:59 +0200
Subject: [PATCH] [4.2.x] Fixed CVE-2024-38875 -- Mitigated potential DoS in
 urlize and urlizetrunc template filters.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Thank you to Elias Myllymäki for the report.

Co-authored-by: Sarah Boyce <42296566+sarahboyce@users.noreply.github.com>
---
 django/utils/html.py           | 90 +++++++++++++++++++++++++---------
 tests/utils_tests/test_html.py |  7 +++
 2 files changed, 73 insertions(+), 24 deletions(-)

diff --git a/django/utils/html.py b/django/utils/html.py
index fdb88d6709..fd313ff9ca 100644
--- a/django/utils/html.py
+++ b/django/utils/html.py
@@ -7,7 +7,7 @@ from html.parser import HTMLParser
 from urllib.parse import parse_qsl, quote, unquote, urlencode, urlsplit, urlunsplit
 
 from django.utils.encoding import punycode
-from django.utils.functional import Promise, keep_lazy, keep_lazy_text
+from django.utils.functional import Promise, cached_property, keep_lazy, keep_lazy_text
 from django.utils.http import RFC3986_GENDELIMS, RFC3986_SUBDELIMS
 from django.utils.regex_helper import _lazy_re_compile
 from django.utils.safestring import SafeData, SafeString, mark_safe
@@ -225,6 +225,16 @@ def smart_urlquote(url):
     return urlunsplit((scheme, netloc, path, query, fragment))
 
 
+class CountsDict(dict):
+    def __init__(self, *args, word, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.word = word
+
+    def __missing__(self, key):
+        self[key] = self.word.count(key)
+        return self[key]
+
+
 class Urlizer:
     """
     Convert any URLs in text into clickable links.
@@ -330,40 +340,72 @@ class Urlizer:
             return x
         return "%s…" % x[: max(0, limit - 1)]
 
+    @cached_property
+    def wrapping_punctuation_openings(self):
+        return "".join(dict(self.wrapping_punctuation).keys())
+
+    @cached_property
+    def trailing_punctuation_chars_no_semicolon(self):
+        return self.trailing_punctuation_chars.replace(";", "")
+
+    @cached_property
+    def trailing_punctuation_chars_has_semicolon(self):
+        return ";" in self.trailing_punctuation_chars
+
     def trim_punctuation(self, word):
         """
         Trim trailing and wrapping punctuation from `word`. Return the items of
         the new state.
         """
-        lead, middle, trail = "", word, ""
+        # Strip all opening wrapping punctuation.
+        middle = word.lstrip(self.wrapping_punctuation_openings)
+        lead = word[: len(word) - len(middle)]
+        trail = ""
+
         # Continue trimming until middle remains unchanged.
         trimmed_something = True
-        while trimmed_something:
+        counts = CountsDict(word=middle)
+        while trimmed_something and middle:
             trimmed_something = False
             # Trim wrapping punctuation.
             for opening, closing in self.wrapping_punctuation:
-                if middle.startswith(opening):
-                    middle = middle[len(opening) :]
-                    lead += opening
-                    trimmed_something = True
-                # Keep parentheses at the end only if they're balanced.
-                if (
-                    middle.endswith(closing)
-                    and middle.count(closing) == middle.count(opening) + 1
-                ):
-                    middle = middle[: -len(closing)]
-                    trail = closing + trail
-                    trimmed_something = True
-            # Trim trailing punctuation (after trimming wrapping punctuation,
-            # as encoded entities contain ';'). Unescape entities to avoid
-            # breaking them by removing ';'.
-            middle_unescaped = html.unescape(middle)
-            stripped = middle_unescaped.rstrip(self.trailing_punctuation_chars)
-            if middle_unescaped != stripped:
-                punctuation_count = len(middle_unescaped) - len(stripped)
-                trail = middle[-punctuation_count:] + trail
-                middle = middle[:-punctuation_count]
+                if counts[opening] < counts[closing]:
+                    rstripped = middle.rstrip(closing)
+                    if rstripped != middle:
+                        strip = counts[closing] - counts[opening]
+                        trail = middle[-strip:] + trail  # Don't drop earlier trail.
+                        middle = middle[:-strip]
+                        trimmed_something = True
+                        counts[closing] -= strip
+
+            rstripped = middle.rstrip(self.trailing_punctuation_chars_no_semicolon)
+            if rstripped != middle:
+                trail = middle[len(rstripped) :] + trail
+                middle = rstripped
                 trimmed_something = True
+
+            if self.trailing_punctuation_chars_has_semicolon and middle.endswith(";"):
+                # Only strip if not part of an HTML entity.
+                amp = middle.rfind("&")
+                if amp == -1:
+                    can_strip = True
+                else:
+                    potential_entity = middle[amp:]
+                    escaped = html.unescape(potential_entity)
+                    can_strip = (escaped == potential_entity) or escaped.endswith(";")
+
+                if can_strip:
+                    rstripped = middle.rstrip(";")
+                    amount_stripped = len(middle) - len(rstripped)
+                    if amp > -1 and amount_stripped > 1:
+                        # Leave a trailing semicolon as might be an entity.
+                        trail = middle[len(rstripped) + 1 :] + trail
+                        middle = rstripped + ";"
+                    else:
+                        trail = middle[len(rstripped) :] + trail
+                        middle = rstripped
+                    trimmed_something = True
+
         return lead, middle, trail
 
     @staticmethod
diff --git a/tests/utils_tests/test_html.py b/tests/utils_tests/test_html.py
index b7a7396075..6dab41634a 100644
--- a/tests/utils_tests/test_html.py
+++ b/tests/utils_tests/test_html.py
@@ -342,6 +342,13 @@ class TestUtilsHtml(SimpleTestCase):
             "foo@.example.com",
             "foo@localhost",
             "foo@localhost.",
+            # trim_punctuation catastrophic tests
+            "(" * 100_000 + ":" + ")" * 100_000,
+            "(" * 100_000 + "&:" + ")" * 100_000,
+            "([" * 100_000 + ":" + "])" * 100_000,
+            "[(" * 100_000 + ":" + ")]" * 100_000,
+            "([[" * 100_000 + ":" + "]])" * 100_000,
+            "&:" + ";" * 100_000,
         )
         for value in tests:
             with self.subTest(value=value):
-- 
2.45.2
Sync from SUSE:SLFO:Main python-Django revision 2bd7ba1c498ee607b8fe04ff26398472 2024-09-04 09:08:57 +02:00
			`From 79f368764295df109a37192f6182fb6f361d85b5 Mon Sep 17 00:00:00 2001`
			`From: Adam Johnson <me@adamj.eu>`
			`Date: Mon, 24 Jun 2024 15:30:59 +0200`
			`Subject: [PATCH] [4.2.x] Fixed CVE-2024-38875 -- Mitigated potential DoS in`
			`urlize and urlizetrunc template filters.`
			`MIME-Version: 1.0`
			`Content-Type: text/plain; charset=UTF-8`
			`Content-Transfer-Encoding: 8bit`

			`Thank you to Elias Myllymäki for the report.`

			`Co-authored-by: Sarah Boyce <42296566+sarahboyce@users.noreply.github.com>`
			`---`
			`django/utils/html.py \| 90 +++++++++++++++++++++++++---------`
			`tests/utils_tests/test_html.py \| 7 +++`
			`2 files changed, 73 insertions(+), 24 deletions(-)`

			`diff --git a/django/utils/html.py b/django/utils/html.py`
			`index fdb88d6709..fd313ff9ca 100644`
			`--- a/django/utils/html.py`
			`+++ b/django/utils/html.py`
			`@@ -7,7 +7,7 @@ from html.parser import HTMLParser`
			`from urllib.parse import parse_qsl, quote, unquote, urlencode, urlsplit, urlunsplit`

			`from django.utils.encoding import punycode`
			`-from django.utils.functional import Promise, keep_lazy, keep_lazy_text`
			`+from django.utils.functional import Promise, cached_property, keep_lazy, keep_lazy_text`
			`from django.utils.http import RFC3986_GENDELIMS, RFC3986_SUBDELIMS`
			`from django.utils.regex_helper import _lazy_re_compile`
			`from django.utils.safestring import SafeData, SafeString, mark_safe`
			`@@ -225,6 +225,16 @@ def smart_urlquote(url):`
			`return urlunsplit((scheme, netloc, path, query, fragment))`


			`+class CountsDict(dict):`
			`+ def __init__(self, *args, word, **kwargs):`
			`+ super().__init__(*args, **kwargs)`
			`+ self.word = word`
			`+`
			`+ def __missing__(self, key):`
			`+ self[key] = self.word.count(key)`
			`+ return self[key]`
			`+`
			`+`
			`class Urlizer:`
			`"""`
			`Convert any URLs in text into clickable links.`
			`@@ -330,40 +340,72 @@ class Urlizer:`
			`return x`
			`return "%s…" % x[: max(0, limit - 1)]`

			`+ @cached_property`
			`+ def wrapping_punctuation_openings(self):`
			`+ return "".join(dict(self.wrapping_punctuation).keys())`
			`+`
			`+ @cached_property`
			`+ def trailing_punctuation_chars_no_semicolon(self):`
			`+ return self.trailing_punctuation_chars.replace(";", "")`
			`+`
			`+ @cached_property`
			`+ def trailing_punctuation_chars_has_semicolon(self):`
			`+ return ";" in self.trailing_punctuation_chars`
			`+`
			`def trim_punctuation(self, word):`
			`"""`
			``Trim trailing and wrapping punctuation from `word`. Return the items of``
			`the new state.`
			`"""`
			`- lead, middle, trail = "", word, ""`
			`+ # Strip all opening wrapping punctuation.`
			`+ middle = word.lstrip(self.wrapping_punctuation_openings)`
			`+ lead = word[: len(word) - len(middle)]`
			`+ trail = ""`
			`+`
			`# Continue trimming until middle remains unchanged.`
			`trimmed_something = True`
			`- while trimmed_something:`
			`+ counts = CountsDict(word=middle)`
			`+ while trimmed_something and middle:`
			`trimmed_something = False`
			`# Trim wrapping punctuation.`
			`for opening, closing in self.wrapping_punctuation:`
			`- if middle.startswith(opening):`
			`- middle = middle[len(opening) :]`
			`- lead += opening`
			`- trimmed_something = True`
			`- # Keep parentheses at the end only if they're balanced.`
			`- if (`
			`- middle.endswith(closing)`
			`- and middle.count(closing) == middle.count(opening) + 1`
			`- ):`
			`- middle = middle[: -len(closing)]`
			`- trail = closing + trail`
			`- trimmed_something = True`
			`- # Trim trailing punctuation (after trimming wrapping punctuation,`
			`- # as encoded entities contain ';'). Unescape entities to avoid`
			`- # breaking them by removing ';'.`
			`- middle_unescaped = html.unescape(middle)`
			`- stripped = middle_unescaped.rstrip(self.trailing_punctuation_chars)`
			`- if middle_unescaped != stripped:`
			`- punctuation_count = len(middle_unescaped) - len(stripped)`
			`- trail = middle[-punctuation_count:] + trail`
			`- middle = middle[:-punctuation_count]`
			`+ if counts[opening] < counts[closing]:`
			`+ rstripped = middle.rstrip(closing)`
			`+ if rstripped != middle:`
			`+ strip = counts[closing] - counts[opening]`
			`+ trail = middle[-strip:]`
			`+ middle = middle[:-strip]`
			`+ trimmed_something = True`
			`+ counts[closing] -= strip`
			`+`
			`+ rstripped = middle.rstrip(self.trailing_punctuation_chars_no_semicolon)`
			`+ if rstripped != middle:`
			`+ trail = middle[len(rstripped) :] + trail`
			`+ middle = rstripped`
			`trimmed_something = True`
			`+`
			`+ if self.trailing_punctuation_chars_has_semicolon and middle.endswith(";"):`
			`+ # Only strip if not part of an HTML entity.`
			`+ amp = middle.rfind("&")`
			`+ if amp == -1:`
			`+ can_strip = True`
			`+ else:`
			`+ potential_entity = middle[amp:]`
			`+ escaped = html.unescape(potential_entity)`
			`+ can_strip = (escaped == potential_entity) or escaped.endswith(";")`
			`+`
			`+ if can_strip:`
			`+ rstripped = middle.rstrip(";")`
			`+ amount_stripped = len(middle) - len(rstripped)`
			`+ if amp > -1 and amount_stripped > 1:`
			`+ # Leave a trailing semicolon as might be an entity.`
			`+ trail = middle[len(rstripped) + 1 :] + trail`
			`+ middle = rstripped + ";"`
			`+ else:`
			`+ trail = middle[len(rstripped) :] + trail`
			`+ middle = rstripped`
			`+ trimmed_something = True`
			`+`
			`return lead, middle, trail`

			`@staticmethod`
			`diff --git a/tests/utils_tests/test_html.py b/tests/utils_tests/test_html.py`
			`index b7a7396075..6dab41634a 100644`
			`--- a/tests/utils_tests/test_html.py`
			`+++ b/tests/utils_tests/test_html.py`
			`@@ -342,6 +342,13 @@ class TestUtilsHtml(SimpleTestCase):`
			`"foo@.example.com",`
			`"foo@localhost",`
			`"foo@localhost.",`
			`+ # trim_punctuation catastrophic tests`
			`+ "(" * 100_000 + ":" + ")" * 100_000,`
			`+ "(" * 100_000 + "&:" + ")" * 100_000,`
			`+ "([" * 100_000 + ":" + "])" * 100_000,`
			`+ "[(" * 100_000 + ":" + ")]" * 100_000,`
			`+ "([[" * 100_000 + ":" + "]])" * 100_000,`
			`+ "&:" + ";" * 100_000,`
			`)`
			`for value in tests:`
			`with self.subTest(value=value):`
			`--`
			`2.45.2`