From 79f368764295df109a37192f6182fb6f361d85b5 Mon Sep 17 00:00:00 2001
From: Adam Johnson
Date: Mon, 24 Jun 2024 15:30:59 +0200
Subject: [PATCH] [4.2.x] Fixed CVE-2024-38875 -- Mitigated potential DoS in urlize and urlizetrunc template filters.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Thank you to Elias Myllymäki for the report.

Note: this copy of the patch forwards keyword arguments in
CountsDict.__init__() with **kwargs (the hunk previously read *kwargs,
which unpacked the keyword names as positional arguments).

Co-authored-by: Sarah Boyce <42296566+sarahboyce@users.noreply.github.com>
---
 django/utils/html.py           | 90 +++++++++++++++++++++++++---------
 tests/utils_tests/test_html.py |  7 +++
 2 files changed, 73 insertions(+), 24 deletions(-)

diff --git a/django/utils/html.py b/django/utils/html.py
index fdb88d6709..fd313ff9ca 100644
--- a/django/utils/html.py
+++ b/django/utils/html.py
@@ -7,7 +7,7 @@ from html.parser import HTMLParser
 from urllib.parse import parse_qsl, quote, unquote, urlencode, urlsplit, urlunsplit
 
 from django.utils.encoding import punycode
-from django.utils.functional import Promise, keep_lazy, keep_lazy_text
+from django.utils.functional import Promise, cached_property, keep_lazy, keep_lazy_text
 from django.utils.http import RFC3986_GENDELIMS, RFC3986_SUBDELIMS
 from django.utils.regex_helper import _lazy_re_compile
 from django.utils.safestring import SafeData, SafeString, mark_safe
@@ -225,6 +225,16 @@ def smart_urlquote(url):
     return urlunsplit((scheme, netloc, path, query, fragment))
 
 
+class CountsDict(dict):
+    def __init__(self, *args, word, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.word = word
+
+    def __missing__(self, key):
+        self[key] = self.word.count(key)
+        return self[key]
+
+
 class Urlizer:
     """
     Convert any URLs in text into clickable links.
@@ -330,40 +340,72 @@ class Urlizer:
             return x
         return "%s…" % x[: max(0, limit - 1)]
 
+    @cached_property
+    def wrapping_punctuation_openings(self):
+        return "".join(dict(self.wrapping_punctuation).keys())
+
+    @cached_property
+    def trailing_punctuation_chars_no_semicolon(self):
+        return self.trailing_punctuation_chars.replace(";", "")
+
+    @cached_property
+    def trailing_punctuation_chars_has_semicolon(self):
+        return ";" in self.trailing_punctuation_chars
+
     def trim_punctuation(self, word):
         """
         Trim trailing and wrapping punctuation from `word`. Return the items of
         the new state.
         """
-        lead, middle, trail = "", word, ""
+        # Strip all opening wrapping punctuation.
+        middle = word.lstrip(self.wrapping_punctuation_openings)
+        lead = word[: len(word) - len(middle)]
+        trail = ""
+
         # Continue trimming until middle remains unchanged.
         trimmed_something = True
-        while trimmed_something:
+        counts = CountsDict(word=middle)
+        while trimmed_something and middle:
             trimmed_something = False
             # Trim wrapping punctuation.
             for opening, closing in self.wrapping_punctuation:
-                if middle.startswith(opening):
-                    middle = middle[len(opening) :]
-                    lead += opening
-                    trimmed_something = True
-                # Keep parentheses at the end only if they're balanced.
-                if (
-                    middle.endswith(closing)
-                    and middle.count(closing) == middle.count(opening) + 1
-                ):
-                    middle = middle[: -len(closing)]
-                    trail = closing + trail
-                    trimmed_something = True
-            # Trim trailing punctuation (after trimming wrapping punctuation,
-            # as encoded entities contain ';'). Unescape entities to avoid
-            # breaking them by removing ';'.
-            middle_unescaped = html.unescape(middle)
-            stripped = middle_unescaped.rstrip(self.trailing_punctuation_chars)
-            if middle_unescaped != stripped:
-                punctuation_count = len(middle_unescaped) - len(stripped)
-                trail = middle[-punctuation_count:] + trail
-                middle = middle[:-punctuation_count]
+                if counts[opening] < counts[closing]:
+                    rstripped = middle.rstrip(closing)
+                    if rstripped != middle:
+                        strip = counts[closing] - counts[opening]
+                        trail = middle[-strip:] + trail  # Keep earlier trail.
+                        middle = middle[:-strip]
+                        trimmed_something = True
+                        counts[closing] -= strip
+
+            rstripped = middle.rstrip(self.trailing_punctuation_chars_no_semicolon)
+            if rstripped != middle:
+                trail = middle[len(rstripped) :] + trail
+                middle = rstripped
                 trimmed_something = True
+
+            if self.trailing_punctuation_chars_has_semicolon and middle.endswith(";"):
+                # Only strip if not part of an HTML entity.
+                amp = middle.rfind("&")
+                if amp == -1:
+                    can_strip = True
+                else:
+                    potential_entity = middle[amp:]
+                    escaped = html.unescape(potential_entity)
+                    can_strip = (escaped == potential_entity) or escaped.endswith(";")
+
+                if can_strip:
+                    rstripped = middle.rstrip(";")
+                    amount_stripped = len(middle) - len(rstripped)
+                    if amp > -1 and amount_stripped > 1:
+                        # Leave a trailing semicolon as might be an entity.
+                        trail = middle[len(rstripped) + 1 :] + trail
+                        middle = rstripped + ";"
+                    else:
+                        trail = middle[len(rstripped) :] + trail
+                        middle = rstripped
+                    trimmed_something = True
+
         return lead, middle, trail
 
     @staticmethod
diff --git a/tests/utils_tests/test_html.py b/tests/utils_tests/test_html.py
index b7a7396075..6dab41634a 100644
--- a/tests/utils_tests/test_html.py
+++ b/tests/utils_tests/test_html.py
@@ -342,6 +342,13 @@ class TestUtilsHtml(SimpleTestCase):
             "foo@.example.com",
             "foo@localhost",
             "foo@localhost.",
+            # trim_punctuation catastrophic tests
+            "(" * 100_000 + ":" + ")" * 100_000,
+            "(" * 100_000 + "&:" + ")" * 100_000,
+            "([" * 100_000 + ":" + "])" * 100_000,
+            "[(" * 100_000 + ":" + ")]" * 100_000,
+            "([[" * 100_000 + ":" + "]])" * 100_000,
+            "&:" + ";" * 100_000,
         )
         for value in tests:
             with self.subTest(value=value):
-- 
2.45.2