python-Django/CVE-2024-27351.patch

From 2d173757922183f7e9b79d31fd4ccd9086cc6ce2 Mon Sep 17 00:00:00 2001
From: Shai Berger <shai@platonix.com>
Date: Mon, 19 Feb 2024 13:56:37 +0100
Subject: [PATCH] [4.2.x] Fixed CVE-2024-27351 -- Prevented potential ReDoS in
 Truncator.words().

Thanks Seokchan Yoon for the report.

Co-Authored-By: Mariusz Felisiak <felisiak.mariusz@gmail.com>
---
 django/utils/text.py           | 57 ++++++++++++++++++++++++++++++++--
 docs/releases/3.2.25.txt       |  8 +++++
 docs/releases/4.2.11.txt       |  8 +++++
 tests/utils_tests/test_text.py | 26 ++++++++++++++++
 4 files changed, 97 insertions(+), 2 deletions(-)

Index: Django-4.2.6/django/utils/text.py
===================================================================
--- Django-4.2.6.orig/django/utils/text.py
+++ Django-4.2.6/django/utils/text.py
@@ -23,8 +23,61 @@ def capfirst(x):
     return x[0].upper() + x[1:]
 
 
-# Set up regular expressions
-re_words = _lazy_re_compile(r"<[^>]+?>|([^<>\s]+)", re.S)
+# ----- Begin security-related performance workaround -----
+
+# We used to have, below
+#
+# re_words = _lazy_re_compile(r"<[^>]+?>|([^<>\s]+)", re.S)
+#
+# But it was shown that this regex, in the way we use it here, performs
+# catastrophically in some edge cases, namely when it is applied to text
+# containing only open brackets ("<<<..."). The class below provides the
+# services and correct answers for the use cases, but handles these edge cases
+# much faster.
+re_notag = _lazy_re_compile(r"([^<>\s]+)", re.S)  # Word: no <, > or whitespace.
+re_prt = _lazy_re_compile(r"<|([^<>\s]+)", re.S)  # A "<", or a word (group 1).
+
+
+class WordsRegex:  # Stand-in for the old compiled re_words; only search().
+    @staticmethod
+    def search(text, pos):  # Returns a regex match, a FakeMatch, or None.
+        # Find the first "<" or non-tag word (captured as group 1) from pos on.
+        partial = re_prt.search(text, pos)
+        if partial is None or partial[1] is not None:
+            return partial  # Nothing left to match, or a word matched first.
+
+        # "<" was found, look for a closing ">" anywhere after it.
+        end = text.find(">", partial.end(0))
+        if end < 0:
+            # No ">" remains, so no tag can match; look only for a word.
+            return re_notag.search(text, pos + 1)
+        else:
+            # "<" followed by a ">" was found -- fake a match from "<" to ">".
+            end += 1  # Step past the ">" so the slice and end include it.
+            return FakeMatch(text[partial.start(0) : end], end)
+
+
+class FakeMatch:  # Minimal match-object stand-in returned by WordsRegex.search.
+    __slots__ = ["_text", "_end"]  # Matched text and the index just past it.
+
+    def end(self, group=0):  # End index of the whole match (group 0 only).
+        assert group == 0, "This specific object takes only group=0"
+        return self._end
+
+    def __getitem__(self, group):  # m[0] is the matched text; m[1] is None.
+        if group == 1:
+            return None  # A faked tag match carries no word (group 1).
+        assert group == 0, "This specific object takes only group in {0,1}"
+        return self._text
+
+    def __init__(self, text, end):  # Store the matched text and its end index.
+        self._text, self._end = text, end
+
+
+# ----- End security-related performance workaround -----
+
+# Set up regular expressions.
+re_words = WordsRegex  # Not a compiled pattern; exposes only search(text, pos).
 re_chars = _lazy_re_compile(r"<[^>]+?>|(.)", re.S)
 re_tag = _lazy_re_compile(r"<(/)?(\S+?)(?:(\s*/)|\s.*?)?>", re.S)
 re_newlines = _lazy_re_compile(r"\r\n|\r")  # Used in normalize_newlines
Index: Django-4.2.6/tests/utils_tests/test_text.py
===================================================================
--- Django-4.2.6.orig/tests/utils_tests/test_text.py
+++ Django-4.2.6/tests/utils_tests/test_text.py
@@ -183,6 +183,32 @@ class TestUtilsText(SimpleTestCase):
         truncator = text.Truncator("<p>I &lt;3 python, what about you?</p>")
         self.assertEqual("<p>I &lt;3 python,…</p>", truncator.words(3, html=True))
 
+        # Only open brackets.
+        test = "<" * 60_000
+        truncator = text.Truncator(test)
+        self.assertEqual(truncator.words(1, html=True), test)
+
+        # Tags with special chars in attrs.
+        truncator = text.Truncator(
+            """<i style="margin: 5%; font: *;">Hello, my dear lady!</i>"""
+        )
+        self.assertEqual(
+            """<i style="margin: 5%; font: *;">Hello, my dear…</i>""",
+            truncator.words(3, html=True),
+        )
+
+        # Tags with special non-latin chars in attrs.
+        truncator = text.Truncator("""<p data-x="א">Hello, my dear lady!</p>""")
+        self.assertEqual(
+            """<p data-x="א">Hello, my dear…</p>""",
+            truncator.words(3, html=True),
+        )
+
+        # Misplaced brackets.
+        truncator = text.Truncator("hello >< world")
+        self.assertEqual(truncator.words(1, html=True), "hello…")
+        self.assertEqual(truncator.words(2, html=True), "hello >< world")
+
     @patch("django.utils.text.Truncator.MAX_LENGTH_HTML", 10_000)
     def test_truncate_words_html_size_limit(self):
         max_len = text.Truncator.MAX_LENGTH_HTML
Sync from SUSE:ALP:Source:Standard:1.0 python-Django revision fb4dfe21e6fe0c7578e7172d216dfe99 2024-03-05 18:05:06 +01:00
			`From 2d173757922183f7e9b79d31fd4ccd9086cc6ce2 Mon Sep 17 00:00:00 2001`
			`From: Shai Berger <shai@platonix.com>`
			`Date: Mon, 19 Feb 2024 13:56:37 +0100`
			`Subject: [PATCH] [4.2.x] Fixed CVE-2024-27351 -- Prevented potential ReDoS in`
			`Truncator.words().`

			`Thanks Seokchan Yoon for the report.`

			`Co-Authored-By: Mariusz Felisiak <felisiak.mariusz@gmail.com>`
			`---`
			`django/utils/text.py \| 57 ++++++++++++++++++++++++++++++++--`
			`docs/releases/3.2.25.txt \| 8 +++++`
			`docs/releases/4.2.11.txt \| 8 +++++`
			`tests/utils_tests/test_text.py \| 26 ++++++++++++++++`
			`4 files changed, 97 insertions(+), 2 deletions(-)`

			`Index: Django-4.2.6/django/utils/text.py`
			`===================================================================`
			`--- Django-4.2.6.orig/django/utils/text.py`
			`+++ Django-4.2.6/django/utils/text.py`
			`@@ -23,8 +23,61 @@ def capfirst(x):`
			`return x[0].upper() + x[1:]`


			`-# Set up regular expressions`
			`-re_words = _lazy_re_compile(r"<[^>]+?>|([^<>\s]+)", re.S)`
			`+# ----- Begin security-related performance workaround -----`
			`+`
			`+# We used to have, below`
			`+#`
			`+# re_words = _lazy_re_compile(r"<[^>]+?>|([^<>\s]+)", re.S)`
			`+#`
			`+# But it was shown that this regex, in the way we use it here, has some`
			`+# catastrophic edge-case performance features. Namely, when it is applied to`
			`+# text with only open brackets "<<<...". The class below provides the services`
			`+# and correct answers for the use cases, but in these edge cases does it much`
			`+# faster.`
			`+re_notag = _lazy_re_compile(r"([^<>\s]+)", re.S)`
			`+re_prt = _lazy_re_compile(r"<|([^<>\s]+)", re.S)`
			`+`
			`+`
			`+class WordsRegex:`
			`+ @staticmethod`
			`+ def search(text, pos):`
			`+ # Look for "<" or a non-tag word.`
			`+ partial = re_prt.search(text, pos)`
			`+ if partial is None or partial[1] is not None:`
			`+ return partial`
			`+`
			`+ # "<" was found, look for a closing ">".`
			`+ end = text.find(">", partial.end(0))`
			`+ if end < 0:`
			`+ # ">" cannot be found, look for a word.`
			`+ return re_notag.search(text, pos + 1)`
			`+ else:`
			`+ # "<" followed by a ">" was found -- fake a match.`
			`+ end += 1`
			`+ return FakeMatch(text[partial.start(0) : end], end)`
			`+`
			`+`
			`+class FakeMatch:`
			`+ __slots__ = ["_text", "_end"]`
			`+`
			`+ def end(self, group=0):`
			`+ assert group == 0, "This specific object takes only group=0"`
			`+ return self._end`
			`+`
			`+ def __getitem__(self, group):`
			`+ if group == 1:`
			`+ return None`
			`+ assert group == 0, "This specific object takes only group in {0,1}"`
			`+ return self._text`
			`+`
			`+ def __init__(self, text, end):`
			`+ self._text, self._end = text, end`
			`+`
			`+`
			`+# ----- End security-related performance workaround -----`
			`+`
			`+# Set up regular expressions.`
			`+re_words = WordsRegex`
			`re_chars = _lazy_re_compile(r"<[^>]+?>|(.)", re.S)`
			`re_tag = _lazy_re_compile(r"<(/)?(\S+?)(?:(\s*/)|\s.*?)?>", re.S)`
			`re_newlines = _lazy_re_compile(r"\r\n|\r")  # Used in normalize_newlines`
			`Index: Django-4.2.6/tests/utils_tests/test_text.py`
			`===================================================================`
			`--- Django-4.2.6.orig/tests/utils_tests/test_text.py`
			`+++ Django-4.2.6/tests/utils_tests/test_text.py`
			`@@ -183,6 +183,32 @@ class TestUtilsText(SimpleTestCase):`
			`truncator = text.Truncator("<p>I &lt;3 python, what about you?</p>")`
			`self.assertEqual("<p>I &lt;3 python,…</p>", truncator.words(3, html=True))`

			`+ # Only open brackets.`
			`+ test = "<" * 60_000`
			`+ truncator = text.Truncator(test)`
			`+ self.assertEqual(truncator.words(1, html=True), test)`
			`+`
			`+ # Tags with special chars in attrs.`
			`+ truncator = text.Truncator(`
			`+ """<i style="margin: 5%; font: *;">Hello, my dear lady!</i>"""`
			`+ )`
			`+ self.assertEqual(`
			`+ """<i style="margin: 5%; font: *;">Hello, my dear…</i>""",`
			`+ truncator.words(3, html=True),`
			`+ )`
			`+`
			`+ # Tags with special non-latin chars in attrs.`
			`+ truncator = text.Truncator("""<p data-x="א">Hello, my dear lady!</p>""")`
			`+ self.assertEqual(`
			`+ """<p data-x="א">Hello, my dear…</p>""",`
			`+ truncator.words(3, html=True),`
			`+ )`
			`+`
			`+ # Misplaced brackets.`
			`+ truncator = text.Truncator("hello >< world")`
			`+ self.assertEqual(truncator.words(1, html=True), "hello…")`
			`+ self.assertEqual(truncator.words(2, html=True), "hello >< world")`
			`+`
			`@patch("django.utils.text.Truncator.MAX_LENGTH_HTML", 10_000)`
			`def test_truncate_words_html_size_limit(self):`
			`max_len = text.Truncator.MAX_LENGTH_HTML`