95 lines
3.7 KiB
Diff
95 lines
3.7 KiB
Diff
From 771472cfdaeebc0d89a9cc46e249f8891a6b29cd Mon Sep 17 00:00:00 2001
|
|
From: Ben Darnell <ben@bendarnell.com>
|
|
Date: Wed, 10 Dec 2025 10:55:02 -0500
|
|
Subject: [PATCH] httputil: Fix quadratic behavior in _parseparam
|
|
|
|
Prior to this change, _parseparam had O(n^2) behavior when parsing
|
|
certain inputs, which could be a DoS vector. This change adapts
|
|
logic from the equivalent function in the python standard library
|
|
in https://github.com/python/cpython/pull/136072/files
|
|
---
|
|
tornado/httputil.py | 29 ++++++++++++++++++++++-------
|
|
tornado/test/httputil_test.py | 23 +++++++++++++++++++++++
|
|
2 files changed, 45 insertions(+), 7 deletions(-)
|
|
|
|
Index: tornado-6.3.2/tornado/httputil.py
|
|
===================================================================
|
|
--- tornado-6.3.2.orig/tornado/httputil.py
|
|
+++ tornado-6.3.2/tornado/httputil.py
|
|
@@ -936,19 +936,34 @@ def parse_response_start_line(line: str)
|
|
# It has also been modified to support valueless parameters as seen in
|
|
# websocket extension negotiations, and to support non-ascii values in
|
|
# RFC 2231/5987 format.
|
|
+#
|
|
+# _parseparam has been further modified with the logic from
|
|
+# https://github.com/python/cpython/pull/136072/files
|
|
+# to avoid quadratic behavior when parsing semicolons in quoted strings.
|
|
+#
|
|
+# TODO: See if we can switch to email.message.Message for this functionality.
|
|
+# This is the suggested replacement for the cgi.py module now that cgi has
|
|
+# been removed from recent versions of Python. We need to verify that
|
|
+# the email module is consistent with our existing behavior (and all relevant
|
|
+# RFCs for multipart/form-data) before making this change.
|
|
|
|
|
|
def _parseparam(s: str) -> Generator[str, None, None]:
|
|
- while s[:1] == ";":
|
|
- s = s[1:]
|
|
- end = s.find(";")
|
|
- while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2:
|
|
- end = s.find(";", end + 1)
|
|
+ start = 0
|
|
+ while s.find(";", start) == start:
|
|
+ start += 1
|
|
+ end = s.find(";", start)
|
|
+ ind, diff = start, 0
|
|
+ while end > 0:
|
|
+ diff += s.count('"', ind, end) - s.count('\\"', ind, end)
|
|
+ if diff % 2 == 0:
|
|
+ break
|
|
+ end, ind = ind, s.find(";", end + 1)
|
|
if end < 0:
|
|
end = len(s)
|
|
- f = s[:end]
|
|
+ f = s[start:end]
|
|
yield f.strip()
|
|
- s = s[end:]
|
|
+ start = end
|
|
|
|
|
|
def _parse_header(line: str) -> Tuple[str, Dict[str, str]]:
|
|
Index: tornado-6.3.2/tornado/test/httputil_test.py
|
|
===================================================================
|
|
--- tornado-6.3.2.orig/tornado/test/httputil_test.py
|
|
+++ tornado-6.3.2/tornado/test/httputil_test.py
|
|
@@ -261,6 +261,29 @@ Foo
|
|
self.assertEqual(file["filename"], "ab.txt")
|
|
self.assertEqual(file["body"], b"Foo")
|
|
|
|
+ def test_disposition_param_linear_performance(self):
|
|
+ # This is a regression test for performance of parsing parameters
|
|
+ # to the content-disposition header, specifically for semicolons within
|
|
+ # quoted strings.
|
|
+ def f(n):
|
|
+ start = time.time()
|
|
+ message = (
|
|
+ b"--1234\r\nContent-Disposition: form-data; "
|
|
+ + b'x="'
|
|
+ + b";" * n
|
|
+ + b'"; '
|
|
+ + b'name="files"; filename="a.txt"\r\n\r\nFoo\r\n--1234--\r\n'
|
|
+ )
|
|
+ args: dict[str, list[bytes]] = {}
|
|
+ files: dict[str, list[HTTPFile]] = {}
|
|
+ parse_multipart_form_data(b"1234", message, args, files)
|
|
+ return time.time() - start
|
|
+
|
|
+ d1 = f(1_000)
|
|
+ d2 = f(10_000)
|
|
+ if d2 / d1 > 20:
|
|
+ self.fail(f"Disposition param parsing is not linear: {d1=} vs {d2=}")
|
|
+
|
|
|
|
class HTTPHeadersTest(unittest.TestCase):
|
|
def test_multi_line(self):
|