125 lines
5.4 KiB
Diff
125 lines
5.4 KiB
Diff
From 68e81b4a3385161877408a7a49c7ed12b45a614d Mon Sep 17 00:00:00 2001
|
|
From: Ben Darnell <ben@bendarnell.com>
|
|
Date: Tue, 9 Dec 2025 13:27:27 -0500
|
|
Subject: [PATCH] httputil: Fix quadratic performance of repeated header lines
|
|
|
|
Previouisly, when many header lines with the same name were found
|
|
in an HTTP request or response, repeated string concatenation would
|
|
result in quadratic performance. This change does the concatenation
|
|
lazily (with a cache) so that repeated headers can be processed
|
|
efficiently.
|
|
|
|
Security: The previous behavior allowed a denial of service attack
|
|
via a maliciously crafted HTTP message, but only if the
|
|
max_header_size was increased from its default of 64kB.
|
|
---
|
|
tornado/httputil.py | 36 ++++++++++++++++++++++++-----------
|
|
tornado/test/httputil_test.py | 15 +++++++++++++++
|
|
2 files changed, 40 insertions(+), 11 deletions(-)
|
|
|
|
Index: tornado-6.3.2/tornado/httputil.py
|
|
===================================================================
|
|
--- tornado-6.3.2.orig/tornado/httputil.py
|
|
+++ tornado-6.3.2/tornado/httputil.py
|
|
@@ -118,8 +118,14 @@ class HTTPHeaders(collections.abc.Mutabl
|
|
pass
|
|
|
|
def __init__(self, *args: typing.Any, **kwargs: str) -> None: # noqa: F811
|
|
- self._dict = {} # type: typing.Dict[str, str]
|
|
- self._as_list = {} # type: typing.Dict[str, typing.List[str]]
|
|
+ # Formally, HTTP headers are a mapping from a field name to a "combined field value",
|
|
+ # which may be constructed from multiple field lines by joining them with commas.
|
|
+ # In practice, however, some headers (notably Set-Cookie) do not follow this convention,
|
|
+ # so we maintain a mapping from field name to a list of field lines in self._as_list.
|
|
+ # self._combined_cache is a cache of the combined field values derived from self._as_list
|
|
+ # on demand (and cleared whenever the list is modified).
|
|
+ self._as_list: dict[str, list[str]] = {}
|
|
+ self._combined_cache: dict[str, str] = {}
|
|
self._last_key = None # type: Optional[str]
|
|
if len(args) == 1 and len(kwargs) == 0 and isinstance(args[0], HTTPHeaders):
|
|
# Copy constructor
|
|
@@ -136,9 +142,7 @@ class HTTPHeaders(collections.abc.Mutabl
|
|
norm_name = _normalize_header(name)
|
|
self._last_key = norm_name
|
|
if norm_name in self:
|
|
- self._dict[norm_name] = (
|
|
- native_str(self[norm_name]) + "," + native_str(value)
|
|
- )
|
|
+ self._combined_cache.pop(norm_name, None)
|
|
self._as_list[norm_name].append(value)
|
|
else:
|
|
self[norm_name] = value
|
|
@@ -172,7 +176,7 @@ class HTTPHeaders(collections.abc.Mutabl
|
|
raise HTTPInputError("first header line cannot start with whitespace")
|
|
new_part = " " + line.lstrip()
|
|
self._as_list[self._last_key][-1] += new_part
|
|
- self._dict[self._last_key] += new_part
|
|
+ self._combined_cache.pop(self._last_key, None)
|
|
else:
|
|
try:
|
|
name, value = line.split(":", 1)
|
|
@@ -208,22 +212,32 @@ class HTTPHeaders(collections.abc.Mutabl
|
|
|
|
def __setitem__(self, name: str, value: str) -> None:
|
|
norm_name = _normalize_header(name)
|
|
- self._dict[norm_name] = value
|
|
+ self._combined_cache[norm_name] = value
|
|
self._as_list[norm_name] = [value]
|
|
|
|
+ def __contains__(self, name: object) -> bool:
|
|
+ # This is an important optimization to avoid the expensive concatenation
|
|
+ # in __getitem__ when it's not needed.
|
|
+ if not isinstance(name, str):
|
|
+ return False
|
|
+ return name in self._as_list
|
|
+
|
|
def __getitem__(self, name: str) -> str:
|
|
- return self._dict[_normalize_header(name)]
|
|
+ header = _normalize_header(name)
|
|
+ if header not in self._combined_cache:
|
|
+ self._combined_cache[header] = ",".join(self._as_list[header])
|
|
+ return self._combined_cache[header]
|
|
|
|
def __delitem__(self, name: str) -> None:
|
|
norm_name = _normalize_header(name)
|
|
- del self._dict[norm_name]
|
|
+ del self._combined_cache[norm_name]
|
|
del self._as_list[norm_name]
|
|
|
|
def __len__(self) -> int:
|
|
- return len(self._dict)
|
|
+ return len(self._as_list)
|
|
|
|
def __iter__(self) -> Iterator[typing.Any]:
|
|
- return iter(self._dict)
|
|
+ return iter(self._as_list)
|
|
|
|
def copy(self) -> "HTTPHeaders":
|
|
# defined in dict but not in MutableMapping.
|
|
Index: tornado-6.3.2/tornado/test/httputil_test.py
|
|
===================================================================
|
|
--- tornado-6.3.2.orig/tornado/test/httputil_test.py
|
|
+++ tornado-6.3.2/tornado/test/httputil_test.py
|
|
@@ -451,6 +451,21 @@ class ParseRequestStartLineTest(unittest
|
|
self.assertEqual(parsed_start_line.path, self.PATH)
|
|
self.assertEqual(parsed_start_line.version, self.VERSION)
|
|
|
|
+ def test_linear_performance(self):
|
|
+ def f(n):
|
|
+ start = time.time()
|
|
+ headers = HTTPHeaders()
|
|
+ for i in range(n):
|
|
+ headers.add("X-Foo", "bar")
|
|
+ return time.time() - start
|
|
+
|
|
+ # This runs under 50ms on my laptop as of 2025-12-09.
|
|
+ d1 = f(10_000)
|
|
+ d2 = f(100_000)
|
|
+ if d2 / d1 > 20:
|
|
+ # d2 should be about 10x d1 but allow a wide margin for variability.
|
|
+ self.fail(f"HTTPHeaders.add() does not scale linearly: {d1=} vs {d2=}")
|
|
+
|
|
|
|
class ParseCookieTest(unittest.TestCase):
|
|
# These tests copied from Django:
|