Files
python311/CVE-2025-15282-urllib-ctrl-chars.patch
Matěj Cepl 02f09793e7 Fix seven CVEs
CVE-2025-11468: preserving parens when folding comments in
  email headers (bsc#1257029, gh#python/cpython#143935).
  CVE-2025-11468-email-hdr-fold-comment.patch
CVE-2026-0672: rejects control characters in http cookies.
  (bsc#1257031, gh#python/cpython#143919)
  CVE-2026-0672-http-hdr-inject-cookie-Morsel.patch
CVE-2026-0865: rejecting control characters in
  wsgiref.headers.Headers, which could be abused for injecting
  false HTTP headers. (bsc#1257042, gh#python/cpython#143916)
  CVE-2026-0865-wsgiref-ctrl-chars.patch
CVE-2025-15366: basically the same as the previous patch for
  IMAP protocol. (bsc#1257044, gh#python/cpython#143921)
  CVE-2025-15366-imap-ctrl-chars.patch
CVE-2025-15282: basically the same as the previous patch for
  urllib library. (bsc#1257046, gh#python/cpython#143925)
  CVE-2025-15282-urllib-ctrl-chars.patch
CVE-2025-15367: basically the same as the previous patch for
  poplib library. (bsc#1257041, gh#python/cpython#143923)
  CVE-2025-15367-poplib-ctrl-chars.patch
CVE-2025-12781: fix decoding with non-standard Base64 alphabet
  (bsc#1257108, gh#python/cpython#125346)
  CVE-2025-12781-b64decode-alt-chars.patch
2026-02-13 00:45:50 +01:00

66 lines
2.8 KiB
Diff

From 66c966a2d07cfcf555117309ef6aa088001bc487 Mon Sep 17 00:00:00 2001
From: Seth Michael Larson <seth@python.org>
Date: Tue, 20 Jan 2026 14:45:58 -0600
Subject: [PATCH] [3.11] gh-143925: Reject control characters in data: URL
mediatypes (cherry picked from commit
f25509e78e8be6ea73c811ac2b8c928c28841b9f) (cherry picked from commit
2c9c746077d8119b5bcf5142316992e464594946)
Co-authored-by: Seth Michael Larson <seth@python.org>
---
Lib/test/test_urllib.py | 8 ++++++++
Lib/urllib/request.py | 5 +++++
.../2026-01-16-11-51-19.gh-issue-143925.mrtcHW.rst | 1 +
3 files changed, 14 insertions(+)
create mode 100644 Misc/NEWS.d/next/Security/2026-01-16-11-51-19.gh-issue-143925.mrtcHW.rst
diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py
index f067560ca6caa1..497372a38e392a 100644
--- a/Lib/test/test_urllib.py
+++ b/Lib/test/test_urllib.py
@@ -12,6 +12,7 @@
from test.support import os_helper
from test.support import socket_helper
from test.support import warnings_helper
+from test.support import control_characters_c0
import os
try:
import ssl
@@ -683,6 +684,13 @@ def test_invalid_base64_data(self):
# missing padding character
self.assertRaises(ValueError,urllib.request.urlopen,'data:;base64,Cg=')
+ def test_invalid_mediatype(self):
+ for c0 in control_characters_c0():
+ self.assertRaises(ValueError,urllib.request.urlopen,
+ f'data:text/html;{c0},data')
+ for c0 in control_characters_c0():
+ self.assertRaises(ValueError,urllib.request.urlopen,
+ f'data:text/html{c0};base64,ZGF0YQ==')
class urlretrieve_FileTests(unittest.TestCase):
"""Test urllib.urlretrieve() on local files"""
diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py
index d98ba5dd1983b9..3abb7ae1b049b7 100644
--- a/Lib/urllib/request.py
+++ b/Lib/urllib/request.py
@@ -1654,6 +1654,11 @@ def data_open(self, req):
scheme, data = url.split(":",1)
mediatype, data = data.split(",",1)
+ # Disallow control characters within mediatype.
+ if re.search(r"[\x00-\x1F\x7F]", mediatype):
+ raise ValueError(
+ "Control characters not allowed in data: mediatype")
+
# even base64 encoded data URLs might be quoted so unquote in any case:
data = unquote_to_bytes(data)
if mediatype.endswith(";base64"):
diff --git a/Misc/NEWS.d/next/Security/2026-01-16-11-51-19.gh-issue-143925.mrtcHW.rst b/Misc/NEWS.d/next/Security/2026-01-16-11-51-19.gh-issue-143925.mrtcHW.rst
new file mode 100644
index 00000000000000..46109dfbef3ee7
--- /dev/null
+++ b/Misc/NEWS.d/next/Security/2026-01-16-11-51-19.gh-issue-143925.mrtcHW.rst
@@ -0,0 +1 @@
+Reject control characters in ``data:`` URL media types.