- Add security patches: * CVE-2025-55197.patch (bsc#1248089) * CVE-2026-27024.patch (bsc#1258691) * CVE-2026-27025.patch (bsc#1258692) * CVE-2026-27026.patch (bsc#1258693) OBS-URL: https://build.opensuse.org/package/show/devel:languages:python/python-PyPDF2?expand=0&rev=26
190 lines
7.1 KiB
Diff
190 lines
7.1 KiB
Diff
From 77d7b8d7cfbe8dd179858dfa42666f73fc6e57a2 Mon Sep 17 00:00:00 2001
|
||
From: Stefan <96178532+stefan6419846@users.noreply.github.com>
|
||
Date: Tue, 17 Feb 2026 17:46:56 +0100
|
||
Subject: [PATCH] SEC: Limit size of `/ToUnicode` entries (#3646)
|
||
|
||
---
|
||
PyPDF2/_cmap.py | 20 ++++++++++
|
||
tests/test_cmap.py | 91 +++++++++++++++++++++++++++++++++++++++++++++-
|
||
2 files changed, 109 insertions(+), 2 deletions(-)
|
||
|
||
Index: PyPDF2-2.11.1/PyPDF2/_cmap.py
|
||
===================================================================
|
||
--- PyPDF2-2.11.1.orig/PyPDF2/_cmap.py
|
||
+++ PyPDF2-2.11.1/PyPDF2/_cmap.py
|
||
@@ -5,7 +5,7 @@ from typing import Any, Dict, List, Tupl
|
||
|
||
from ._codecs import adobe_glyphs, charset_encoding
|
||
from ._utils import logger_warning
|
||
-from .errors import PdfReadWarning
|
||
+from .errors import PdfReadWarning, LimitReachedError
|
||
from .generic import DecodedStreamObject, DictionaryObject
|
||
|
||
|
||
@@ -262,6 +262,15 @@ def process_cm_line(
|
||
return process_rg, process_char, multiline_rg
|
||
|
||
|
||
+# Usual values should be up to 65_536.
|
||
+MAPPING_DICTIONARY_SIZE_LIMIT = 100_000
|
||
+
|
||
+
|
||
+def _check_mapping_size(size: int) -> None:
|
||
+ if size > MAPPING_DICTIONARY_SIZE_LIMIT:
|
||
+ raise LimitReachedError(f"Maximum /ToUnicode size limit reached: {size} > {MAPPING_DICTIONARY_SIZE_LIMIT}.")
|
||
+
|
||
+
|
||
def parse_bfrange(
|
||
l: bytes,
|
||
map_dict: Dict[Any, Any],
|
||
@@ -273,6 +282,8 @@ def parse_bfrange(
|
||
nbi = max(len(lst[0]), len(lst[1]))
|
||
map_dict[-1] = ceil(nbi / 2)
|
||
fmt = b"%%0%dX" % (map_dict[-1] * 2)
|
||
+ entry_count = len(int_entry)
|
||
+ _check_mapping_size(entry_count)
|
||
if multiline_rg is not None:
|
||
a = multiline_rg[0] # a, b not in the current line
|
||
b = multiline_rg[1]
|
||
@@ -280,6 +291,8 @@ def parse_bfrange(
|
||
if sq == b"]":
|
||
closure_found = True
|
||
break
|
||
+ entry_count += 1
|
||
+ _check_mapping_size(entry_count)
|
||
map_dict[
|
||
unhexlify(fmt % a).decode(
|
||
"charmap" if map_dict[-1] == 1 else "utf-16-be",
|
||
@@ -296,6 +309,8 @@ def parse_bfrange(
|
||
if sq == b"]":
|
||
closure_found = True
|
||
break
|
||
+ entry_count += 1
|
||
+ _check_mapping_size(entry_count)
|
||
map_dict[
|
||
unhexlify(fmt % a).decode(
|
||
"charmap" if map_dict[-1] == 1 else "utf-16-be",
|
||
@@ -308,6 +323,8 @@ def parse_bfrange(
|
||
c = int(lst[2], 16)
|
||
fmt2 = b"%%0%dX" % max(4, len(lst[2]))
|
||
closure_found = True
|
||
+ range_size = max(0, b - a + 1)
|
||
+ _check_mapping_size(entry_count + range_size) # This can be checked beforehand.
|
||
while a <= b:
|
||
map_dict[
|
||
unhexlify(fmt % a).decode(
|
||
@@ -323,6 +340,8 @@ def parse_bfrange(
|
||
|
||
def parse_bfchar(l: bytes, map_dict: Dict[Any, Any], int_entry: List[int]) -> None:
|
||
lst = [x for x in l.split(b" ") if x]
|
||
+ new_count = len(lst) // 2
|
||
+ _check_mapping_size(len(int_entry) + new_count) # This can be checked beforehand.
|
||
map_dict[-1] = len(lst[0]) // 2
|
||
while len(lst) > 1:
|
||
map_to = ""
|
||
Index: PyPDF2-2.11.1/tests/test_cmap.py
|
||
===================================================================
|
||
--- PyPDF2-2.11.1.orig/tests/test_cmap.py
|
||
+++ PyPDF2-2.11.1/tests/test_cmap.py
|
||
@@ -3,7 +3,9 @@ from io import BytesIO
|
||
import pytest
|
||
|
||
-from PyPDF2 import PdfReader
|
||
+from PyPDF2 import PdfReader, PdfWriter
|
||
-from PyPDF2.errors import PdfReadWarning
|
||
+from PyPDF2._cmap import parse_bfchar, parse_bfrange
|
||
+from PyPDF2.errors import PdfReadWarning, LimitReachedError
|
||
+from PyPDF2.generic import DictionaryObject, NameObject, StreamObject
|
||
|
||
from . import get_pdf_from_url
|
||
|
||
@@ -91,3 +93,89 @@ def test_iss1379():
|
||
name = "02voc.pdf"
|
||
reader = PdfReader(BytesIO(get_pdf_from_url(url, name=name)))
|
||
reader.pages[2].extract_text()
|
||
+
|
||
+
|
||
+def test_parse_bfrange__iteration_limit():
|
||
+ writer = PdfWriter()
|
||
+
|
||
+ to_unicode = StreamObject()
|
||
+ to_unicode.set_data(
|
||
+ b"beginbfrange\n"
|
||
+ b"<00000000> <001FFFFF> <00000000>\n"
|
||
+ b"endbfrange\n"
|
||
+ )
|
||
+ font = writer._add_object(DictionaryObject({
|
||
+ NameObject("/Type"): NameObject("/Font"),
|
||
+ NameObject("/Subtype"): NameObject("/Type1"),
|
||
+ NameObject("/BaseFont"): NameObject("/Helvetica"),
|
||
+ NameObject("/ToUnicode"): to_unicode,
|
||
+ }))
|
||
+
|
||
+ page = writer.add_blank_page(width=100, height=100)
|
||
+ page[NameObject("/Resources")] = DictionaryObject({
|
||
+ NameObject("/Font"): DictionaryObject({
|
||
+ NameObject("/F1"): font.indirect_reference,
|
||
+ })
|
||
+ })
|
||
+
|
||
+ # Case without list, exceeding list directly.
|
||
+ with pytest.raises(
|
||
+ expected_exception=LimitReachedError, match=r"^Maximum /ToUnicode size limit reached: 2097152 > 100000\.$"
|
||
+ ):
|
||
+ _ = page.extract_text()
|
||
+
|
||
+ # Use a pre-filled dummy list to simulate multiple calls where the upper bound does
|
||
+ # not overflow, but the overall size does. Case without list.
|
||
+ int_entry = [0] * 99_999
|
||
+ map_dict = {}
|
||
+ with pytest.raises(
|
||
+ expected_exception=LimitReachedError, match=r"^Maximum /ToUnicode size limit reached: 165535 > 100000\.$"
|
||
+ ):
|
||
+ _ = parse_bfrange(l=b"0000 FFFF 0000", map_dict=map_dict, int_entry=int_entry, multiline_rg=None)
|
||
+ assert map_dict == {-1: 2}
|
||
+
|
||
+ # Exceeding from previous call.
|
||
+ int_entry.append(1)
|
||
+ map_dict = {}
|
||
+ with pytest.raises(
|
||
+ expected_exception=LimitReachedError, match=r"^Maximum /ToUnicode size limit reached: 100001 > 100000\.$"
|
||
+ ):
|
||
+ _ = parse_bfrange(l=b"00000000 00000000 00000000", map_dict=map_dict, int_entry=int_entry, multiline_rg=None)
|
||
+ assert map_dict == {-1: 4}
|
||
+
|
||
+ # multiline_rg
|
||
+ int_entry = [0] * 99_995
|
||
+ map_dict = {-1: 1}
|
||
+ with pytest.raises(
|
||
+ expected_exception=LimitReachedError, match=r"^Maximum /ToUnicode size limit reached: 100001 > 100000\.$"
|
||
+ ):
|
||
+ _ = parse_bfrange(
|
||
+ l=b"0020 0021 0022 0023 0024 0025 0026 2019",
|
||
+ map_dict=map_dict, int_entry=int_entry, multiline_rg=(32, 251)
|
||
+ )
|
||
+ assert map_dict == {-1: 1, " ": " ", "!": "!", '"': '"', "#": "#", "$": "$"}
|
||
+
|
||
+ # No multiline_rg, but list.
|
||
+ int_entry = [0] * 99_995
|
||
+ map_dict = {}
|
||
+ with pytest.raises(
|
||
+ expected_exception=LimitReachedError, match=r"^Maximum /ToUnicode size limit reached: 100001 > 100000\.$"
|
||
+ ):
|
||
+ _ = parse_bfrange(
|
||
+ l=b"01 8A [ FFFD FFFD FFFD FFFF FFAB AAAA BBBB",
|
||
+ map_dict=map_dict, int_entry=int_entry, multiline_rg=None
|
||
+ )
|
||
+ assert map_dict == {-1: 1, "\x01": "\ufffd", "\x02": "\ufffd", "\x03": "\ufffd", "\x04": "\uffff", "\x05": "ᆱ"}
|
||
+
|
||
+
|
||
+def test_parse_bfchar__iteration_limit():
|
||
+ int_entry = [0] * 99_995
|
||
+ map_dict = {}
|
||
+ with pytest.raises(
|
||
+ expected_exception=LimitReachedError, match=r"^Maximum /ToUnicode size limit reached: 100002 > 100000\.$"
|
||
+ ):
|
||
+ parse_bfchar(
|
||
+ l=b"0003 0020 0008 0025 0009 0026 000A 0027 000B 0028 000C 0029 000D 002A",
|
||
+ map_dict=map_dict, int_entry=int_entry,
|
||
+ )
|
||
+ assert map_dict == {}
|