From bb3a69030fde7da545229438ff327b8c971cef49 Mon Sep 17 00:00:00 2001 From: Stefan <96178532+stefan6419846@users.noreply.github.com> Date: Mon, 11 Aug 2025 16:10:25 +0200 Subject: [PATCH] SEC: Limit decompressed size for FlateDecode filter (#3430) Closes #3429. --- PyPDF2/_reader.py | 10 ++++- PyPDF2/errors.py | 4 ++ PyPDF2/filters.py | 89 +++++++++++++++++++++++++--------------- tests/example_files.yaml | 2 + tests/test_filters.py | 41 ++++++++++++++---- tests/test_reader.py | 15 +++++++ 6 files changed, 119 insertions(+), 42 deletions(-) Index: PyPDF2-2.11.1/PyPDF2/_reader.py =================================================================== --- PyPDF2-2.11.1.orig/PyPDF2/_reader.py +++ PyPDF2-2.11.1/PyPDF2/_reader.py @@ -1681,15 +1681,20 @@ class PdfReader: xrefstream = cast(ContentStream, read_object(stream, self)) assert cast(str, xrefstream["/Type"]) == "/XRef" self.cache_indirect_object(generation, idnum, xrefstream) - stream_data = BytesIO(b_(xrefstream.get_data())) + # Index pairs specify the subsections in the dictionary. If # none create one subsection that spans everything. - idx_pairs = xrefstream.get("/Index", [0, xrefstream.get("/Size")]) + if "/Size" not in xrefstream: + # According to table 17 of the PDF 2.0 specification, this key is required. + raise PdfReadError(f"Size missing from XRef stream {xrefstream!r}!") + idx_pairs = xrefstream.get("/Index", [0, xrefstream["/Size"]]) entry_sizes = cast(Dict[Any, Any], xrefstream.get("/W")) assert len(entry_sizes) >= 3 if self.strict and len(entry_sizes) > 3: raise PdfReadError(f"Too many entry sizes: {entry_sizes}") + stream_data = BytesIO(xrefstream.get_data()) + def get_entry(i: int) -> Union[int, Tuple[int, ...]]: # Reads the correct number of bytes for each entry. See the # discussion of the W parameter in PDF spec table 17. Index: PyPDF2-2.11.1/PyPDF2/errors.py =================================================================== --- PyPDF2-2.11.1.orig/PyPDF2/errors.py +++ PyPDF2-2.11.1/PyPDF2/errors.py @@ -46,3 +46,7 @@ class EmptyFileError(PdfReadError): STREAM_TRUNCATED_PREMATURELY = "Stream has ended unexpectedly" + + +class LimitReachedError(PyPdfError): + """Raised when a limit is reached.""" Index: PyPDF2-2.11.1/PyPDF2/filters.py =================================================================== --- PyPDF2-2.11.1.orig/PyPDF2/filters.py +++ PyPDF2-2.11.1/PyPDF2/filters.py @@ -58,12 +58,12 @@ from .constants import GraphicsStatePara from .constants import ImageAttributes as IA from .constants import LzwFilterParameters as LZW from .constants import StreamAttributes as SA -from .errors import PdfReadError, PdfStreamError +from .errors import LimitReachedError, PdfReadError, PdfStreamError def decompress(data: bytes) -> bytes: try: - return zlib.decompress(data) + return _decompress_with_limit(data) except zlib.error: d = zlib.decompressobj(zlib.MAX_WBITS | 32) result_str = b"" @@ -74,6 +74,18 @@ def decompress(data: bytes) -> bytes: pass return result_str +ZLIB_MAX_OUTPUT_LENGTH = 75_000_000 + + +def _decompress_with_limit(data: bytes) -> bytes: + decompressor = zlib.decompressobj() + result = decompressor.decompress(data, max_length=ZLIB_MAX_OUTPUT_LENGTH) + if decompressor.unconsumed_tail: + raise LimitReachedError( + f"Limit reached while decompressing. {len(decompressor.unconsumed_tail)} bytes remaining." + ) + return result + class FlateDecode: @staticmethod Index: PyPDF2-2.11.1/tests/test_filters.py =================================================================== --- PyPDF2-2.11.1.orig/tests/test_filters.py +++ PyPDF2-2.11.1/tests/test_filters.py @@ -1,4 +1,5 @@ import string +import zlib from io import BytesIO from itertools import product as cartesian_product from unittest.mock import patch @@ -6,13 +7,14 @@ from unittest.mock import patch import pytest from PyPDF2 import PdfReader -from PyPDF2.errors import PdfReadError, PdfStreamError +from PyPDF2.errors import LimitReachedError, PdfReadError, PdfStreamError from PyPDF2.filters import ( ASCII85Decode, ASCIIHexDecode, CCITParameters, CCITTFaxDecode, FlateDecode, + decompress, ) from PyPDF2.generic import ArrayObject, DictionaryObject, NumberObject