From 0fbd95938724ad2d72688d4112207c0590f0483f Mon Sep 17 00:00:00 2001
From: rampageservices <20424586+rampageservices@users.noreply.github.com>
Date: Sat, 21 Feb 2026 23:17:20 +0800
Subject: [PATCH] BUG: Prevent infinite loop from circular xref /Prev references

Malformed PDFs can contain circular /Prev references in the xref chain
(e.g., xref A -> /Prev -> xref B -> /Prev -> xref A). This causes
_read_xref_tables_and_trailers() to loop forever, spamming "Overwriting
cache for N M" warnings on every iteration as the same objects are
re-parsed and re-cached indefinitely.

Fix: Track visited xref offsets in a set. If a startxref value has
already been visited, log a warning and break the loop.

Closes #3654
---
 PyPDF2/_reader.py | 9 +++++++++
 1 file changed, 9 insertions(+)

Index: PyPDF2-2.11.1/PyPDF2/_reader.py
===================================================================
--- PyPDF2-2.11.1.orig/PyPDF2/_reader.py
+++ PyPDF2-2.11.1/PyPDF2/_reader.py
@@ -1564,7 +1564,16 @@ class PdfReader:
         self.xref_free_entry: Dict[int, Dict[Any, Any]] = {}
         self.xref_objStm: Dict[int, Tuple[Any, Any]] = {}
         self.trailer = DictionaryObject()
+        visited_xref_offsets: set[int] = set()
         while startxref is not None:
+            # Detect circular /Prev references in the xref chain
+            if startxref in visited_xref_offsets:
+                logger_warning(
+                    f"Circular xref chain detected at offset {startxref}, stopping",
+                    __name__,
+                )
+                break
+            visited_xref_offsets.add(startxref)
             # load the xref table
             stream.seek(startxref, 0)
             x = stream.read(1)