forked from pool/python-PyPDF2
42 lines
1.7 KiB
Diff
42 lines
1.7 KiB
Diff
|
|
From 0fbd95938724ad2d72688d4112207c0590f0483f Mon Sep 17 00:00:00 2001
|
||
|
|
From: rampageservices <20424586+rampageservices@users.noreply.github.com>
|
||
|
|
Date: Sat, 21 Feb 2026 23:17:20 +0800
|
||
|
|
Subject: [PATCH] BUG: Prevent infinite loop from circular xref /Prev
|
||
|
|
references
|
||
|
|
|
||
|
|
Malformed PDFs can contain circular /Prev references in the xref
|
||
|
|
chain (e.g., xref A -> /Prev -> xref B -> /Prev -> xref A).
|
||
|
|
This causes _read_xref_tables_and_trailers() to loop forever,
|
||
|
|
spamming "Overwriting cache for N M" warnings on every iteration
|
||
|
|
as the same objects are re-parsed and re-cached indefinitely.
|
||
|
|
|
||
|
|
Fix: Track visited xref offsets in a set. If a startxref value
|
||
|
|
has already been visited, log a warning and break the loop.
|
||
|
|
|
||
|
|
Closes #3654
|
||
|
|
---
|
||
|
|
PyPDF2/_reader.py | 9 +++++++++
|
||
|
|
1 file changed, 9 insertions(+)
|
||
|
|
|
||
|
|
Index: PyPDF2-2.11.1/PyPDF2/_reader.py
|
||
|
|
===================================================================
|
||
|
|
--- PyPDF2-2.11.1.orig/PyPDF2/_reader.py
|
||
|
|
+++ PyPDF2-2.11.1/PyPDF2/_reader.py
|
||
|
|
@@ -1564,7 +1564,16 @@ class PdfReader:
|
||
|
|
self.xref_free_entry: Dict[int, Dict[Any, Any]] = {}
|
||
|
|
self.xref_objStm: Dict[int, Tuple[Any, Any]] = {}
|
||
|
|
self.trailer = DictionaryObject()
|
||
|
|
+ visited_xref_offsets: set[int] = set()
|
||
|
|
while startxref is not None:
|
||
|
|
+ # Detect circular /Prev references in the xref chain
|
||
|
|
+ if startxref in visited_xref_offsets:
|
||
|
|
+ logger_warning(
|
||
|
|
+ f"Circular xref chain detected at offset {startxref}, stopping",
|
||
|
|
+ __name__,
|
||
|
|
+ )
|
||
|
|
+ break
|
||
|
|
+ visited_xref_offsets.add(startxref)
|
||
|
|
# load the xref table
|
||
|
|
stream.seek(startxref, 0)
|
||
|
|
x = stream.read(1)
|