Skip to content

Commit

Permalink
FIX: robustify stream extraction (#2526)
Browse files Browse the repository at this point in the history
Fixes #2523.

Situations encountered:

* Length field is not correct
* xref may contain stream data that is not in order
* xref contains some free entries (i.e. entries that do not contain a stream offset)
  • Loading branch information
pubpub-zz authored Mar 18, 2024
1 parent 5a4c35e commit bbbc9dd
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 10 deletions.
3 changes: 2 additions & 1 deletion pypdf/_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -1612,7 +1612,8 @@ def _read_standard_xref_table(self, stream: StreamType) -> None:
# any existing key is already set correctly.
pass
else:
self.xref[generation][num] = offset
if entry_type_b == b"n":
self.xref[generation][num] = offset
try:
self.xref_free_entry[generation][num] = entry_type_b == b"f"
except Exception:
Expand Down
2 changes: 1 addition & 1 deletion pypdf/_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -2809,7 +2809,7 @@ def _get_filtered_outline(
if node is None:
node = NullObject()
node = node.get_object()
if isinstance(node, NullObject):
if node is None or isinstance(node, NullObject):
node = DictionaryObject()
if node.get("/Type", "") == "/Outlines" or "/Title" not in node:
node = node.get("/First", None)
Expand Down
16 changes: 8 additions & 8 deletions pypdf/generic/_data_structures.py
Original file line number Diff line number Diff line change
Expand Up @@ -436,14 +436,14 @@ def read_from_stream(
def get_next_obj_pos(
p: int, p1: int, rem_gens: List[int], pdf: PdfReaderProtocol
) -> int:
loc = pdf.xref[rem_gens[0]]
for o in loc:
if p1 > loc[o] and p < loc[o]:
p1 = loc[o]
if len(rem_gens) == 1:
return p1
else:
return get_next_obj_pos(p, p1, rem_gens[1:], pdf)
out = p1
for gen in rem_gens:
loc = pdf.xref[gen]
try:
out = min(out, min([x for x in loc.values() if p < x <= p1]))
except ValueError:
pass
return out

def read_unsized_from_stream(
stream: StreamType, pdf: PdfReaderProtocol
Expand Down

0 comments on commit bbbc9dd

Please sign in to comment.