Skip to content

Commit

Permalink
Rewriting using Protocols
Browse files Browse the repository at this point in the history
Also includes the reintroduction of py-pdf#1303, which was wrongly cancelled in py-pdf#1309.
  • Loading branch information
pubpub-zz committed Nov 12, 2022
1 parent deb4ec9 commit e1c3ed3
Show file tree
Hide file tree
Showing 7 changed files with 100 additions and 67 deletions.
5 changes: 3 additions & 2 deletions PyPDF2/_page.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
)

from ._cmap import build_char_map, unknown_char_map
from ._protocols import PdfReaderProtocol
from ._utils import (
CompressedTransformationMatrix,
File,
Expand Down Expand Up @@ -291,13 +292,13 @@ class PageObject(DictionaryObject):

def __init__(
self,
pdf: Optional[Any] = None, # PdfReader
pdf: Optional[PdfReaderProtocol] = None,
indirect_ref: Optional[IndirectObject] = None,
) -> None:
from ._reader import PdfReader

DictionaryObject.__init__(self)
self.pdf: Optional[PdfReader] = pdf
self.pdf: Optional[PdfReaderProtocol] = pdf
self.indirect_ref = indirect_ref

def hash_value_data(self) -> bytes:
Expand Down
71 changes: 71 additions & 0 deletions PyPDF2/_protocols.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
"""Helpers for working with PDF types."""

from io import BufferedReader, BufferedWriter, BytesIO, FileIO
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Union

try:
# Python 3.8+: https://peps.python.org/pep-0586
from typing import Literal, Protocol # type: ignore[attr-defined]
except ImportError:
from typing_extensions import Literal, Protocol # type: ignore[misc]

try:
# Python 3.10+: https://www.python.org/dev/peps/pep-0484/
from typing import TypeAlias # type: ignore[attr-defined]
except ImportError:
from typing_extensions import TypeAlias

from ._utils import StrByteType


class PdfObjectProtocol(Protocol):  # pragma: no cover
    """Structural interface describing a PDF object.

    The protocol requires an ``indirect_ref`` attribute plus the cloning
    and dereferencing methods below. Everything is typed loosely (``Any``)
    so that this module does not have to import the concrete object or
    writer classes.
    """

    # Reference attached to the object; deliberately typed as ``Any`` here.
    indirect_ref: Any

    def clone(
        self,
        pdf_dest: Any,
        force_duplicate: bool = False,
        ignore_fields: Union[Tuple[str, ...], List[str], None] = (),
    ) -> Any:
        """Clone this object into ``pdf_dest``.

        Args:
            pdf_dest: Destination document that receives the clone.
            force_duplicate: When ``True`` a new copy is requested even if
                the object has already been cloned into ``pdf_dest``.
            ignore_fields: Names of fields to leave out while cloning; the
                default is an empty (immutable) tuple.

        Returns:
            The cloned object.
        """
        ...

    def _reference_clone(self, clone: Any, pdf_dest: Any) -> Any:
        """Reference ``clone`` within ``pdf_dest`` and return the result.

        Args:
            clone: The freshly created copy of this object.
            pdf_dest: Destination document the clone belongs to.
        """
        ...

    def get_object(self) -> Optional["PdfObjectProtocol"]:
        """Return the object this one stands for, or ``None``.

        NOTE(review): presumably this dereferences indirect objects and
        returns ``self`` otherwise -- confirm against the implementers.
        """
        ...


class PdfReaderProtocol(Protocol):  # pragma: no cover
    """Structural interface describing a PDF reader.

    A conforming class exposes four read-only properties and a
    ``get_object`` lookup. Annotating against this protocol lets other
    modules refer to "a reader" without importing the concrete reader
    class.
    """

    @property
    def pdf_header(self) -> str:
        """Return the header as a string.

        NOTE(review): the exact format is not defined here -- confirm
        against the concrete reader.
        """
        ...

    @property
    def strict(self) -> bool:
        """Return the reader's ``strict`` flag."""
        ...

    @property
    def xref(self) -> Dict[int, Dict[int, Any]]:
        """Return a two-level, integer-keyed mapping.

        NOTE(review): presumably the cross-reference table; the meaning
        of the two key levels is not defined here -- confirm against the
        concrete reader.
        """
        ...

    @property
    def pages(self) -> List[Any]:
        """Return the pages as a list."""
        ...

    def get_object(self, indirect_reference: Any) -> Optional[PdfObjectProtocol]:
        """Look up ``indirect_reference`` and return the object, or ``None``."""
        ...


class PdfWriterProtocol(Protocol):  # pragma: no cover
    """Structural interface describing a PDF writer.

    A conforming class provides the two bookkeeping attributes below and
    the ``get_object`` and ``write`` methods.
    """

    # Objects held by the writer.
    _objects: List[Any]
    # Two-level integer mapping.
    # NOTE(review): presumably id(source document) -> {source object
    # number -> number in this writer}; confirm against the cloning code.
    _id_translated: Dict[int, Dict[int, int]]

    def get_object(self, indirect_reference: Any) -> Optional[PdfObjectProtocol]:
        """Look up ``indirect_reference`` and return the object, or ``None``."""
        ...

    def write(
        self, stream: Union[Path, StrByteType]
    ) -> Tuple[bool, Union[FileIO, BytesIO, BufferedReader, BufferedWriter]]:
        """Write the document to ``stream``.

        Args:
            stream: A ``Path`` or a ``StrByteType`` value naming or
                holding the output target.

        Returns:
            A ``(bool, stream)`` tuple. NOTE(review): the meaning of the
            boolean is not defined here -- confirm against the concrete
            writer.
        """
        ...
8 changes: 4 additions & 4 deletions PyPDF2/_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@

from ._encryption import Encryption
from ._page import PageObject, _VirtualList
from ._protocols import PdfWriterProtocol
from ._reader import PdfReader
from ._security import _alg33, _alg34, _alg35
from ._utils import (
Expand Down Expand Up @@ -104,7 +105,6 @@
StreamObject,
TextStringObject,
TreeObject,
_PdfWriterInterface,
create_string_object,
hex_to_rgb,
)
Expand All @@ -127,17 +127,17 @@
ALL_DOCUMENT_PERMISSIONS = UserAccessPermissions((2**31 - 1) - 3)


class PdfWriter(_PdfWriterInterface):
class PdfWriter:
"""
This class supports writing PDF files out, given pages produced by another
class (typically :class:`PdfReader<PyPDF2.PdfReader>`).
"""

def __init__(self, fileobj: StrByteType = "") -> None:
self._header = b"%PDF-1.3"
self._objects = [] # array of indirect objects
self._objects: List[PdfObject] = [] # array of indirect objects
self._idnum_hash: Dict[bytes, IndirectObject] = {}
self._id_translated = {}
self._id_translated: Dict[int, Dict[int, int]] = {}

# The root of our page tree node.
pages = DictionaryObject()
Expand Down
4 changes: 0 additions & 4 deletions PyPDF2/generic/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@
NumberObject,
PdfObject,
TextStringObject,
_PdfWriterInterface,
encode_pdfdocencoding,
)
from ._data_structures import (
Expand Down Expand Up @@ -97,9 +96,6 @@ def createStringObject(
return create_string_object(string, forced_encoding)


_PdfWriterInterface # to prevent error


__all__ = [
# Base types
"BooleanObject",
Expand Down
42 changes: 14 additions & 28 deletions PyPDF2/generic/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
from typing import Any, Callable, Dict, List, Optional, Tuple, Union, cast

from .._codecs import _pdfdoc_encoding_rev
from .._protocols import PdfObjectProtocol, PdfWriterProtocol
from .._utils import (
StrByteType,
StreamType,
Expand All @@ -53,27 +54,7 @@
__author_email__ = "biziqe@mathieu.fenniak.net"


class _PdfDocumentInterface:
def get_object(self, ido: Union[int, "IndirectObject"]) -> "PdfObject":
pass


class _PdfWriterInterface(_PdfDocumentInterface):
_objects: List["PdfObject"]
_id_translated: Dict[int, Dict[int, int]]

def write(
self, stream: Union[Path, StrByteType]
) -> Tuple[bool, Union[FileIO, BytesIO, BufferedReader, BufferedWriter]]:
pass


PdfWriter = (
_PdfWriterInterface # local alias to ease annotation reading and auto comments
)


class PdfObject:
class PdfObject(PdfObjectProtocol):
# function for calculating a hash value
hash_func: Callable[..., "hashlib._Hash"] = hashlib.sha1
indirect_ref: Optional["IndirectObject"]
Expand All @@ -92,20 +73,22 @@ def hash_value(self) -> bytes:

def clone(
self,
pdf_dest: PdfWriter,
pdf_dest: PdfWriterProtocol,
force_duplicate: bool = False,
ignore_fields: Union[Tuple[str, ...], List[str], None] = (),
) -> "PdfObject":
"""
clone object into pdf_dest (PdfWriterOnly)
clone object into pdf_dest (PdfWriterProtocol which is an interface for PdfWriter)
force_duplicate: in standard if the object has been already cloned and reference,
the copy is returned; when force_duplicate == True, a new copy is always performed
ignore_fields : list/tuple of Fields names (for dictionaries that will be ignored during cloning (apply also to childs duplication)
in standard, clone function call _reference_clone (see _reference)
"""
raise Exception("clone PdfObject")

def _reference_clone(self, clone: Any, pdf_dest: PdfWriter) -> "PdfObject":
def _reference_clone(
self, clone: Any, pdf_dest: PdfWriterProtocol
) -> PdfObjectProtocol:
"""
reference the object within the _objects of pdf_dest only if indirect_ref attribute exists (which means the objects was already identified in xref/xobjstm)
if object has been already referenced do nothing
Expand All @@ -122,9 +105,11 @@ def _reference_clone(self, clone: Any, pdf_dest: PdfWriter) -> "PdfObject":
if id(ind.pdf) not in pdf_dest._id_translated:
pdf_dest._id_translated[id(ind.pdf)] = {}
if ind.idnum in pdf_dest._id_translated[id(ind.pdf)]:
return pdf_dest.get_object(
obj = pdf_dest.get_object(
pdf_dest._id_translated[id(ind.pdf)][ind.idnum]
)
assert obj is not None
return obj
pdf_dest._id_translated[id(ind.pdf)][ind.idnum] = i
pdf_dest._objects.append(clone)
clone.indirect_ref = IndirectObject(i, 0, pdf_dest)
Expand All @@ -147,7 +132,7 @@ def write_to_stream(
class NullObject(PdfObject):
def clone(
self,
pdf_dest: PdfWriter,
pdf_dest: PdfWriterProtocol,
force_duplicate: bool = False,
ignore_fields: Union[Tuple[str, ...], List[str], None] = (),
) -> "NullObject":
Expand Down Expand Up @@ -187,7 +172,7 @@ def __init__(self, value: Any) -> None:

def clone(
self,
pdf_dest: PdfWriter,
pdf_dest: PdfWriterProtocol,
force_duplicate: bool = False,
ignore_fields: Union[Tuple[str, ...], List[str], None] = (),
) -> "BooleanObject":
Expand Down Expand Up @@ -246,7 +231,7 @@ def __init__(self, idnum: int, generation: int, pdf: Any) -> None: # PdfReader

def clone(
self,
pdf_dest: PdfWriter,
pdf_dest: PdfWriterProtocol,
force_duplicate: bool = False,
ignore_fields: Union[Tuple[str, ...], List[str], None] = (),
) -> "IndirectObject": # PPzz
Expand All @@ -261,6 +246,7 @@ def clone(
dup = pdf_dest.get_object(pdf_dest._id_translated[id(self.pdf)][self.idnum])
else:
dup = self.get_object().clone(pdf_dest, force_duplicate, ignore_fields) # type: ignore
assert dup is not None
assert dup.indirect_ref is not None
return dup.indirect_ref

Expand Down
16 changes: 8 additions & 8 deletions PyPDF2/generic/_data_structures.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
from io import BytesIO
from typing import Any, Dict, Iterable, List, Optional, Tuple, Union, cast

from .._protocols import PdfObjectProtocol, PdfWriterProtocol
from .._utils import (
WHITESPACES,
StreamType,
Expand Down Expand Up @@ -64,7 +65,6 @@
PdfObject,
TextStringObject,
)
from ._base import _PdfWriterInterface as PdfWriter
from ._utils import read_hex_string_from_stream, read_string_from_stream

logger = logging.getLogger(__name__)
Expand All @@ -75,7 +75,7 @@
class ArrayObject(list, PdfObject):
def clone(
self,
pdf_dest: PdfWriter,
pdf_dest: PdfWriterProtocol,
force_duplicate: bool = False,
ignore_fields: Union[Tuple[str, ...], List[str], None] = (),
) -> "ArrayObject":
Expand Down Expand Up @@ -158,7 +158,7 @@ def readFromStream(
class DictionaryObject(dict, PdfObject):
def clone(
self,
pdf_dest: PdfWriter,
pdf_dest: PdfWriterProtocol,
force_duplicate: bool = False,
ignore_fields: Union[Tuple[str, ...], List[str], None] = (),
) -> "DictionaryObject":
Expand All @@ -181,7 +181,7 @@ def clone(
def _clone(
self,
src: "DictionaryObject",
pdf_dest: PdfWriter,
pdf_dest: PdfWriterProtocol,
force_duplicate: bool,
ignore_fields: Union[Tuple[str, ...], List[str]],
) -> None:
Expand Down Expand Up @@ -499,10 +499,10 @@ def addChild(self, child: Any, pdf: Any) -> None: # pragma: no cover
deprecate_with_replacement("addChild", "add_child")
self.add_child(child, pdf)

def add_child(self, child: Any, pdf: PdfWriter) -> None:
def add_child(self, child: Any, pdf: PdfWriterProtocol) -> None:
self.insert_child(child, None, pdf)

def insert_child(self, child: Any, before: Any, pdf: PdfWriter) -> None:
def insert_child(self, child: Any, before: Any, pdf: PdfWriterProtocol) -> None:
def inc_parent_counter(
parent: Union[None, IndirectObject, TreeObject], n: int
) -> None:
Expand Down Expand Up @@ -671,7 +671,7 @@ def __init__(self) -> None:
def _clone(
self,
src: DictionaryObject,
pdf_dest: PdfWriter,
pdf_dest: PdfWriterProtocol,
force_duplicate: bool,
ignore_fields: Union[Tuple[str, ...], List[str]],
) -> None:
Expand Down Expand Up @@ -887,7 +887,7 @@ def clone(
def _clone(
self,
src: DictionaryObject,
pdf_dest: PdfWriter,
pdf_dest: PdfWriterProtocol,
force_duplicate: bool,
ignore_fields: Union[Tuple[str, ...], List[str]],
) -> None:
Expand Down
21 changes: 0 additions & 21 deletions PyPDF2/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,24 +54,3 @@
"/UseOC",
"/UseAttachments",
]


class PdfReaderProtocol(Protocol): # pragma: no cover
@property
def pdf_header(self) -> str:
...

@property
def strict(self) -> bool:
...

@property
def xref(self) -> Dict[int, Dict[int, Any]]:
...

@property
def pages(self) -> List[Any]:
...

def get_object(self, indirect_reference: Any) -> Optional[Any]:
...

0 comments on commit e1c3ed3

Please sign in to comment.