Skip to content

Commit

Permalink
ENH: Improve/rewrite PDF permission retrieval
Browse files Browse the repository at this point in the history
  • Loading branch information
stefan6419846 committed Jan 8, 2024
1 parent cfd8712 commit 553ed6e
Show file tree
Hide file tree
Showing 5 changed files with 184 additions and 7 deletions.
14 changes: 13 additions & 1 deletion pypdf/_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
StrByteType,
StreamType,
b_,
deprecate_with_replacement,
logger_warning,
parse_iso8824_date,
read_non_whitespace,
Expand Down Expand Up @@ -1811,12 +1812,16 @@ def decrypt(self, password: Union[str, bytes]) -> PasswordType:

def decode_permissions(self, permissions_code: int) -> Dict[str, bool]:
"""Take the permissions as an integer, return the allowed access."""
deprecate_with_replacement(
old_name="decode_permissions", new_name="user_access_permissions", removed_in="5.0.0"
)

permissions_mapping = {
"print": UserAccessPermissions.PRINT,
"modify": UserAccessPermissions.MODIFY,
"copy": UserAccessPermissions.EXTRACT,
"annotations": UserAccessPermissions.ADD_OR_MODIFY,
"forms": UserAccessPermissions.R7,
"forms": UserAccessPermissions.FILL_FORM_FIELDS,
"accessability": UserAccessPermissions.EXTRACT_TEXT_AND_GRAPHICS,
"assemble": UserAccessPermissions.ASSEMBLE_DOC,
"print_high_quality": UserAccessPermissions.PRINT_TO_REPRESENTATION,
Expand All @@ -1827,6 +1832,13 @@ def decode_permissions(self, permissions_code: int) -> Dict[str, bool]:
for key, flag in permissions_mapping.items()
}

@property
def user_access_permissions(self) -> Optional[UserAccessPermissions]:
    """
    Return the user access permissions of an encrypted document.

    Returns:
        ``None`` when no encryption information is attached to this reader,
        otherwise the ``P`` value of the encryption object wrapped in
        :class:`UserAccessPermissions`.
    """
    encryption = self._encryption
    return None if encryption is None else UserAccessPermissions(encryption.P)

@property
def is_encrypted(self) -> bool:
"""
Expand Down
2 changes: 1 addition & 1 deletion pypdf/_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@
)

# Field flag value with no bits set.
OPTIONAL_READ_WRITE_FIELD = FieldFlag(0)
# NOTE(review): the two assignments below look like the removed/added pair of
# a diff hunk; if both are executed, the second one is the value that remains.
# The first covers 31 bits minus the value 3; the second delegates the
# computation to UserAccessPermissions.all().
ALL_DOCUMENT_PERMISSIONS = UserAccessPermissions((2**31 - 1) - 3)
ALL_DOCUMENT_PERMISSIONS = UserAccessPermissions.all()


class ObjectDeletionFlag(enum.IntFlag):
Expand Down
41 changes: 41 additions & 0 deletions pypdf/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,47 @@ class UserAccessPermissions(IntFlag):
R31 = 2**30
R32 = 2**31

@classmethod
def _is_reserved(cls, name: str) -> bool:
"""Check if the given name corresponds to a reserved flag entry."""
return name.startswith("R") and name[1:].isdigit()

@classmethod
def _defaults_to_one(cls, name: str) -> bool:
"""Check if the given reserved name defaults to 1 = active."""
return name not in {"R1", "R2"}

def to_dict(self) -> Dict[str, bool]:
"""Convert the given flag value to a corresponding verbose name mapping."""
result: Dict[str, bool] = {}
for name, flag in UserAccessPermissions.__members__.items():
if UserAccessPermissions._is_reserved(name):
continue
result[name.lower()] = (self & flag) == flag
return result

@classmethod
def from_dict(cls, value: Dict[str, bool]) -> "UserAccessPermissions":
"""Convert the verbose name mapping to the corresponding flag value."""
value_copy = value.copy()
result = cls(0)
for name, flag in cls.__members__.items():
if cls._is_reserved(name):
# Reserved names have a required value. Use it.
if cls._defaults_to_one(name):
result |= flag
continue
is_active = value_copy.pop(name.lower(), False)
if is_active:
result |= flag
if value_copy:
raise ValueError(f"Unknown dictionary keys: {value_copy!r}")
return result

@classmethod
def all(cls) -> "UserAccessPermissions":
return cls((2**32 - 1) - cls.R1 - cls.R2)


class Ressources:
"""TABLE 3.30 Entries in a resource dictionary."""
Expand Down
84 changes: 82 additions & 2 deletions tests/test_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,17 @@
import re
from typing import Callable

from pypdf.constants import PDF_KEYS, GraphicsStateParameters
import pytest

from pypdf.constants import PDF_KEYS, GraphicsStateParameters, UserAccessPermissions


def test_slash_prefix():
"""
Naming conventions of PDF_KEYS (constant names) are followed.
This test function validates if PDF key names follow the required pattern:
- Starts with a slash '/'
- Starts with a slash "/"
- Followed by an uppercase letter
- Contains alphanumeric characters (letters and digits)
- The attribute name should be a case-insensitive match, with underscores removed
Expand All @@ -34,3 +36,81 @@ def test_slash_prefix():
if cls == GraphicsStateParameters and attr in ["ca", "op"]:
continue
assert pattern.match(constant_value)


def test_user_access_permissions__dict_handling():
    """Check the to_dict()/from_dict() conversions of UserAccessPermissions."""
    # Value is mix of configurable and reserved bits.
    # Reserved bits should not be part of the dictionary.
    as_dict = UserAccessPermissions(512 + 64 + 8).to_dict()
    assert as_dict == {
        "add_or_modify": False,
        "assemble_doc": False,
        "extract": False,
        "extract_text_and_graphics": True,
        "fill_form_fields": False,
        "modify": True,
        "print": False,
        "print_to_representation": False,
    }

    # Convert the dictionary back to an integer.
    # This should add the reserved bits automatically:
    # 4294963912 == 4294963392 (reserved bits, see below) + 512 + 8.
    permissions = UserAccessPermissions.from_dict(as_dict)
    assert permissions == 4294963912

    # Roundtrip for valid dictionary.
    data = {
        "add_or_modify": True,
        "assemble_doc": False,
        "extract": False,
        "extract_text_and_graphics": True,
        "fill_form_fields": False,
        "modify": True,
        "print": False,
        "print_to_representation": True,
    }
    assert UserAccessPermissions.from_dict(data).to_dict() == data

    # Empty inputs.
    assert UserAccessPermissions.from_dict({}) == 4294963392  # Reserved bits.
    assert UserAccessPermissions(0).to_dict() == {
        "add_or_modify": False,
        "assemble_doc": False,
        "extract": False,
        "extract_text_and_graphics": False,
        "fill_form_fields": False,
        "modify": False,
        "print": False,
        "print_to_representation": False,
    }

    # Unknown dictionary keys.
    data = {
        "add_or_modify": True,
        "key1": False,
        "key2": True,
    }
    unknown = {
        "key1": False,
        "key2": True,
    }
    # `match` is interpreted as a regular expression. The dict repr contains
    # regex metacharacters (braces), so escape it to compare the message
    # literally instead of relying on how `re` treats a non-quantifier brace.
    with pytest.raises(
        ValueError,
        match=re.escape(f"Unknown dictionary keys: {unknown!r}"),
    ):
        UserAccessPermissions.from_dict(data)


def test_user_access_permissions__all():
    """Check width and selected bits of UserAccessPermissions.all()."""
    everything = UserAccessPermissions.all()
    as_int = int(everything)
    as_binary = bin(everything)

    assert as_binary.startswith("0b")
    assert len(as_binary[2:]) == 32  # 32-bit integer

    # R1 and R2 must be cleared.
    for cleared in (UserAccessPermissions.R1, UserAccessPermissions.R2):
        assert as_int & cleared == 0

    # A configurable flag and two other reserved flags must be set.
    for enabled in (
        UserAccessPermissions.PRINT,
        UserAccessPermissions.R7,
        UserAccessPermissions.R31,
    ):
        assert as_int & enabled == enabled
50 changes: 47 additions & 3 deletions tests/test_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,12 @@

import pytest

from pypdf import PdfReader
from pypdf import PdfReader, PdfWriter
from pypdf._crypt_providers import crypt_provider
from pypdf._reader import convert_to_int
from pypdf.constants import ImageAttributes as IA
from pypdf.constants import PageAttributes as PG
from pypdf.constants import UserAccessPermissions as UAP
from pypdf.errors import (
EmptyFileError,
FileNotDecryptedError,
Expand Down Expand Up @@ -730,11 +731,54 @@ def test_decode_permissions():

print_ = base.copy()
print_["print"] = True
assert reader.decode_permissions(4) == print_
with pytest.raises(
DeprecationWarning,
match="decode_permissions is deprecated and will be removed in pypdf 5.0.0. Use user_access_permissions instead",
):
assert reader.decode_permissions(4) == print_

modify = base.copy()
modify["modify"] = True
assert reader.decode_permissions(8) == modify
with pytest.raises(
DeprecationWarning,
match="decode_permissions is deprecated and will be removed in pypdf 5.0.0. Use user_access_permissions instead",
):
assert reader.decode_permissions(8) == modify


@pytest.mark.skipif(not HAS_AES, reason="No AES implementation")
def test_user_access_permissions():
    """Check PdfReader.user_access_permissions for plain, encrypted and freshly written files."""
    plain_pdf = RESOURCE_ROOT / "crazyones.pdf"

    def permissions_after_roundtrip(flags):
        # Encrypt a clone of the plain document with the given flags, write it
        # to memory and report what a reader sees for the result.
        writer = PdfWriter(clone_from=plain_pdf)
        writer.encrypt(
            user_password="",
            owner_password="abc",
            permissions_flag=flags,
        )
        buffer = BytesIO()
        writer.write(buffer)
        return PdfReader(buffer).user_access_permissions

    # Not encrypted.
    assert PdfReader(plain_pdf).user_access_permissions is None

    # Encrypted.
    encrypted = PdfReader(RESOURCE_ROOT / "encryption" / "r6-owner-password.pdf")
    assert encrypted.user_access_permissions == UAP.all()

    # Custom writer permissions.
    assert permissions_after_roundtrip(UAP.PRINT | UAP.FILL_FORM_FIELDS) == (
        UAP.PRINT | UAP.FILL_FORM_FIELDS
    )

    # All writer permissions.
    assert permissions_after_roundtrip(UAP.all()) == UAP.all()


def test_pages_attribute():
Expand Down

0 comments on commit 553ed6e

Please sign in to comment.