Skip to content

Commit

Permalink
fix: restore previous API
Browse files Browse the repository at this point in the history
  • Loading branch information
dhdaines committed Aug 2, 2024
1 parent d7e7658 commit 5f5bffd
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 30 deletions.
39 changes: 39 additions & 0 deletions pdfminer/ascii85.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
"""Python implementation of ASCII85/ASCIIHex decoder (Adobe version)."""

import re
from base64 import a85decode
from binascii import unhexlify


def ascii85decode(data: bytes) -> bytes:
    """Decode ASCII85 data as used by the PDF ASCII85Decode filter.

    In ASCII85 encoding, every four bytes are encoded with five ASCII
    letters, using 85 different types of characters (as 256**4 < 85**5).
    When the length of the original bytes is not a multiple of 4, a special
    rule is used to round up the final group.

    Adobe's flavour may wrap the payload in ``<~`` ... ``~>``.  Both markers
    are treated as optional here: ``base64.a85decode(..., adobe=True)``
    rejects any input that does not end exactly with ``~>``, which fails on
    data without an EOD marker or with whitespace following it.

    Args:
        data: Encoded bytes, optionally starting with ``<~`` and optionally
            terminated by ``~>``.  Anything after the first ``~>`` is
            ignored.

    Returns:
        The decoded bytes.

    Raises:
        ValueError: If the payload contains a byte that is not a valid
            ASCII85 digit (propagated from ``base64.a85decode``).
    """
    data = data.lstrip()
    if data.startswith(b"<~"):
        data = data[2:]
    # "~" is never a valid ASCII85 digit, so the first "~>" is the EOD marker.
    eod = data.find(b"~>")
    if eod != -1:
        data = data[:eod]
    return a85decode(data)


# Matches any single ASCII white-space byte; used to strip white space
# from hex-encoded data before decoding.
bws_re = re.compile(rb"\s")


def asciihexdecode(data: bytes) -> bytes:
    """Decode data for the ASCIIHexDecode filter (PDFReference v1.4, 3.3.1).

    For each pair of ASCII hexadecimal digits (0-9 and A-F or a-f), the
    filter produces one byte of binary data.  All white-space characters
    are ignored.  A right angle bracket character (>) indicates EOD, and
    anything after it is discarded.  If an odd number of hexadecimal digits
    remains, the filter behaves as if a 0 followed the last digit.  This
    padding is applied whether or not the EOD marker is present, so data
    that is merely missing its terminator still decodes.

    Args:
        data: Hex-encoded bytes, optionally terminated by ``>``.

    Returns:
        The decoded bytes.

    Raises:
        binascii.Error: If a non-hexadecimal, non-white-space character
            appears before the EOD marker.
    """
    data = bws_re.sub(b"", data)
    idx = data.find(b">")
    if idx != -1:
        data = data[:idx]
    # Pad on the parity of what is left, not on whether EOD was seen.
    if len(data) % 2 == 1:
        data += b"0"
    return unhexlify(data)
27 changes: 2 additions & 25 deletions pdfminer/pdftypes.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
import io
import logging
import re
import zlib
from base64 import a85decode
from binascii import unhexlify
from typing import (
TYPE_CHECKING,
Any,
Expand All @@ -19,6 +16,7 @@
from warnings import warn

from pdfminer import pdfexceptions, settings
from pdfminer.ascii85 import ascii85decode, asciihexdecode
from pdfminer.ccitt import ccittfaxdecode
from pdfminer.lzw import lzwdecode
from pdfminer.psparser import LIT, PSObject
Expand Down Expand Up @@ -244,27 +242,6 @@ def decompress_corrupted(data: bytes) -> bytes:
return result_str


# Matches any single ASCII white-space byte; used to strip white space
# from hex-encoded data before decoding.
bws_re = re.compile(rb"\s")


def asciihexdecode(data: bytes) -> bytes:
    """Decode data for the ASCIIHexDecode filter (PDFReference v1.4, 3.3.1).

    For each pair of ASCII hexadecimal digits (0-9 and A-F or a-f), the
    filter produces one byte of binary data.  All white-space characters
    are ignored.  A right angle bracket character (>) indicates EOD, and
    anything after it is discarded.  If an odd number of hexadecimal digits
    remains, the filter behaves as if a 0 followed the last digit.  This
    padding is applied whether or not the EOD marker is present, so data
    that is merely missing its terminator still decodes.

    Args:
        data: Hex-encoded bytes, optionally terminated by ``>``.

    Returns:
        The decoded bytes.

    Raises:
        binascii.Error: If a non-hexadecimal, non-white-space character
            appears before the EOD marker.
    """
    digits = bws_re.sub(b"", data)
    eod = digits.find(b">")
    if eod != -1:
        digits = digits[:eod]
    # Pad on the parity of what is left, not on whether EOD was seen.
    if len(digits) % 2:
        digits += b"0"
    return unhexlify(digits)


class PDFStream(PDFObject):
def __init__(
self,
Expand Down Expand Up @@ -366,7 +343,7 @@ def decode(self) -> None:
elif f in LITERALS_LZW_DECODE:
data = lzwdecode(data)
elif f in LITERALS_ASCII85_DECODE:
data = a85decode(data, adobe=True)
data = ascii85decode(data)
elif f in LITERALS_ASCIIHEX_DECODE:
data = asciihexdecode(data)
elif f in LITERALS_RUNLENGTH_DECODE:
Expand Down
9 changes: 4 additions & 5 deletions tests/test_pdfminer_crypto.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
"""Test of various compression/encoding modules (previously in doctests)"""

import binascii
from base64 import a85decode

from pdfminer.arcfour import Arcfour
from pdfminer.ascii85 import ascii85decode, asciihexdecode
from pdfminer.lzw import lzwdecode
from pdfminer.pdftypes import asciihexdecode
from pdfminer.runlength import rldecode


Expand All @@ -19,13 +18,13 @@ def dehex(b):
return binascii.unhexlify(b)


class TestAsciiHex:
class TestAscii85:
def test_ascii85decode(self):
"""The sample string is taken from:
http://en.wikipedia.org/w/index.php?title=Ascii85
"""
assert a85decode(b"9jqo^BlbD-BleB1DJ+*+F(f,q") == b"Man is distinguished"
assert a85decode(b"E,9)oF*2M7/c~>", adobe=True) == b"pleasure."
assert ascii85decode(b"9jqo^BlbD-BleB1DJ+*+F(f,q") == b"Man is distinguished"
assert ascii85decode(b"E,9)oF*2M7/c~>") == b"pleasure."

def test_asciihexdecode(self):
assert asciihexdecode(b"61 62 2e6364 65") == b"ab.cde"
Expand Down

0 comments on commit 5f5bffd

Please sign in to comment.