From 734f9048dc1ecea04ccfe767cbbbcbfd517ab5eb Mon Sep 17 00:00:00 2001
From: Timo Ramsauer
Date: Mon, 24 Apr 2023 18:36:32 +0200
Subject: [PATCH 1/3] Removed duplicate compression

---
 weasyprint/pdf/anchors.py | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/weasyprint/pdf/anchors.py b/weasyprint/pdf/anchors.py
index 86e1aca2e..6fd74f8b4 100644
--- a/weasyprint/pdf/anchors.py
+++ b/weasyprint/pdf/anchors.py
@@ -261,23 +261,18 @@ def write_pdf_attachment(pdf, attachment, url_fetcher):
             uncompressed_length = 0
             stream = b''
             md5 = hashlib.md5()
-            compress = zlib.compressobj()
             for data in iter(lambda: source.read(4096), b''):
                 uncompressed_length += len(data)
                 md5.update(data)
-                compressed = compress.compress(data)
-                stream += compressed
-            compressed = compress.flush(zlib.Z_FINISH)
-            stream += compressed
+                stream += data
             file_extra = pydyf.Dictionary({
                 'Type': '/EmbeddedFile',
-                'Filter': '/FlateDecode',
                 'Params': pydyf.Dictionary({
                     'CheckSum': f'<{md5.hexdigest()}>',
                     'Size': uncompressed_length,
                 })
             })
-            file_stream = pydyf.Stream([stream], file_extra, compress)
+            file_stream = pydyf.Stream([stream], file_extra, compress=True)
             pdf.add_object(file_stream)
 
     except URLFetchingError as exception:

From df25267a3bfcb2bae0bb0cabeb027f0964efdfbe Mon Sep 17 00:00:00 2001
From: Timo Ramsauer
Date: Mon, 24 Apr 2023 18:38:22 +0200
Subject: [PATCH 2/3] Removed zlib import

---
 weasyprint/pdf/anchors.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/weasyprint/pdf/anchors.py b/weasyprint/pdf/anchors.py
index 6fd74f8b4..e365b4781 100644
--- a/weasyprint/pdf/anchors.py
+++ b/weasyprint/pdf/anchors.py
@@ -2,7 +2,6 @@
 
 import hashlib
 import io
-import zlib
 from os.path import basename
 from urllib.parse import unquote, urlsplit
 

From 1db2d6d7c70674a66d2600a766a6003bff39592f Mon Sep 17 00:00:00 2001
From: Timo Ramsauer
Date: Wed, 26 Apr 2023 07:19:41 +0200
Subject: [PATCH 3/3] Compress attachments based on uncompressed_pdf

---
 weasyprint/pdf/__init__.py | 2 +-
 weasyprint/pdf/anchors.py  | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/weasyprint/pdf/__init__.py b/weasyprint/pdf/__init__.py
index 22a08b743..6bfb3ddd1 100644
--- a/weasyprint/pdf/__init__.py
+++ b/weasyprint/pdf/__init__.py
@@ -242,7 +242,7 @@ def generate_pdf(document, target, zoom, **options):
     pdf_attachments = []
     for attachment in attachments:
         pdf_attachment = write_pdf_attachment(
-            pdf, attachment, document.url_fetcher)
+            pdf, attachment, document.url_fetcher, compress)
         if pdf_attachment is not None:
             pdf_attachments.append(pdf_attachment)
     if pdf_attachments:
diff --git a/weasyprint/pdf/anchors.py b/weasyprint/pdf/anchors.py
index e365b4781..1760cc16b 100644
--- a/weasyprint/pdf/anchors.py
+++ b/weasyprint/pdf/anchors.py
@@ -213,7 +213,7 @@ def add_annotations(links, matrix, document, pdf, page, annot_files, compress):
                 # above about multiple regions won't always be correct, because
                 # two links might have the same href, but different titles.
                 annot_files[annot_target] = write_pdf_attachment(
-                    pdf, (annot_target, None), document.url_fetcher)
+                    pdf, (annot_target, None), document.url_fetcher, compress)
             annot_file = annot_files[annot_target]
             if annot_file is None:
                 continue
@@ -241,7 +241,7 @@ def add_annotations(links, matrix, document, pdf, page, annot_files, compress):
         page['Annots'].append(annot.reference)
 
 
-def write_pdf_attachment(pdf, attachment, url_fetcher):
+def write_pdf_attachment(pdf, attachment, url_fetcher, compress):
     """Write an attachment to the PDF stream."""
     # Attachments from document links like <link> or <a> can only be URLs.
     # They're passed in as tuples
@@ -271,7 +271,7 @@ def write_pdf_attachment(pdf, attachment, url_fetcher):
                     'Size': uncompressed_length,
                 })
             })
-            file_stream = pydyf.Stream([stream], file_extra, compress=True)
+            file_stream = pydyf.Stream([stream], file_extra, compress=compress)
             pdf.add_object(file_stream)
 
     except URLFetchingError as exception:
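
Note (not part of the patch series above): the change works because pydyf.Stream applies the /FlateDecode filter and deflates the payload itself when its compress flag is set, so the manual zlib pass in the old code compressed the attachment data twice. The sketch below illustrates that behaviour with a made-up payload; the example bytes and variable names are placeholders, and only the pydyf calls mirror the patched write_pdf_attachment().

# Minimal sketch, not part of the patches: build an /EmbeddedFile stream the
# way write_pdf_attachment() does after the change, letting pydyf handle the
# compression instead of pre-compressing with zlib.
import hashlib

import pydyf

data = b'example attachment payload'  # placeholder for the fetched file bytes
md5 = hashlib.md5(data)

file_extra = pydyf.Dictionary({
    'Type': '/EmbeddedFile',
    'Params': pydyf.Dictionary({
        'CheckSum': f'<{md5.hexdigest()}>',
        'Size': len(data),
    }),
})

# compress=True makes pydyf add the /Filter /FlateDecode entry and deflate
# the stream; after patch 3 this flag comes from WeasyPrint's uncompressed_pdf
# option via the new compress argument.
file_stream = pydyf.Stream([data], file_extra, compress=True)

pdf = pydyf.PDF()
pdf.add_object(file_stream)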