Skip to content

Commit

Permalink
Merge pull request #1829 from Kozea/save-memory
Browse files Browse the repository at this point in the history
Save memory
  • Loading branch information
liZe authored Mar 26, 2023
2 parents 1c13656 + c7087d3 commit 74823d3
Show file tree
Hide file tree
Showing 6 changed files with 268 additions and 172 deletions.
12 changes: 10 additions & 2 deletions weasyprint/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,11 @@ def render(self, stylesheets=None, presentational_hints=False,
:param font_config: A font configuration handling ``@font-face`` rules.
:type counter_style: :class:`css.counters.CounterStyle`
:param counter_style: A dictionary storing ``@counter-style`` rules.
:param dict image_cache: A dictionary used to cache images.
:param image_cache:
A dictionary used to cache images, or a folder path where images
are temporarily stored.
:type image_cache:
:obj:`dict`, :obj:`str` or :class:`document.DiskCache`
:param bool forms: Whether PDF forms have to be included.
:returns: A :class:`document.Document` object.
Expand Down Expand Up @@ -186,7 +190,11 @@ def write_pdf(self, target=None, stylesheets=None, zoom=1,
:param font_config: A font configuration handling ``@font-face`` rules.
:type counter_style: :class:`css.counters.CounterStyle`
:param counter_style: A dictionary storing ``@counter-style`` rules.
:param dict image_cache: A dictionary used to cache images.
:param image_cache:
A dictionary used to cache images, or a folder path where images
are temporarily stored.
:type image_cache:
:obj:`dict`, :obj:`str` or :class:`document.DiskCache`
:param bytes identifier: A bytestring used as PDF file identifier.
:param str variant: A PDF variant name.
:param str version: A PDF version number.
Expand Down
10 changes: 10 additions & 0 deletions weasyprint/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,11 @@ def main(argv=None, stdout=None, stdin=None):
multiple times, ``all`` adds all allowed values, ``none`` removes all
previously set values.
.. option:: -c <folder>, --cache-folder <folder>
Store cache on disk instead of memory. The ``folder`` is created if
needed and cleaned after the PDF is generated.
.. option:: -v, --verbose
Show warnings and information messages.
Expand Down Expand Up @@ -156,6 +161,10 @@ def main(argv=None, stdout=None, stdin=None):
'-O', '--optimize-size', action='append',
help='optimize output size for specified features',
choices=('images', 'fonts', 'all', 'none'), default=['fonts'])
parser.add_argument(
'-c', '--cache-folder',
help='Store cache on disk instead of memory. The ``folder`` is '
'created if needed and cleaned after the PDF is generated.')
parser.add_argument(
'-v', '--verbose', action='store_true',
help='show warnings and information messages')
Expand Down Expand Up @@ -203,6 +212,7 @@ def main(argv=None, stdout=None, stdin=None):
'version': args.pdf_version,
'forms': args.pdf_forms,
'custom_metadata': args.custom_metadata,
'image_cache': args.cache_folder,
}

# Default to logging to stderr.
Expand Down
70 changes: 59 additions & 11 deletions weasyprint/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@

import functools
import io
import shutil
from hashlib import md5
from pathlib import Path

from . import CSS
from .anchors import gather_anchors, make_page_bookmark_tree
Expand Down Expand Up @@ -159,6 +160,52 @@ def __init__(self, title=None, authors=None, description=None,
self.custom = custom or {}


class DiskCache:
    """Dict-like cache storing image content on disk.

    Bytestring values are written to files inside ``folder``, each file being
    named after the MD5 hex digest of its key. Other Python objects (i.e.
    RasterImage instances) are still stored in memory, but are much more
    lightweight.

    Keys must be strings, as they are encoded before being hashed.

    When the cache object is finalized, the files it wrote are deleted and
    the folder itself is removed (which only succeeds if it is empty).

    """
    def __init__(self, folder):
        """Create the cache, creating ``folder`` (and parents) if needed.

        :param folder: Path of the folder where bytestrings are stored.

        """
        self._path = Path(folder)
        self._path.mkdir(parents=True, exist_ok=True)
        # Non-bytes values, kept in memory.
        self._memory_cache = {}
        # Files written by this instance, removed by ``__del__``.
        self._disk_paths = set()

    def _path_from_key(self, key):
        """Return the path of the file storing the value for ``key``."""
        return self._path / md5(key.encode()).hexdigest()

    def __getitem__(self, key):
        """Return the value stored for ``key``.

        :raises KeyError: If ``key`` is neither in memory nor on disk.

        """
        try:
            return self._memory_cache[key]
        except KeyError:
            pass
        try:
            return self._path_from_key(key).read_bytes()
        except FileNotFoundError as error:
            # Follow the mapping protocol instead of leaking an I/O error.
            raise KeyError(key) from error

    def __setitem__(self, key, value):
        """Store ``value`` on disk if it is ``bytes``, in memory otherwise."""
        if isinstance(value, bytes):
            path = self._path_from_key(key)
            self._disk_paths.add(path)
            path.write_bytes(value)
            # Memory is looked up first when reading: drop any previous
            # in-memory value so that it does not shadow the new bytes.
            self._memory_cache.pop(key, None)
        else:
            self._memory_cache[key] = value

    def __contains__(self, key):
        """Return whether ``key`` is stored in memory or on disk."""
        return (
            key in self._memory_cache or
            self._path_from_key(key).exists())

    def __del__(self):
        """Delete the files written by this cache, then the folder."""
        try:
            for path in self._disk_paths:
                path.unlink(missing_ok=True)
            self._path.rmdir()
        except Exception:
            # Silently ignore errors while clearing cache: cleaning is only
            # best-effort and a finalizer has nowhere to report failures.
            pass


class Document:
"""A rendered document ready to be painted in a pydyf stream.
Expand All @@ -181,7 +228,10 @@ def _build_layout_context(cls, html, stylesheets, presentational_hints,
target_collector = TargetCollector()
page_rules = []
user_stylesheets = []
image_cache = {} if image_cache is None else image_cache
if image_cache is None:
image_cache = {}
elif not isinstance(image_cache, DiskCache):
image_cache = DiskCache(image_cache)
for css in stylesheets or []:
if not hasattr(css, 'matcher'):
css = CSS(
Expand Down Expand Up @@ -364,15 +414,13 @@ def write_pdf(self, target=None, zoom=1, attachments=None, finisher=None,
if finisher:
finisher(self, pdf)

output = io.BytesIO()
pdf.write(output, version=pdf.version, identifier=identifier)

if target is None:
output = io.BytesIO()
pdf.write(output, version=pdf.version, identifier=identifier)
return output.getvalue()

if hasattr(target, 'write'):
pdf.write(target, version=pdf.version, identifier=identifier)
else:
output.seek(0)
if hasattr(target, 'write'):
shutil.copyfileobj(output, target)
else:
with open(target, 'wb') as fd:
shutil.copyfileobj(output, fd)
with open(target, 'wb') as fd:
pdf.write(fd, version=pdf.version, identifier=identifier)
2 changes: 1 addition & 1 deletion weasyprint/draw.py
Original file line number Diff line number Diff line change
Expand Up @@ -1199,7 +1199,7 @@ def draw_first_line(stream, textbox, text_overflow, block_ellipsis, x, y,
pillow_image = Image.open(BytesIO(png_data))
image_id = f'{font.hash}{glyph}'
image = RasterImage(
pillow_image, image_id, optimize_size=())
pillow_image, image_id, optimize_size=(), cache={})
d = font.widths[glyph] / 1000
a = pillow_image.width / pillow_image.height * d
pango.pango_font_get_glyph_extents(
Expand Down
Loading

0 comments on commit 74823d3

Please sign in to comment.