Skip to content

Commit

Permalink
Merge pull request #1796 from Kozea/forms
Browse files Browse the repository at this point in the history
Support PDF forms
  • Loading branch information
liZe authored Feb 3, 2023
2 parents b446f6e + e655a78 commit 4978837
Show file tree
Hide file tree
Showing 26 changed files with 627 additions and 400 deletions.
23 changes: 23 additions & 0 deletions docs/api_reference.rst
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,10 @@ check that they follow the rules listed by the related specifications. The main
constraint is to use a correct HTML structure to avoid inconsistencies in the
PDF structure.

Generated PDFs can include forms, using the ``appearance: auto`` CSS
declaration or the ``--pdf-forms`` CLI option. Only text inputs, text areas and
check boxes are supported.


Fonts
~~~~~
Expand Down Expand Up @@ -743,3 +747,22 @@ All the ``flex-*``, ``align-*``, ``justify-*`` and ``order`` properties are
supported. The ``flex`` and ``flex-flow`` shorthands are supported too.

.. _CSS Flexible Box Layout Module Level 1: https://www.w3.org/TR/css-flexbox-1/

CSS Basic User Interface Module Level 3/4
+++++++++++++++++++++++++++++++++++++++++

The `CSS Basic User Interface Module Level 3/4`_ "enables authors to style user
interface related properties and values."

The ``outline-width``, ``outline-style``, ``outline-color`` properties and the
``outline`` shorthand are supported. The ``outline-offset`` property is **not**
supported.

The ``resize``, ``cursor``, ``caret-*`` and ``nav-*`` properties are **not**
supported.

The ``appearance`` property is supported. When set to ``auto``, it displays
form fields as PDF form fields (supported for text inputs, check boxes and
text areas only).

The ``accent-color`` property is **not** supported.
26 changes: 25 additions & 1 deletion tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
import pytest
from PIL import Image
from weasyprint import CSS, HTML, __main__, default_url_fetcher
from weasyprint.links import resolve_links
from weasyprint.pdf.anchors import resolve_links
from weasyprint.urls import path2url

from .draw import parse_pixels
Expand Down Expand Up @@ -464,6 +464,30 @@ def test_partial_pdf_custom_metadata():
assert b'value' in stdout


@pytest.mark.parametrize('html, field', (
    (b'<input>', b'/Tx'),
    (b'<input type="checkbox">', b'/Btn'),
    (b'<textarea></textarea>', b'/Tx'),
))
def test_pdf_inputs(html, field):
    """Check that form fields are only written when ``--pdf-forms`` is set.

    With the option, the output must contain an ``AcroForm`` entry and the
    field type expected for the given HTML element; without it, no
    ``AcroForm`` entry may be present.
    """
    pdf_with_forms = _run('--pdf-forms - -', html)
    assert b'AcroForm' in pdf_with_forms
    assert field in pdf_with_forms

    pdf_without_forms = _run('- -', html)
    assert b'AcroForm' not in pdf_without_forms


@pytest.mark.parametrize('css, with_forms, without_forms', (
    ('appearance: auto', True, True),
    ('appearance: none', False, False),
    ('', True, False),
))
def test_appearance(css, with_forms, without_forms):
    """Check how the inline ``appearance`` style combines with ``--pdf-forms``.

    ``with_forms`` and ``without_forms`` tell whether an ``AcroForm`` entry
    is expected in the output when the CLI option is set or not set.
    """
    html = f'<input style="{css}">'.encode()

    has_form_with_option = b'AcroForm' in _run('--pdf-forms - -', html)
    assert has_form_with_option is with_forms

    has_form_without_option = b'AcroForm' in _run('- -', html)
    assert has_form_without_option is without_forms


def test_reproducible():
os.environ['SOURCE_DATE_EPOCH'] = '0'
stdout1 = _run('- -', b'<body>a<img src=pattern.png>')
Expand Down
3 changes: 2 additions & 1 deletion tests/test_css.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,8 @@ def test_expand_shorthands():
@assert_no_logs
def test_annotate_document():
document = FakeHTML(resource_filename('doc1.html'))
document._ua_stylesheets = lambda: [CSS(resource_filename('mini_ua.css'))]
document._ua_stylesheets = (
lambda *_, **__: [CSS(resource_filename('mini_ua.css'))])
style_for = get_all_computed_styles(
document, user_stylesheets=[CSS(resource_filename('user.css'))])

Expand Down
7 changes: 5 additions & 2 deletions tests/testing_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from weasyprint.css.counters import CounterStyle
from weasyprint.css.targets import TargetCollector
from weasyprint.formatting_structure import boxes, build
from weasyprint.html import HTML5_UA_STYLESHEET
from weasyprint.logger import LOGGER
from weasyprint.urls import path2url

Expand Down Expand Up @@ -47,8 +48,10 @@

class FakeHTML(HTML):
"""Like weasyprint.HTML, but with a lighter UA stylesheet."""
def _ua_stylesheets(self):
return [TEST_UA_STYLESHEET]
def _ua_stylesheets(self, forms=False):
return [
TEST_UA_STYLESHEET if stylesheet == HTML5_UA_STYLESHEET
else stylesheet for stylesheet in super()._ua_stylesheets(forms)]


def resource_filename(basename):
Expand Down
24 changes: 15 additions & 9 deletions weasyprint/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,9 @@ def __init__(self, guess=None, filename=None, url=None, file_obj=None,
result, content_language=None)
self.etree_element = self.wrapper_element.etree_element

def _ua_stylesheets(self):
def _ua_stylesheets(self, forms=False):
if forms:
return [HTML5_UA_STYLESHEET, HTML5_UA_FORM_STYLESHEET]
return [HTML5_UA_STYLESHEET]

def _ua_counter_style(self):
Expand All @@ -117,7 +119,7 @@ def _ph_stylesheets(self):

def render(self, stylesheets=None, presentational_hints=False,
optimize_size=('fonts',), font_config=None, counter_style=None,
image_cache=None):
image_cache=None, forms=False):
"""Lay out and paginate the document, but do not (yet) export it.
This returns a :class:`document.Document` object which provides
Expand All @@ -137,18 +139,20 @@ def render(self, stylesheets=None, presentational_hints=False,
:type counter_style: :class:`css.counters.CounterStyle`
:param counter_style: A dictionary storing ``@counter-style`` rules.
:param dict image_cache: A dictionary used to cache images.
:param bool forms: Whether PDF forms have to be included.
:returns: A :class:`document.Document` object.
"""
return Document._render(
self, stylesheets, presentational_hints,
optimize_size, font_config, counter_style, image_cache)
self, stylesheets, presentational_hints, optimize_size,
font_config, counter_style, image_cache, forms)

def write_pdf(self, target=None, stylesheets=None, zoom=1,
attachments=None, finisher=None, presentational_hints=False,
optimize_size=('fonts',), font_config=None,
counter_style=None, image_cache=None, identifier=None,
variant=None, version=None, custom_metadata=False):
variant=None, version=None, forms=False,
custom_metadata=False):
"""Render the document to a PDF file.
This is a shortcut for calling :meth:`render`, then
Expand Down Expand Up @@ -186,8 +190,9 @@ def write_pdf(self, target=None, stylesheets=None, zoom=1,
:param bytes identifier: A bytestring used as PDF file identifier.
:param str variant: A PDF variant name.
:param str version: A PDF version number.
:param bool custom_metadata: A boolean defining whether custom HTML
metadata should be stored in the generated PDF.
:param bool forms: Whether PDF forms have to be included.
:param bool custom_metadata: Whether custom HTML metadata should be
stored in the generated PDF.
:returns:
The PDF as :obj:`bytes` if ``target`` is not provided or
:obj:`None`, otherwise :obj:`None` (the PDF is written to
Expand All @@ -197,7 +202,7 @@ def write_pdf(self, target=None, stylesheets=None, zoom=1,
return (
self.render(
stylesheets, presentational_hints, optimize_size, font_config,
counter_style, image_cache)
counter_style, image_cache, forms)
.write_pdf(
target, zoom, attachments, finisher, identifier, variant,
version, custom_metadata))
Expand Down Expand Up @@ -335,5 +340,6 @@ def _select_source(guess=None, filename=None, url=None, file_obj=None,
# Work around circular imports.
from .css import preprocess_stylesheet # noqa isort:skip
from .html import ( # noqa isort:skip
HTML5_UA_COUNTER_STYLE, HTML5_UA_STYLESHEET, HTML5_PH_STYLESHEET)
HTML5_UA_COUNTER_STYLE, HTML5_UA_STYLESHEET, HTML5_UA_FORM_STYLESHEET,
HTML5_PH_STYLESHEET)
from .document import Document, Page # noqa isort:skip
3 changes: 3 additions & 0 deletions weasyprint/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,8 @@ def main(argv=None, stdout=None, stdin=None):
parser.add_argument(
'--pdf-variant', choices=VARIANTS, help='PDF variant to generate')
parser.add_argument('--pdf-version', help='PDF version number')
parser.add_argument(
'--pdf-forms', action='store_true', help='Include PDF forms')
parser.add_argument(
'--custom-metadata', action='store_true',
help='include custom HTML meta tags in PDF metadata')
Expand Down Expand Up @@ -199,6 +201,7 @@ def main(argv=None, stdout=None, stdin=None):
'identifier': args.pdf_identifier,
'variant': args.pdf_variant,
'version': args.pdf_version,
'forms': args.pdf_forms,
'custom_metadata': args.custom_metadata,
}

Expand Down
70 changes: 20 additions & 50 deletions weasyprint/links.py → weasyprint/anchors.py
Original file line number Diff line number Diff line change
@@ -1,53 +1,12 @@
"""PDF links and bookmarks management."""
"""Find anchors, links, bookmarks and inputs in documents."""

import math

from .formatting_structure import boxes
from .layout.percent import percentage
from .logger import LOGGER
from .matrix import Matrix


def resolve_links(pages):
    """Resolve internal hyperlinks.

    Internal links pointing to a missing anchor are dropped and an error is
    logged. If multiple anchors have the same name, only the first one (in
    page order) is kept.

    :param pages:
        A re-iterable collection of pages (it is iterated twice). Each page
        must expose ``anchors``, a mapping of anchor names to ``(x, y)``
        points, and ``links``, an iterable of 4-tuples whose first two items
        are the link type and the link target.
    :returns:
        A generator yielding one ``(links, anchors)`` tuple per page.
        ``links`` is the list of the page's links without the internal
        links whose anchor does not exist; kept links are passed through
        unchanged. ``anchors`` is the list of ``(name, x, y)`` tuples for
        the anchors first defined on that page.

    """
    known_anchors = set()
    paged_anchors = []
    for page in pages:
        page_anchors = []
        for anchor_name, (point_x, point_y) in page.anchors.items():
            # In case of duplicate names, only the first anchor is kept.
            if anchor_name not in known_anchors:
                page_anchors.append((anchor_name, point_x, point_y))
                known_anchors.add(anchor_name)
        paged_anchors.append(page_anchors)

    # Second pass: all anchors of all pages are known now, so links pointing
    # forward to later pages can be validated too. Walking both sequences in
    # lockstep avoids the O(n) cost of popping the head of a list per page.
    for page, page_anchors in zip(pages, paged_anchors):
        page_links = []
        for link in page.links:
            link_type, anchor_name, _, _ = link
            if link_type == 'internal' and anchor_name not in known_anchors:
                LOGGER.error(
                    'No anchor #%s for internal URI reference',
                    anchor_name)
            else:
                # External link, or internal link with an existing anchor.
                page_links.append(link)
        yield page_links, page_anchors


def rectangle_aabb(matrix, pos_x, pos_y, width, height):
"""Apply a transformation matrix to an axis-aligned rectangle.
Expand All @@ -68,8 +27,12 @@ def rectangle_aabb(matrix, pos_x, pos_y, width, height):
return box_x1, box_y1, box_x2, box_y2


def gather_links_and_bookmarks(box, anchors, links, bookmarks,
parent_matrix=None):
def gather_anchors(box, anchors, links, bookmarks, inputs, parent_matrix=None):
"""Gather anchors and other data related to specific positions in PDF.
Currently finds anchors, links, bookmarks and inputs.
"""
# Get box transformation matrix.
# "Transforms apply to block-level and atomic inline-level elements,
# but do not apply to elements which may be split into
Expand Down Expand Up @@ -124,19 +87,26 @@ def gather_links_and_bookmarks(box, anchors, links, bookmarks,
has_link = link and not isinstance(box, (boxes.TextBox, boxes.LineBox))
# In case of duplicate IDs, only the first is an anchor.
has_anchor = anchor_name and anchor_name not in anchors
is_input = box.is_input()

if has_bookmark or has_link or has_anchor:
pos_x, pos_y, width, height = box.hit_area()
if has_bookmark or has_link or has_anchor or is_input:
if is_input:
pos_x, pos_y = box.content_box_x(), box.content_box_y()
width, height = box.width, box.height
else:
pos_x, pos_y, width, height = box.hit_area()
if has_link or is_input:
rectangle = rectangle_aabb(matrix, pos_x, pos_y, width, height)
if has_link:
token_type, link = link
assert token_type == 'url'
link_type, target = link
assert isinstance(target, str)
if link_type == 'external' and box.is_attachment():
link_type = 'attachment'
rectangle = rectangle_aabb(matrix, pos_x, pos_y, width, height)
link = (link_type, target, rectangle, box)
links.append(link)
links.append((link_type, target, rectangle, box))
if is_input:
inputs.append((box.element, box.style, rectangle))
if matrix and (has_bookmark or has_anchor):
pos_x, pos_y = matrix.transform_point(pos_x, pos_y)
if has_bookmark:
Expand All @@ -146,7 +116,7 @@ def gather_links_and_bookmarks(box, anchors, links, bookmarks,
anchors[anchor_name] = pos_x, pos_y

for child in box.all_children():
gather_links_and_bookmarks(child, anchors, links, bookmarks, matrix)
gather_anchors(child, anchors, links, bookmarks, inputs, matrix)


def make_page_bookmark_tree(page, skipped_levels, last_by_depth,
Expand Down
4 changes: 2 additions & 2 deletions weasyprint/css/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1096,7 +1096,7 @@ def preprocess_stylesheet(device_media_type, base_url, stylesheet_rules,
def get_all_computed_styles(html, user_stylesheets=None,
presentational_hints=False, font_config=None,
counter_style=None, page_rules=None,
target_collector=None):
target_collector=None, forms=False):
"""Compute all the computed styles of all elements in ``html`` document.
Do everything from finding author stylesheets to parsing and applying them.
Expand All @@ -1112,7 +1112,7 @@ def get_all_computed_styles(html, user_stylesheets=None,
for style in html._ua_counter_style():
for key, value in style.items():
counter_style[key] = value
for sheet in (html._ua_stylesheets() or []):
for sheet in (html._ua_stylesheets(forms) or []):
sheets.append((sheet, 'user agent', None))
if presentational_hints:
for sheet in (html._ph_stylesheets() or []):
Expand Down
9 changes: 4 additions & 5 deletions weasyprint/css/computed_values.py
Original file line number Diff line number Diff line change
Expand Up @@ -748,7 +748,7 @@ def strut_layout(style, context=None):
if key in context.strut_layouts:
return context.strut_layouts[key]

layout = Layout(context, style['font_size'], style)
layout = Layout(context, style)
layout.set_text(' ')
line, _ = layout.get_first_line()
_, _, _, _, text_height, baseline = first_line_metrics(
Expand Down Expand Up @@ -782,11 +782,10 @@ def character_ratio(style, character):
style = style.copy()
style['letter_spacing'] = 'normal'
style['word_spacing'] = 0

# Random big value
font_size = 1000
style['font_size'] = 1000

layout = Layout(context=None, font_size=font_size, style=style)
layout = Layout(context=None, style=style)
layout.set_text(character)
line, _ = layout.get_first_line()

Expand All @@ -802,6 +801,6 @@ def character_ratio(style, character):

# Zero means some kind of failure, fallback is 0.5.
# We round to try keeping exact values that were altered by Pango.
ratio = round(measure / font_size, 5) or 0.5
ratio = round(measure / style['font_size'], 5) or 0.5
cache[cache_key] = ratio
return ratio
Loading

0 comments on commit 4978837

Please sign in to comment.