Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Include radio buttons when processing forms #2143

Merged
merged 12 commits into from
May 14, 2024
12 changes: 11 additions & 1 deletion tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -510,6 +510,15 @@ def test_partial_pdf_custom_metadata():
('<input>', ['/Tx', '/V ()']),
('<input value="">', ['/Tx', '/V ()']),
('<input type="checkbox">', ['/Btn']),
('<input type="radio">',
['/Btn', '/V /Off', '/AS /Off', f'/Ff {1 << (16 - 1)}']),
('<input checked type="radio" name="foo" value="Some Value">',
['/Btn', '/TU (foo)', '/V (Some Value)', '/AS (Some Value)']),
('<form><input type="radio" name="foo" value="v1"></form>'
'<form><input checked type="radio" name="foo" value="v1"></form>',
['/Btn', '/V (v1)',
'/AS (v1)', '/V (v1)',
'/AS /Off', '/V /Off']),
('<textarea></textarea>', ['/Tx', '/V ()']),
('<select><option value="a">A</option></select>', ['/Ch', '/Opt']),
('<select>'
Expand All @@ -525,7 +534,8 @@ def test_partial_pdf_custom_metadata():
def test_pdf_inputs(html, fields):
    """Check that form fields are only written when ``--pdf-forms`` is set.

    ``html`` is the document passed to ``_run`` as input, and ``fields``
    lists the strings that must each appear in the uncompressed PDF output
    generated with ``--pdf-forms``.

    """
    stdout = _run('--pdf-forms --uncompressed-pdf - -', html.encode())
    assert b'AcroForm' in stdout
    # One assertion per field, so that a failure names the missing string
    # instead of reporting a bare ``all(...)`` as false.
    for field in fields:
        assert field.encode() in stdout
    # Without the option, no form dictionary must be written at all.
    stdout = _run('--uncompressed-pdf - -', html.encode())
    assert b'AcroForm' not in stdout

Expand Down
16 changes: 12 additions & 4 deletions weasyprint/anchors.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,12 @@ def rectangle_aabb(matrix, pos_x, pos_y, width, height):
return box_x1, box_y1, box_x2, box_y2


def gather_anchors(box, anchors, links, bookmarks, inputs, parent_matrix=None):
def gather_anchors(box, anchors, links, bookmarks, forms,
parent_matrix=None,
parent_form=None):
"""Gather anchors and other data related to specific positions in PDF.

Currently finds anchors, links, bookmarks and inputs.
Currently finds anchors, links, bookmarks and forms.

"""
# Get box transformation matrix.
Expand Down Expand Up @@ -89,6 +91,11 @@ def gather_anchors(box, anchors, links, bookmarks, inputs, parent_matrix=None):
has_anchor = anchor_name and anchor_name not in anchors
is_input = box.is_input()

if box.is_form():
parent_form = box.element
if parent_form not in forms:
forms[parent_form] = []

if has_bookmark or has_link or has_anchor or is_input:
if is_input:
pos_x, pos_y = box.content_box_x(), box.content_box_y()
Expand All @@ -106,7 +113,7 @@ def gather_anchors(box, anchors, links, bookmarks, inputs, parent_matrix=None):
link_type = 'attachment'
links.append((link_type, target, rectangle, box))
if is_input:
inputs.append((box.element, box.style, rectangle))
forms[parent_form].append((box.element, box.style, rectangle))
if matrix and (has_bookmark or has_anchor):
pos_x, pos_y = matrix.transform_point(pos_x, pos_y)
if has_bookmark:
Expand All @@ -116,7 +123,8 @@ def gather_anchors(box, anchors, links, bookmarks, inputs, parent_matrix=None):
anchors[anchor_name] = pos_x, pos_y

for child in box.all_children():
gather_anchors(child, anchors, links, bookmarks, inputs, matrix)
gather_anchors(child, anchors, links, bookmarks, forms, matrix,
parent_form=parent_form)


def make_page_bookmark_tree(page, skipped_levels, last_by_depth,
Expand Down
13 changes: 9 additions & 4 deletions weasyprint/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ class Page:
instantiated directly.

"""

def __init__(self, page_box):
#: The page width, including margins, in CSS pixels.
self.width = page_box.margin_width()
Expand Down Expand Up @@ -67,14 +68,16 @@ def __init__(self, page_box):
#: ``(x, y)`` point in CSS pixels from the top-left of the page.
self.anchors = {}

#: The :obj:`list` of ``(element, attributes, rectangle)`` :obj:`tuples
#: <tuple>`. A ``rectangle`` is ``(x, y, width, height)``, in CSS
#: The :obj:`dict` mapping form elements to a list
#: of ``(element, attributes, rectangle)`` :obj:`tuples <tuple>`.
#: A ``rectangle`` is ``(x, y, width, height)``, in CSS
#: pixels from the top-left of the page. ``attributes`` is a
#: :obj:`dict` of HTML tag attributes and values.
self.inputs = []
#: The key ``None`` will contain inputs that are not part of a form.
self.forms = {None: []}

gather_anchors(
page_box, self.anchors, self.links, self.bookmarks, self.inputs)
page_box, self.anchors, self.links, self.bookmarks, self.forms)
self._page_box = page_box

def paint(self, stream, scale=1):
Expand Down Expand Up @@ -105,6 +108,7 @@ class DocumentMetadata:
New attributes may be added in future versions of WeasyPrint.

"""

def __init__(self, title=None, authors=None, description=None,
keywords=None, generator=None, created=None, modified=None,
attachments=None, lang=None, custom=None):
Expand Down Expand Up @@ -162,6 +166,7 @@ class DiskCache:
(i.e. RasterImage instances) are still stored in memory.

"""

def __init__(self, folder):
self._path = Path(folder)
self._path.mkdir(parents=True, exist_ok=True)
Expand Down
6 changes: 6 additions & 0 deletions weasyprint/formatting_structure/boxes.py
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,12 @@ def is_input(self):
return not isinstance(self, (LineBox, TextBox))
return False

def is_form(self):
    """Return whether this box is a form element.

    A box is a form when it is attached to an element whose tag is exactly
    ``form``. Boxes without an element are never forms.

    """
    element = self.element
    if element is None:
        return False
    # Compare for equality: ``tag in 'form'`` would be a substring test,
    # matching tags such as 'f', 'or' or '' and raising TypeError for
    # non-string tags.
    return element.tag == 'form'

okkays marked this conversation as resolved.
Show resolved Hide resolved

class ParentBox(Box):
"""A box that has children."""
Expand Down
6 changes: 3 additions & 3 deletions weasyprint/pdf/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from .stream import Stream

from .anchors import ( # isort:skip
add_annotations, add_inputs, add_links, add_outlines, resolve_links,
add_annotations, add_forms, add_links, add_outlines, resolve_links,
write_pdf_attachment)

VARIANTS = {
Expand Down Expand Up @@ -184,8 +184,8 @@ def generate_pdf(document, target, zoom, **options):
add_annotations(
links_and_anchors[0], matrix, document, pdf, pdf_page, annot_files,
compress)
add_inputs(
page.inputs, matrix, pdf, pdf_page, resources, stream,
add_forms(
page.forms, matrix, pdf, pdf_page, resources, stream,
document.font_config.font_map, compress)
page.paint(stream, scale)

Expand Down
94 changes: 73 additions & 21 deletions weasyprint/pdf/anchors.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Insert anchors, links, bookmarks and inputs in PDFs."""

import collections
import io
import mimetypes
from hashlib import md5
Expand Down Expand Up @@ -91,10 +92,31 @@ def add_outlines(pdf, bookmarks, parent=None):
return outlines, count


def add_inputs(inputs, matrix, pdf, page, resources, stream, font_map,
compress):
def _make_checked_stream(resources, width, height, compress, style, font_size):
    """Build the form XObject stream drawn for a checked button.

    The returned stream has a ``width`` × ``height`` bounding box and shows
    the string ``'4'`` with the ``ZaDb`` font at ``font_size``, using the
    first three components of ``style['color']`` as RGB color. The stream is
    only created here: it is not registered in any PDF object.

    """
    stream = pydyf.Stream(
        extra={
            'Resources': resources.reference,
            'Type': '/XObject',
            'Subtype': '/Form',
            'BBox': pydyf.Array((0, 0, width, height)),
        },
        compress=compress)

    stream.push_state()
    stream.begin_text()
    stream.set_color_rgb(*style['color'][:3])
    stream.set_font_size('ZaDb', font_size)
    # Center (let’s assume that Dingbat’s check has a 0.8em size)
    offset_x = (width - font_size * 0.8) / 2
    offset_y = (height - font_size * 0.8) / 2
    stream.move_text_to(offset_x, offset_y)
    stream.show_text_string('4')
    stream.end_text()
    stream.pop_state()

    return stream


def add_forms(forms, matrix, pdf, page, resources, stream, font_map,
compress):
"""Include form inputs in PDF."""
if not inputs:
if not forms or not any(forms.values()):
return

if 'Annots' not in page:
Expand All @@ -109,12 +131,19 @@ def add_inputs(inputs, matrix, pdf, page, resources, stream, font_map,
context = ffi.gc(
pango.pango_font_map_create_context(font_map),
gobject.g_object_unref)
for i, (element, style, rectangle) in enumerate(inputs):
inputs_with_forms = [
(form, element, style, rectangle)
for form, inputs in forms.items()
for element, style, rectangle in inputs
]
radio_groups = collections.defaultdict(dict)
for i, (form, element, style, rectangle) in enumerate(inputs_with_forms):
rectangle = (
*matrix.transform_point(*rectangle[:2]),
*matrix.transform_point(*rectangle[2:]))

input_type = element.attrib.get('type')
input_value = element.attrib.get('value', 'Yes')
default_name = f'unknown-{page_reference.decode()}-{i}'
input_name = element.attrib.get('name', default_name)
# TODO: where does this 0.75 scale come from?
Expand All @@ -125,23 +154,9 @@ def add_inputs(inputs, matrix, pdf, page, resources, stream, font_map,
# Checkboxes
width = rectangle[2] - rectangle[0]
height = rectangle[1] - rectangle[3]
checked_stream = pydyf.Stream(extra={
'Resources': resources.reference,
'Type': '/XObject',
'Subtype': '/Form',
'BBox': pydyf.Array((0, 0, width, height)),
}, compress=compress)
checked_stream.push_state()
checked_stream.begin_text()
checked_stream.set_color_rgb(*style['color'][:3])
checked_stream.set_font_size('ZaDb', font_size)
# Center (let’s assume that Dingbat’s check has a 0.8em size)
x = (width - font_size * 0.8) / 2
y = (height - font_size * 0.8) / 2
checked_stream.move_text_to(x, y)
checked_stream.show_text_string('4')
checked_stream.end_text()
checked_stream.pop_state()
checked_stream = _make_checked_stream(resources,
width, height,
compress, style, font_size)
pdf.add_object(checked_stream)

checked = 'checked' in element.attrib
Expand All @@ -161,6 +176,43 @@ def add_inputs(inputs, matrix, pdf, page, resources, stream, font_map,
'AS': '/Yes' if checked else '/Off',
'DA': pydyf.String(b' '.join(field_stream.stream)),
})
elif input_type == 'radio':
if input_name not in radio_groups[form]:
new_group = pydyf.Dictionary({
'Type': '/Annot',
'Subtype': '/Widget',
'FT': '/Btn',
'Ff': 1 << (16 - 1), # Radio flag
'F': 1 << (3 - 1), # Print flag
'P': page.reference,
'T': pydyf.String(f'{hash(form)}-{input_name}'),
'TU': pydyf.String(input_name),
'V': '/Off',
'Kids': pydyf.Array(),
})
pdf.add_object(new_group)
page['Annots'].append(new_group.reference)
radio_groups[form][input_name] = new_group
group = radio_groups[form][input_name]
width = rectangle[2] - rectangle[0]
height = rectangle[1] - rectangle[3]
on_stream = _make_checked_stream(resources,
width, height,
compress, style, font_size)
checked = 'checked' in element.attrib
field = pydyf.Dictionary({
'Type': '/Annot',
'Subtype': '/Widget',
'Rect': pydyf.Array(rectangle),
'Parent': group.reference,
'AS': pydyf.String(input_value) if checked else '/Off',
'AP': pydyf.Dictionary({'N': pydyf.Dictionary({
pydyf.String(input_value): on_stream.reference,
})}),
})
if checked:
group['V'] = pydyf.String(input_value)
group['Kids'].append(field.reference)
elif element.tag == 'select':
# Select fields
font_description = get_font_description(style)
Expand Down
Loading