Kozea · liZe · Jan 24, 2025 · Dec 31, 2024 · Jan 2, 2025 · Jan 14, 2025
diff --git a/docs/common_use_cases.rst b/docs/common_use_cases.rst
@@ -102,8 +102,8 @@ such as page numbers, headers, etc. Read more about the page_ at-rule.
 .. _page: https://developer.mozilla.org/en-US/docs/Web/CSS/@page
 
 
-Generate PDFs Specialized for Accessibility (PDF/UA) and Archiving (PDF/A)
---------------------------------------------------------------------------
+Generate Specialized PDFs
+-------------------------
 
 WeasyPrint can generate different PDF variants, including PDF/UA and PDF/A. The
 feature is available by using the ``--pdf-variant`` CLI option, or the
@@ -125,8 +125,8 @@ Even if WeasyPrint tries to generate valid documents, the result is not
 guaranteed: the HTML, CSS and PDF features chosen by the user must follow the
 limitations defined by the different specifications.
 
-PDF/A
-.....
+PDF/A (Archiving)
+.................
 
 PDF/A documents are specialized for archiving purposes. They are a simple
 subset of PDF, with a lot of limitations: no audio, video or JavaScript,
@@ -145,8 +145,8 @@ valid PDF identifier, but you can provide your own with the
 If your document includes images, you must set the ``image-rendering:
 crisp-edges`` property to avoid anti-aliasing, that is forbidden by PDF/A.
 
-PDF/UA
-......
+PDF/UA (Universal Accessibility)
+................................
 
 PDF/UA documents are specialized for accessibility purposes. They include extra
 metadata that define document information and content structure.
@@ -158,6 +158,179 @@ also used to define the order of the PDF content.
 Some information is required in your HTML file, including a ``<title>`` tag,
 and a ``lang`` attribute set on the ``<html>`` tag.
 
+Factur-X / ZUGFeRD (Electronic Invoices)
+........................................
+
+Factur-X / ZUGFeRD is a Franco-German standard for hybrid e-invoices, the first
+implementation of the European Semantic Standard EN 16931. It enables users to
+include normalized metadata in PDF invoices, such as company information or
+invoice amounts, so that compatible software can automatically read this
+information. This standard is based on PDF/A-3b.
+
+WeasyPrint can generate Factur-X / ZUGFeRD documents. Invoice metadata must be
+generated by the user and included in the PDF document when rendered. Two
+different metadata files are required:
+
+- the first one is RDF metadata, containing document metadata and PDF/A
+  extension information;
+- the second one is Factur-X / ZUGFeRD metadata, containing invoice amounts,
+  plus seller and buyer information.
+
+Here is an example of Factur-X document generation.
+
+``rdf.xml``:
+
+.. code-block:: xml
+
+  <x:xmpmeta
+      xmlns:x="adobe:ns:meta/"
+      xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+      xmlns:pdf="http://ns.adobe.com/pdf/1.3/"
+      xmlns:fx="urn:factur-x:pdfa:CrossIndustryDocument:invoice:1p0#"
+      xmlns:pdfaExtension="http://www.aiim.org/pdfa/ns/extension/"
+      xmlns:pdfaSchema="http://www.aiim.org/pdfa/ns/schema#"
+      xmlns:pdfaProperty="http://www.aiim.org/pdfa/ns/property#">
+    <!-- placeholder -->
+    <rdf:RDF>
+      <rdf:Description rdf:about="">
+        <fx:ConformanceLevel>MINIMUM</fx:ConformanceLevel>
+        <fx:DocumentFileName>factur-x.xml</fx:DocumentFileName>
+        <fx:DocumentType>INVOICE</fx:DocumentType>
+        <fx:Version>1.0</fx:Version>
+      </rdf:Description>
+      <rdf:Description rdf:about="">
+        <pdfaExtension:schemas>
+          <rdf:Bag>
+            <rdf:li rdf:parseType="Resource">
+              <pdfaSchema:schema>Factur-X PDFA Extension Schema</pdfaSchema:schema>
+              <pdfaSchema:namespaceURI>urn:factur-x:pdfa:CrossIndustryDocument:invoice:1p0#</pdfaSchema:namespaceURI>
+              <pdfaSchema:prefix>fx</pdfaSchema:prefix>
+              <pdfaSchema:property>
+                <rdf:Seq>
+                  <rdf:li rdf:parseType="Resource">
+                    <pdfaProperty:name>DocumentFileName</pdfaProperty:name>
+                    <pdfaProperty:valueType>Text</pdfaProperty:valueType>
+                    <pdfaProperty:category>external</pdfaProperty:category>
+                    <pdfaProperty:description>name of the embedded XML invoice file</pdfaProperty:description>
+                  </rdf:li>
+                  <rdf:li rdf:parseType="Resource">
+                    <pdfaProperty:name>DocumentType</pdfaProperty:name>
+                    <pdfaProperty:valueType>Text</pdfaProperty:valueType>
+                    <pdfaProperty:category>external</pdfaProperty:category>
+                    <pdfaProperty:description>INVOICE</pdfaProperty:description>
+                  </rdf:li>
+                  <rdf:li rdf:parseType="Resource">
+                    <pdfaProperty:name>Version</pdfaProperty:name>
+                    <pdfaProperty:valueType>Text</pdfaProperty:valueType>
+                    <pdfaProperty:category>external</pdfaProperty:category>
+                    <pdfaProperty:description>The actual version of the Factur-X XML schema</pdfaProperty:description>
+                  </rdf:li>
+                  <rdf:li rdf:parseType="Resource">
+                    <pdfaProperty:name>ConformanceLevel</pdfaProperty:name>
+                    <pdfaProperty:valueType>Text</pdfaProperty:valueType>
+                    <pdfaProperty:category>external</pdfaProperty:category>
+                    <pdfaProperty:description>The conformance level of the embedded Factur-X data</pdfaProperty:description>
+                  </rdf:li>
+                </rdf:Seq>
+              </pdfaSchema:property>
+            </rdf:li>
+          </rdf:Bag>
+        </pdfaExtension:schemas>
+      </rdf:Description>
+    </rdf:RDF>
+  </x:xmpmeta>
+
+``factur-x.xml``:
+
+.. code-block:: xml
+
+  <rsm:CrossIndustryInvoice
+      xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+      xmlns:qdt="urn:un:unece:uncefact:data:standard:QualifiedDataType:100"
+      xmlns:udt="urn:un:unece:uncefact:data:standard:UnqualifiedDataType:100"
+      xmlns:rsm="urn:un:unece:uncefact:data:standard:CrossIndustryInvoice:100"
+      xmlns:ram="urn:un:unece:uncefact:data:standard:ReusableAggregateBusinessInformationEntity:100">
+    <rsm:ExchangedDocumentContext>
+      <ram:BusinessProcessSpecifiedDocumentContextParameter>
+        <ram:ID>A1</ram:ID>
+      </ram:BusinessProcessSpecifiedDocumentContextParameter>
+      <ram:GuidelineSpecifiedDocumentContextParameter>
+        <ram:ID>urn:factur-x.eu:1p0:minimum</ram:ID>
+      </ram:GuidelineSpecifiedDocumentContextParameter>
+    </rsm:ExchangedDocumentContext>
+    <rsm:ExchangedDocument>
+      <ram:ID>123</ram:ID>
+      <ram:TypeCode>380</ram:TypeCode>
+      <ram:IssueDateTime>
+        <udt:DateTimeString format="102">20200131</udt:DateTimeString>
+      </ram:IssueDateTime>
+    </rsm:ExchangedDocument>
+    <rsm:SupplyChainTradeTransaction>
+      <ram:ApplicableHeaderTradeAgreement>
+        <ram:BuyerReference>Buyer</ram:BuyerReference>
+        <ram:SellerTradeParty>
+          <ram:Name>Supplier Corp</ram:Name>
+          <ram:SpecifiedLegalOrganization>
+            <ram:ID schemeID="0002">123456782</ram:ID>
+          </ram:SpecifiedLegalOrganization>
+          <ram:PostalTradeAddress>
+            <ram:CountryID>FR</ram:CountryID>
+          </ram:PostalTradeAddress>
+          <ram:SpecifiedTaxRegistration>
+            <ram:ID schemeID="VA">FR11123456782</ram:ID>
+          </ram:SpecifiedTaxRegistration>
+        </ram:SellerTradeParty>
+        <ram:BuyerTradeParty>
+          <ram:Name>Buyer Corp</ram:Name>
+          <ram:SpecifiedLegalOrganization>
+            <ram:ID schemeID="0002">987654324</ram:ID>
+          </ram:SpecifiedLegalOrganization>
+        </ram:BuyerTradeParty>
+        <ram:BuyerOrderReferencedDocument >
+          <ram:IssuerAssignedID>456</ram:IssuerAssignedID>
+        </ram:BuyerOrderReferencedDocument>
+      </ram:ApplicableHeaderTradeAgreement>
+      <ram:ApplicableHeaderTradeDelivery/>
+      <ram:ApplicableHeaderTradeSettlement>
+        <ram:InvoiceCurrencyCode>EUR</ram:InvoiceCurrencyCode>
+        <ram:SpecifiedTradeSettlementHeaderMonetarySummation>
+          <ram:TaxBasisTotalAmount>100.00</ram:TaxBasisTotalAmount>
+          <ram:TaxTotalAmount currencyID="EUR">20.00</ram:TaxTotalAmount>
+          <ram:GrandTotalAmount>120.00</ram:GrandTotalAmount>
+          <ram:DuePayableAmount>120.00</ram:DuePayableAmount>
+        </ram:SpecifiedTradeSettlementHeaderMonetarySummation>
+      </ram:ApplicableHeaderTradeSettlement>
+    </rsm:SupplyChainTradeTransaction>
+  </rsm:CrossIndustryInvoice>
+
+``invoice.py``:
+
+.. code-block:: python
+
+  from pathlib import Path
+  from weasyprint import Attachment, HTML
+
+  def generate_rdf_metadata(metadata, variant, version, conformance):
+      original_rdf = generate_original_rdf_metadata(metadata, variant, version, conformance)
+      return Path("rdf.xml").read_bytes().replace(b"<!-- placeholder -->", original_rdf)
+
+  document = HTML(string="<h1>Invoice</h1>").render()
+  generate_original_rdf_metadata = document.metadata.generate_rdf_metadata
+
+  factur_x_xml = Path("factur-x.xml").read_text()
+  attachment = Attachment(string=factur_x_xml, name="factur-x.xml", relationship="Data")
+  document.metadata.attachments = [attachment]
+
+  document.metadata.generate_rdf_metadata = generate_rdf_metadata
+  document.write_pdf("invoice.pdf", pdf_variant="pdf/a-3b")
+
+Of course, the content of these files has to be adapted to the content of real
+invoices. Using XML generators instead of plain text manipulation is also
+highly recommended.
+
+A more detailed blog article is available on `Binary Butterfly’s website
+<https://binary-butterfly.de/artikel/factur-x-zugferd-e-invoices-with-python/>`_.
+
 
 Include PDF Forms
 -----------------

diff --git a/tests/test_api.py b/tests/test_api.py
@@ -18,6 +18,7 @@
 
 from weasyprint import CSS, HTML, __main__, default_url_fetcher
 from weasyprint.pdf.anchors import resolve_links
+from weasyprint.pdf.metadata import generate_rdf_metadata
 from weasyprint.urls import path2url
 
 from .draw import parse_pixels
@@ -414,14 +415,14 @@ def test_command_line_render(tmp_path):
         os.environ.pop('SOURCE_DATE_EPOCH')
 
         stdout = _run('combined.html --uncompressed-pdf -')
-        assert stdout.count(b'attachment') == 0
+        assert stdout.count(b'Filespec') == 0
         stdout = _run('combined.html --uncompressed-pdf -')
-        assert stdout.count(b'attachment') == 0
+        assert stdout.count(b'Filespec') == 0
         stdout = _run('-a pattern.png --uncompressed-pdf combined.html -')
-        assert stdout.count(b'attachment') == 1
+        assert stdout.count(b'Filespec') == 1
         stdout = _run(
             '-a style.css -a pattern.png --uncompressed-pdf combined.html -')
-        assert stdout.count(b'attachment') == 2
+        assert stdout.count(b'Filespec') == 2
 
         _run('combined.html out23.pdf --timeout 30')
         assert (tmp_path / 'out23.pdf').read_bytes() == pdf_bytes
@@ -1140,6 +1141,7 @@ def assert_meta(html, **meta):
     meta.setdefault('attachments', [])
     meta.setdefault('lang', None)
     meta.setdefault('custom', {})
+    meta.setdefault('generate_rdf_metadata', generate_rdf_metadata)
     assert vars(FakeHTML(string=html).render().metadata) == meta
 
 

diff --git a/tests/test_pdf.py b/tests/test_pdf.py
@@ -598,7 +598,7 @@ def test_embedded_files_attachments(tmp_path):
         ]
     )
     assert f'<{hashlib.md5(b"hi there").hexdigest()}>'.encode() in pdf
-    assert b'/F ()' in pdf
+    assert b'/F (attachment.bin)' in pdf
     assert b'/UF (attachment.bin)' in pdf
     name = BOM_UTF16_BE + 'some file attachment äöü'.encode('utf-16-be')
     assert b'/Desc <' + name.hex().encode() + b'>' in pdf
@@ -716,3 +716,29 @@ def test_bleed(style, media, bleed, trim):
     assert f'/MediaBox {str(media).replace(",", "")}'.encode() in pdf
     assert f'/BleedBox {str(bleed).replace(",", "")}'.encode() in pdf
     assert f'/TrimBox {str(trim).replace(",", "")}'.encode() in pdf
+
+
+@assert_no_logs
+def test_default_rdf_metadata():
+    pdf_document = FakeHTML(string='<body>test</body>').render()
+
+    pdf_document.metadata.title = None
+
+    pdf_bytes = pdf_document.write_pdf(
+        pdf_variant='pdf/a-3b', pdf_identifier=b'example-bytes', uncompressed_pdf=True)
+    assert b'<rdf:RDF xmlns:pdf="http://ns.adobe.com/pdf/1.3/"' in pdf_bytes
+
+
+@assert_no_logs
+def test_custom_rdf_metadata():
+    def generate_rdf_metadata(*args, **kwargs):
+        return b'TEST_METADATA'
+
+    pdf_document = FakeHTML(string='<body>test</body>').render()
+
+    pdf_document.metadata.title = None
+    pdf_document.metadata.generate_rdf_metadata = generate_rdf_metadata
+
+    pdf_bytes = pdf_document.write_pdf(
+        pdf_variant='pdf/a-3b', pdf_identifier=b'example-bytes', uncompressed_pdf=True)
+    assert b'TEST_METADATA' in pdf_bytes
diff --git a/weasyprint/__init__.py b/weasyprint/__init__.py
@@ -318,6 +318,9 @@ class Attachment:
     HTML specific arguments (``encoding`` and ``media_type``) are not
     supported.
 
+    :param str name:
+        The name of the attachment to be included in the PDF document.
+        May be :obj:`None`.
     :param str description:
         A description of the attachment to be included in the PDF document.
         May be :obj:`None`.
@@ -335,11 +338,12 @@ class Attachment:
     """
     def __init__(self, guess=None, filename=None, url=None, file_obj=None,
                  string=None, base_url=None, url_fetcher=default_url_fetcher,
-                 description=None, created=None, modified=None,
+                 name=None, description=None, created=None, modified=None,
                  relationship='Unspecified'):
         self.source = _select_source(
             guess, filename, url, file_obj, string, base_url=base_url,
             url_fetcher=url_fetcher)
+        self.name = name
         self.description = description
         self.relationship = relationship
         self.md5 = None

diff --git a/weasyprint/document.py b/weasyprint/document.py
@@ -18,6 +18,7 @@
 from .logger import PROGRESS_LOGGER
 from .matrix import Matrix
 from .pdf import VARIANTS, generate_pdf
+from .pdf.metadata import generate_rdf_metadata
 from .text.fonts import FontConfiguration
 
 
@@ -105,12 +106,10 @@ class DocumentMetadata:
     """Meta-information belonging to a whole :class:`Document`.
 
     New attributes may be added in future versions of WeasyPrint.
-
     """
-
-    def __init__(self, title=None, authors=None, description=None,
-                 keywords=None, generator=None, created=None, modified=None,
-                 attachments=None, lang=None, custom=None):
+    def __init__(self, title=None, authors= None, description=None, keywords=None,
+                 generator=None, created=None, modified=None, attachments=None,
+                 lang=None, custom=None, generate_rdf_metadata=generate_rdf_metadata):
         #: The title of the document, as a string or :obj:`None`.
         #: Extracted from the ``<title>`` element in HTML
         #: and written to the ``/Title`` info field in PDF.
@@ -156,6 +155,9 @@ def __init__(self, title=None, authors=None, description=None,
         #: Custom metadata, as a dict whose keys are the metadata names and
         #: values are the metadata values.
         self.custom = custom or {}
+        #: RDF metadata generator, as a callable returning RDF XML as bytes.
+        #: Defaults to the built-in generator; assign a function to customize it.
+        self.generate_rdf_metadata = generate_rdf_metadata
 
 
 class DiskCache:

diff --git a/weasyprint/pdf/__init__.py b/weasyprint/pdf/__init__.py
@@ -271,7 +271,7 @@ def generate_pdf(document, target, zoom, **options):
     if pdf_attachments:
         content = pydyf.Dictionary({'Names': pydyf.Array()})
         for i, pdf_attachment in enumerate(pdf_attachments):
-            content['Names'].append(pydyf.String(f'attachment{i}'))
+            content['Names'].append(pdf_attachment['F'])
             content['Names'].append(pdf_attachment.reference)
         pdf.add_object(content)
         if 'Names' not in pdf.catalog:

diff --git a/weasyprint/pdf/anchors.py b/weasyprint/pdf/anchors.py
@@ -351,7 +351,9 @@ def write_pdf_attachment(pdf, attachment, compress):
 
     # TODO: Use the result object from a URL fetch operation to provide more
     # details on the possible filename and MIME type.
-    if url and urlsplit(url).path:
+    if attachment.name:
+        filename = attachment.name
+    elif url and urlsplit(url).path:
         filename = basename(unquote(urlsplit(url).path))
     else:
         filename = 'attachment.bin'
@@ -376,7 +378,7 @@ def write_pdf_attachment(pdf, attachment, compress):
 
     pdf_attachment = pydyf.Dictionary({
         'Type': '/Filespec',
-        'F': pydyf.String(),
+        'F': pydyf.String(filename.encode(errors='ignore')),
         'UF': pydyf.String(filename),
         'EF': pydyf.Dictionary({'F': file_stream.reference}),
         'Desc': pydyf.String(attachment.description or ''),