Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

hyperref and hyperxmp metadata improvements #2365

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions MANIFEST
Original file line number Diff line number Diff line change
Expand Up @@ -984,6 +984,9 @@ t/complex/hyperchars.xml
t/complex/hypertest.pdf
t/complex/hypertest.tex
t/complex/hypertest.xml
t/complex/hyperxmp.pdf
t/complex/hyperxmp.tex
t/complex/hyperxmp.xml
t/complex/labelled.pdf
t/complex/labelled.tex
t/complex/labelled.xml
Expand Down
1 change: 1 addition & 0 deletions lib/LaTeXML/Engine/Base_Schema.pool.ltxml
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ RequireResource('LaTeXML.css');
"ma" => "http://www.w3.org/ns/ma-ont#",
"og" => "http://ogp.me/ns#",
"owl" => "http://www.w3.org/2002/07/owl#",
"prism" => "http://prismstandard.org/namespaces/basic/3.0/",
"rdf" => "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
"rdfa" => "http://www.w3.org/ns/rdfa#",
"rdfs" => "http://www.w3.org/2000/01/rdf-schema#",
Expand Down
43 changes: 22 additions & 21 deletions lib/LaTeXML/Package/hyperref.sty.ltxml
Original file line number Diff line number Diff line change
Expand Up @@ -86,21 +86,19 @@ foreach my $option ( # 3.1 General Options
DeclareOption($option, undef); }

# \hypersetup{keyvals} configures various parameters,
# for each pdf keyword, provide [property,(content|resource),datatype]
our %pdfkey_property = (
baseurl => '', # xmp:BaseURL ??
pdfauthor => ['dcterms:creator', 'content'],
pdfkeywords => ['dcterms:subject', 'content'], # & pdf:Keywords
pdflang => ['dcterms:language', 'content'],
pdfproducer => '', # pdf:Producer & xmp:CreatorTool
pdfsubject => ['dcterms:subject', 'content'],
pdftitle => ['dcterms:title', 'content'],
# Include hyperxmp's keywords, as well.
pdfauthortitle => '', # photoshop:AuthorsPosition
pdfcaptionwriter => '', # photoshop:CaptionWriter !?!?!?
pdfcopyright => ['dcterms:rights', 'content'], # & xmpRights:Marked
pdflicenseurl => ['cc:licence', 'resource'], # xmpRights:WebStatement
pdfmetalang => '', # dcterms:language ??
# for each pdf keyword, provide [property,(content|resource),datatype,langsupport]
# %pdfkey_property may have already been populated by hyperxmp
our %pdfkey_property = (%pdfkey_property,
baseurl => '', # xmp:BaseURL ??
pdfauthor => ['dcterms:creator', 'content'],
pdfcreationdate => '', # xmp:CreateDate
pdfkeywords => ['dcterms:subject', 'content'], # & pdf:Keywords
pdflang => ['dcterms:language', 'content'],
pdfmoddate => '', # xmp:ModifyDate
pdfproducer => '', # pdf:Producer & xmp:CreatorTool
pdfsubject => ['dcterms:description', 'content'],
pdftitle => ['dcterms:title', 'content'],
pdftrapped => '', # pdf:Trapped
);
# date=>dcterms:date xmp:CreateDate xmp:ModifyDate xmp:MetadataDate ?
# document identifier => xmlMM:DocumentID
Expand All @@ -126,21 +124,24 @@ DefPrimitive('\hypersetup RequiredKeyVals:Hyp', sub {
my @pairs = $kv->getPairs;
while (@pairs) {
my ($key, $value) = (shift(@pairs), shift(@pairs));
hyperref_setoption($key, Digest($value)); }
hyperref_setoption($key, Expand($value)); }
return; });

PushValue('@at@end@document', T_CS('\@add@PDF@RDFa@triples'));

DefConstructor('\@add@PDF@RDFa@triples', sub {
my ($document, $xproperty, $content) = @_;
if (my $root = $document->getDocument->documentElement) {
my $metalang = LookupMapping('Hyperref_options', 'pdfmetalang') // LookupMapping('Hyperref_options', 'pdflang');
foreach my $key (LookupMappingKeys('Hyperref_options')) {
if (my $entry = ($pdfkey_property{$key})) {
my ($property, $object, $datatype) = @$entry;
my $value = LookupMapping('Hyperref_options', $key);
my $node = $document->openElementAt($root, 'ltx:rdf',
property => $property, $object => $value,
($datatype ? (datatype => $datatype) : ()));
my ($property, $object, $datatype, $langsupport) = @$entry;
my $value = ToString(LookupMapping('Hyperref_options', $key));
my ($lang, $localizedValue) = $value =~ m/^\[([^]]*)\](.*)/;
my $node = $document->openElementAt($root, 'ltx:rdf',
property => $property, $object => $langsupport ? $localizedValue // $value : $value,
($langsupport && ($lang || $metalang) ? ('xml:lang' => $lang // $metalang) : ()),
($datatype ? (datatype => $datatype) : ()));
# Must do directly; $document->setAttribute omits empty attributes
$node->setAttribute(about => '');
$document->closeElementAt($node); } } } });
Expand Down
60 changes: 58 additions & 2 deletions lib/LaTeXML/Package/hyperxmp.sty.ltxml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,63 @@ use LaTeXML::Package;

RequirePackage('ifthen');

# Basically, the effects of hyperxmp are already built
# into the LaTeXML binding for hyperref.
# macro to pass alternate language entries
# TODO support entries in multiple languages
# TODO pdfmetalang should affect *following* entries, not all of them
Let('\XMPLangAlt', '\@gobbletwo');

# macros for including commas in comma-separated lists
# TODO implement comma-separated lists splitting
Let('\xmpquote', '\relax');
DefMacroI('\xmpcomma', undef, ',');

# %pdfkey_property may have already been populated by hyperref
our %pdfkey_property = (%pdfkey_property,
# modify hyperref keywords to support specifying the language
pdfsubject => ['dcterms:description', 'content', undef, 1],
pdftitle => ['dcterms:title', 'content', undef, 1],
pdfaconformance => '', # pdfaid:conformance
pdfapart => '', # pdfaid:part
pdfauthortitle => '', # photoshop:AuthorsPosition
pdfbookedition => ['prism:bookEdition', 'content', undef, 1],
pdfbytes => ['prism:byteCount', 'content'],
pdfcaptionwriter => '', # photoshop:CaptionWriter
pdfcontactaddress => '', # Iptc4xmpCore:CiAdrExtadr
pdfcontactcity => '', # Iptc4xmpCore:CiAdrCity
pdfcontactcountry => '', # Iptc4xmpCore:CiAdrCtry
pdfcontactemail => '', # Iptc4xmpCore:CiEmailWork
pdfcontactphone => '', # Iptc4xmpCore:CiTelWork
pdfcontactpostcode => '', # Iptc4xmpCore:CiAdrPcode
pdfcontactregion => '', # Iptc4xmpCore:CiAdrRegion
pdfcontacturl => '', # Iptc4xmpCore:CiUrlWork
pdfcopyright => ['dcterms:rights', 'content', undef, 1], # & xmpRights:Marked, xmpRights:WebStatement
pdfdate => ['dcterms:date', 'content'], # (if missing, hyperxmp uses \date -- we delegate the default choice to postprocessing)
pdfdocumentid => '', # xmpMM:DocumentID
pdfdoi => ['prism:doi', 'content'],
pdfeissn => ['prism:eIssn', 'content'],
pdfidentifier => ['dcterms:identifier', 'content'], # (if missing, hyperxmp uses the first non-empty doi, eissn, issn, isbn with prefix info:doi/ or urn:ISS(B|N): -- we delegate the default choice to postprocessing)
pdfinstanceid => '', # xmpMM:InstanceID
pdfisbn => ['prism:isbn', 'content'],
pdfissn => ['prism:issn', 'content'],
pdfissuenum => ['prism:number', 'content'],
pdflicenseurl => ['cc:licence', 'resource'], # xmpRights:WebStatement
pdfmetadate => '', # xmp:MetadataDate
pdfmetalang => '', # the default language of the metadata entries themselves
pdfnumpages => ['prism:pageCount', 'content'], # xmpTPg:NPages
pdfpagerange => ['prism:pageRange', 'content'],
pdfpublication => ['prism:publicationName', 'content', undef, 1],
pdfpublisher => ['dcterms:publisher', 'content'],
pdfpubstatus => '', # jav:journal_article_version
pdfpubtype => ['prism:aggregationType', 'content'],
pdfrendition => '', # xmpMM:RenditionClass
pdfsource => ['dcterms:source', 'content'],
pdfsubtitle => ['prism:subtitle', 'content', undef, 1],
pdftype => ['dcterms:type', 'content'],
pdfuapart => '', # pdfuaid:part
pdfurl => ['prism:url', 'content'],
pdfversionid => '', # xmpMM:VersionID
pdfvolumenum => ['prism:volume', 'content'],
pdfxstandard => '', # {pdfx,pdfxid}:GTS_PDFXVersion
);

1;
1 change: 1 addition & 0 deletions t/80_complex.t
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ latexml_tests("t/complex",
cleveref_minimal => 'cleveref.sty',
figure_dual_caption => {packages => 'graphicx.sty', texlive_min => 2021},
figure_mixed_content => {packages => 'graphicx.sty', texlive_min => 2021},
hyperxmp => 'babel.sty',
si => {
env=>'CI', # only runs in continuous integration
packages => 'siunitx.sty', texlive_min => 2015 } });
2 changes: 1 addition & 1 deletion t/complex/hypertest.xml
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,6 @@
<rdf about="" content="Not subject to copyright" property="dcterms:rights"/>
<rdf about="" content="test,hyperref" property="dcterms:subject"/>
<rdf about="" property="cc:licence" resource="http://creativecommons.org/licenses/by/3.0/"/>
<rdf about="" content="Testing LaTeXMLs processing of hyperref metadata" property="dcterms:subject"/>
<rdf about="" content="Testing LaTeXML's processing of hyperref metadata" property="dcterms:description"/>
<rdf about="" content="Test Hyperref Metadata" property="dcterms:title"/>
</document>
Binary file added t/complex/hyperxmp.pdf
Binary file not shown.
64 changes: 64 additions & 0 deletions t/complex/hyperxmp.tex
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
% example copied from hyperxmp with minor modifications
\pdfobjcompresslevel=0 % added to easily inspect the XMP packet
\documentclass[british]{article} % added british to test pdflang and pdfmetalang
\usepackage[utf8]{inputenc}
\usepackage{babel}
\usepackage[unicode]{hyperref}
\usepackage{hyperxmp}
%
\title{%
On a heuristic viewpoint concerning the production and
transformation of light}
\author{Albert Einstein}
\date{March 17, 1905}
%
\hypersetup{%
pdfmetalang={en-AU}, % added to test pdfmetalang
pdftitle={%
On a heuristic viewpoint concerning the production and
transformation of light},
pdfsubtitle={[en-US]Putting that bum Maxwell in his place},
pdfauthor={Albert Einstein},
pdfauthortitle={\xmpquote{Technical Assistant\xmpcomma\ Level III}},
pdfdate={1905-03-17},
pdfcopyright={Copyright (C) 1905, Albert Einstein},
pdfsubject={photoelectric effect},
pdfkeywords={energy quanta, Hertz effect, quantum physics},
pdflicenseurl={http://creativecommons.org/licenses/by-nc-nd/3.0/},
pdfcaptionwriter={Scott Pakin},
pdfcontactaddress={Kramgasse 49},
pdfcontactcity={Bern},
pdfcontactpostcode={3011},
pdfcontactcountry={Switzerland},
pdfcontactphone={031 312 00 91},
pdfcontactemail={aeinstein@ipi.ch},
pdfcontacturl={%
http://einstein.biz/,
https://www.facebook.com/AlbertEinstein
},
pdfdocumentid={uuid:6d1ac9ec-4ff2-515a-954b-648eeb4853b0},
pdfversionid={2.998e8},
pdfpublication={[de]Annalen der Physik},
pdfpublisher={Wiley-VCH},
pdfpubtype={journal},
pdfvolumenum={322},
pdfissuenum={6},
pdfpagerange={132-148},
pdfissn={0003-3804},
pdfeissn={1521-3889},
pdfpubstatus={VoR},
pdflang={en-GB},
pdfurl={http://www.physik.uni-augsburg.de/annalen/history/einstein-papers/1905_17_132-148.pdf},
pdfdoi={10.1002/andp.19053220607},
pdfidentifier={info:lccn/50013519}
}
\XMPLangAlt{de}{pdftitle={Über einen die Erzeugung und Verwandlung des
Lichtes betreffenden heuristischen Gesichtspunkt}}
%
\begin{document}
\maketitle
A profound formal difference exists between the theoretical
concepts that physicists have formed about gases and other
ponderable bodies, and Maxwell's theory of electromagnetic
processes in so-called empty space\dots
\end{document}
43 changes: 43 additions & 0 deletions t/complex/hyperxmp.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
<?xml version="1.0" encoding="UTF-8"?>
<?latexml class="article" options="british"?>
<?latexml package="inputenc" options="utf8"?>
<?latexml package="babel"?>
<?latexml package="hyperref" options="unicode"?>
<?latexml package="hyperxmp"?>
<?latexml RelaxNGSchema="LaTeXML"?>
<document xmlns="http://dlmf.nist.gov/LaTeXML" class="ltx_authors_1line" prefix="cc: http://creativecommons.org/ns# dcterms: http://purl.org/dc/terms/ prism: http://prismstandard.org/namespaces/basic/3.0/" xml:lang="en-GB">
<resource src="LaTeXML.css" type="text/css"/>
<resource src="ltx-article.css" type="text/css"/>
<title>On a heuristic viewpoint concerning the production and
transformation of light</title>
<creator role="author">
<personname>Albert Einstein</personname>
</creator>
<date role="creation">March 17, 1905</date>
<para xml:id="p1">
<p>A profound formal difference exists between the theoretical
concepts that physicists have formed about gases and other
ponderable bodies, and Maxwell’s theory of electromagnetic
processes in so-called empty space…</p>
</para>
<rdf about="" content="Albert Einstein" property="dcterms:creator"/>
<rdf about="" content="Copyright (C) 1905, Albert Einstein" property="dcterms:rights" xml:lang="en-AU"/>
<rdf about="" content="1905-03-17" property="dcterms:date"/>
<rdf about="" content="10.1002/andp.19053220607" property="prism:doi"/>
<rdf about="" content="1521-3889" property="prism:eIssn"/>
<rdf about="" content="info:lccn/50013519" property="dcterms:identifier"/>
<rdf about="" content="0003-3804" property="prism:issn"/>
<rdf about="" content="6" property="prism:number"/>
<rdf about="" content="energy quanta, Hertz effect, quantum physics" property="dcterms:subject"/>
<rdf about="" content="en-GB" property="dcterms:language"/>
<rdf about="" property="cc:licence" resource="http://creativecommons.org/licenses/by-nc-nd/3.0/"/>
<rdf about="" content="132-148" property="prism:pageRange"/>
<rdf about="" content="Annalen der Physik" property="prism:publicationName" xml:lang="de"/>
<rdf about="" content="Wiley-VCH" property="dcterms:publisher"/>
<rdf about="" content="journal" property="prism:aggregationType"/>
<rdf about="" content="photoelectric effect" property="dcterms:description" xml:lang="en-AU"/>
<rdf about="" content="Putting that bum Maxwell in his place" property="prism:subtitle" xml:lang="en-US"/>
<rdf about="" content="On a heuristic viewpoint concerning the production and&#10;transformation of light" property="dcterms:title" xml:lang="en-AU"/>
<rdf about="" content="http://www.physik.uni-augsburg.de/annalen/history/einstein-papers/1905_17_132-148.pdf" property="prism:url"/>
<rdf about="" content="322" property="prism:volume"/>
</document>
Loading