Skip to content

Commit

Permalink
More ligatures (#2156)
Browse files Browse the repository at this point in the history
* Add ? text ligatures:

* Report text ligatures when --debug=document

* Add upside-down ! and ? to ligatures test case
  • Loading branch information
brucemiller authored Jul 22, 2023
1 parent dbef02e commit 0471b66
Show file tree
Hide file tree
Showing 5 changed files with 30 additions and 10 deletions.
1 change: 1 addition & 0 deletions lib/LaTeXML/Core/Document.pm
Original file line number Diff line number Diff line change
Expand Up @@ -1243,6 +1243,7 @@ sub closeText_internal {
next if ($fonttest = $$ligature{fontTest}) && !&$fonttest($font);
$string = &{ $$ligature{code} }($string); } }
$node->setData($string) unless $string eq $ostring;
Debug("LIGATURE $ostring => $string") if $LaTeXML::DEBUG{document} && ($string ne $ostring);
$$self{node} = $parent; # Effectively closed (->setNode, but don't recurse)
return $parent; }
else {
Expand Down
34 changes: 24 additions & 10 deletions lib/LaTeXML/Package/TeX.pool.ltxml
Original file line number Diff line number Diff line change
Expand Up @@ -7112,19 +7112,33 @@ DefMathI('\to', undef, "\x{2192}", role => 'ARROW'); # RIGHTWARDS ARROW??? a bit
DefPrimitive('\@@endash', sub { Box("\x{2013}", undef, undef, T_CS('\@@endash')); });
DefPrimitive('\@@emdash', sub { Box("\x{2014}", undef, undef, T_CS('\@@emdash')); });

DefLigature(qr{--}, "\x{2013}",
fontTest => sub { $_[0]->getFamily ne 'typewriter'; }); # EN DASH (NOTE: With digits before & aft => \N{FIGURE DASH})
DefLigature(qr{---}, "\x{2014}",
fontTest => sub { $_[0]->getFamily ne 'typewriter'; }); # EM DASH
# Font test for ligatures: true for any font whose family is not 'typewriter'.
#   $font : font object, queried through its getFamily method.
# Returns the result of the string comparison (true/false).
sub nonTypewriter {
  my $font   = $_[0];
  my $family = $font->getFamily;
  return $family ne 'typewriter'; }

# Font test for ligatures: true when the font is not in the 'typewriter' family
# AND its encoding is OT1 or T1 (a font reporting no encoding is treated as OT1).
#   $font : font object, queried through its getFamily and getEncoding methods.
# The encoding is only consulted for non-typewriter fonts.
sub nonTypewriterT1 {
  my $font          = $_[0];
  my $notTypewriter = ($font->getFamily ne 'typewriter');
  # Typewriter font: hand back the (false) comparison result without looking at the encoding.
  return $notTypewriter unless $notTypewriter;
  my $encoding = $font->getEncoding || 'OT1';
  return $encoding =~ /^(OT1|T1)$/; }

# Dash ligatures: runs of ASCII hyphens in text are replaced by the corresponding Unicode dash.
# Both use the nonTypewriter font test, so they are not applied to typewriter text.
# (NOTE: with digits before & after, an EN DASH should arguably be \N{FIGURE DASH};
# that case is not distinguished here.)
DefLigature(qr{--}, "\x{2013}", fontTest => \&nonTypewriter); # EN dash
DefLigature(qr{---}, "\x{2014}", fontTest => \&nonTypewriter); # EM dash

# Ligatures for doubled single left & right quotes to convert to double quotes
# [Should ligatures be part of a font in the first place? (They are in TeX!)]
DefLigature(qr{\x{2018}\x{2018}}, "\x{201C}",
fontTest => sub { ($_[0]->getFamily ne 'typewriter')
&& (($_[0]->getEncoding || 'OT1') =~ /^(OT1|T1)$/); }); # is this needed?
DefLigature(qr{\x{2019}\x{2019}}, "\x{201D}",
fontTest => sub { ($_[0]->getFamily ne 'typewriter')
&& (($_[0]->getEncoding || 'OT1') =~ /^(OT1|T1)$/); });
# Two adjacent single quotation marks are replaced by the matching double quotation mark.
# All ligatures below use the nonTypewriterT1 font test (non-typewriter family, OT1 or T1 encoding).
DefLigature(qr{\x{2018}\x{2018}}, "\x{201C}", fontTest => \&nonTypewriterT1); # double left quote
DefLigature(qr{\x{2019}\x{2019}}, "\x{201D}", fontTest => \&nonTypewriterT1); # double right quote
# TeX's ?` and !` ligatures for inverted (Spanish-style) punctuation.  The patterns match U+2018
# rather than a literal backquote -- presumably the backquote has already been converted to a
# left single quote by the time ligatures are applied; TODO confirm.
DefLigature(qr{\?\x{2018}}, UTF(0xBF), fontTest => \&nonTypewriterT1); # ?` => inverted question mark
DefLigature(qr{!\x{2018}}, UTF(0xA1), fontTest => \&nonTypewriterT1); # !` => inverted exclamation mark
# The f-ligatures below are also handled by TeX.
# However, it appears that decent modern fonts in modern browsers handle these at that level.
# So it's likely not worth doing it at the conversion level, possibly adversely affecting search.
# DefLigature(qr{ff}, "\x{FB00}", fontTest => \&nonTypewriterT1);
# DefLigature(qr{fi}, "\x{FB01}", fontTest => \&nonTypewriterT1);
# DefLigature(qr{fl}, "\x{FB02}", fontTest => \&nonTypewriterT1);
# DefLigature(qr{ffi}, "\x{FB03}", fontTest => \&nonTypewriterT1);
# DefLigature(qr{ffl}, "\x{FB04}", fontTest => \&nonTypewriterT1);

DefConstructor('\TeX',
"<ltx:text class='ltx_TeX_logo' cssstyle='letter-spacing:-0.2em; margin-right:0.2em'>"
. "T<ltx:text yoffset='-0.4ex'>E</ltx:text>X</ltx:text>",
Expand Down
Binary file modified t/tokenize/ligatures.pdf
Binary file not shown.
2 changes: 2 additions & 0 deletions t/tokenize/ligatures.tex
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ \section{Text Ligatures}

A ``quote'' like this.

?`Espa\~nol? !`Hola!

\section{Ignore comments}
%foo
`%bar
Expand Down
3 changes: 3 additions & 0 deletions t/tokenize/ligatures.xml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ In interjection — like this — gets em-dash.</p>
<para xml:id="S1.p3">
<p>A “quote” like this.</p>
</para>
<para xml:id="S1.p4">
<p>¿Español? ¡Hola!</p>
</para>
</section>
<section inlist="toc" xml:id="S2">
<tags>
Expand Down

0 comments on commit 0471b66

Please sign in to comment.