Skip to content

Commit

Permalink
More TeX-like: Handle \noexpand by prefixing \dont_expand, rather tha…
Browse files Browse the repository at this point in the history
…n wrapping token; Gullet never exposes \dont_expand, but (sometimes) returns a magic internal \special_relax, distinct from \relax
  • Loading branch information
brucemiller committed Jul 7, 2023
1 parent ce15359 commit 072e61f
Show file tree
Hide file tree
Showing 6 changed files with 39 additions and 51 deletions.
10 changes: 4 additions & 6 deletions lib/LaTeXML/Core/Definition/Expandable.pm
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,7 @@ sub new {
if (ref $expansion eq 'LaTeXML::Core::Tokens') {
Fatal('misdefined', $cs, $source, "Expansion of '" . ToString($cs) . "' has unbalanced {}",
"Expansion is " . ToString($expansion)) unless $expansion->isBalanced;
# rescan for match tokens and unwrap dont_expand...
$expansion = $expansion->packParameters unless $traits{nopackParameters};
}
$expansion = $expansion->packParameters unless $traits{nopackParameters}; }
elsif (!ref $expansion) {
$expansion = TokenizeInternal($expansion)->packParameters; }

Expand All @@ -55,9 +53,9 @@ sub invoke {
no warnings 'recursion';
my ($self, $gullet, $onceonly) = @_;
# shortcut for "trivial" macros; but only if not tracing & profiling!!!!
my $_tracing = $STATE->lookupValue('TRACING') || 0;
my $tracing = ($_tracing & TRACE_MACROS);
my $profiled = ($_tracing & TRACE_PROFILE) && ($LaTeXML::CURRENT_TOKEN || $$self{cs});
my $_tracing = $STATE->lookupValue('TRACING') || 0;
my $tracing = ($_tracing & TRACE_MACROS);
my $profiled = ($_tracing & TRACE_PROFILE) && ($LaTeXML::CURRENT_TOKEN || $$self{cs});
my $expansion = $$self{expansion};
my $etype = ref $expansion;
my $result;
Expand Down
13 changes: 8 additions & 5 deletions lib/LaTeXML/Core/Gullet.pm
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,9 @@ sub readToken {
&& $LaTeXML::READING_ALIGNMENT
&& (($atoken, $atype, $ahidden) = $self->isColumnEnd($token))) {
$self->handleTemplate($LaTeXML::READING_ALIGNMENT, $token, $atype, $ahidden); }
elsif ((defined $token) && ($$token[1] == CC_CS) && ($$token[0] eq '\dont_expand')) {
my $unexpanded = $self->readToken; # Replace next token with a special \relax
return T_CS('\special_relax'); }
else {
last; } }
return $token; }
Expand Down Expand Up @@ -343,9 +346,9 @@ sub readXToken {
if (!defined $token) {
return unless $autoclose && $$self{autoclose} && @{ $$self{mouthstack} };
$self->closeMouth; } # Next input stream.
elsif (my $unexpanded = $$token[2]) { # Handle \noexpand; Inline get_dont_expand
return ($for_conditional && ($$unexpanded[1] == CC_ACTIVE) ? $unexpanded : T_CS('\relax'));
}
elsif (($cc == CC_CS) && ($$token[0] eq '\dont_expand')) {
my $unexpanded = $self->readToken;
return ($for_conditional && ($$unexpanded[1] == CC_ACTIVE) ? $unexpanded : T_CS('\special_relax')); }
## Wow!!!!! See TeX the Program \S 309
elsif (!$LaTeXML::ALIGN_STATE # SHOULD count nesting of { }!!! when SCANNED (not digested)
&& $LaTeXML::READING_ALIGNMENT
Expand Down Expand Up @@ -421,8 +424,8 @@ sub readBalanced {
if (!defined $token) {
# What's the right error handling now?
last; }
elsif (my $unexpanded = $$token[2]) { # Inline get_dont_expand
push(@tokens, $unexpanded); }
elsif (($cc == CC_CS) && ($$token[0] eq '\dont_expand')) {
push(@tokens, readToken($self)); } # Pass on NEXT token, unchanged.
elsif ($cc == CC_END) {
$level--;
if (!$level) {
Expand Down
5 changes: 2 additions & 3 deletions lib/LaTeXML/Core/State.pm
Original file line number Diff line number Diff line change
Expand Up @@ -347,7 +347,6 @@ sub lookupMeaning {
my ($self, $token) = @_;
if (my $cs = $token
&& $CATCODE_ACTIVE_OR_CS[$$token[1]]
&& !$$token[2] # return token itself, if \noexpand
&& $$token[0]) {
my $e = $$self{meaning}{$cs}; return $e && $$e[0]; }
else { return $token; } }
Expand Down Expand Up @@ -422,7 +421,7 @@ sub lookupExpandable {
return $defn; }
return; }

# Whether token must be wrapped as dont_expand
# Whether token is affected by \noexpand
sub isDontExpandable {
my ($self, $token) = @_;
# Basically: a CS or Active token that is either not defined, or is expandable
Expand Down Expand Up @@ -466,7 +465,7 @@ sub lookupDigestableDefinition {
# If a cs has been let to an executable token, lookup ITS defn.
if (((ref $defn) eq 'LaTeXML::Core::Token')
# If we're digesting an unexpanded, act like \relax
&& ($lookupname = ($$defn[2] ? '\relax' : $CATCODE_EXECUTABLE_PRIMITIVE_NAME[$$defn[1]]))
&& ($lookupname = $CATCODE_EXECUTABLE_PRIMITIVE_NAME[$$defn[1]])
&& ($entry = $$self{meaning}{$lookupname})) {
$defn = $$entry[0]; }
return $defn; }
Expand Down
23 changes: 2 additions & 21 deletions lib/LaTeXML/Core/Token.pm
Original file line number Diff line number Diff line change
Expand Up @@ -290,21 +290,6 @@ sub substituteParameters {

sub packParameters { return $_[0]; }

# Produce the "do not expand" form of a token (used for \noexpand).
# A token qualifies only when it is a control sequence or an active character
# AND $STATE->isDontExpandable says so; every other token is returned as-is.
# The marked form is a fresh token that looks like the control sequence \relax
# and carries the original token hidden in its third slot.
sub with_dont_expand {
  my ($self) = @_;
  my $catcode = $$self[1];
  # Anything that is neither a CS nor an active char is passed through untouched.
  return $self unless ($catcode == CC_CS) || ($catcode == CC_ACTIVE);
  # Likewise for tokens the current state does not consider dont-expandable.
  return $self unless $STATE->isDontExpandable($self);
  return bless ['\relax', CC_CS, $self], 'LaTeXML::Core::Token'; }

# Accessor for the token hidden inside a not-expanded wrapper:
# yields whatever is stored in the token's third slot, which is
# undef when the token was never marked as dont-expand.
sub get_dont_expand {
  my $token = shift;
  return $token->[2]; }

#======================================================================
# Note that this converts the string to a more `user readable' form using `standard' chars for catcodes.
# We'll need to be careful about using string instead of reverting for internal purposes where the
Expand Down Expand Up @@ -338,18 +323,14 @@ sub equals {
(defined $b
&& (ref $a) eq (ref $b))
&& ($$a[1] == $$b[1])
&& (($$a[1] == CC_SPACE) || ($$a[0] eq $$b[0]))
&& ((!$$a[2]) == (!$$b[2])) # must have same dont-expand-edness
; }
&& (($$a[1] == CC_SPACE) || ($$a[0] eq $$b[0])); }

my @CONTROLNAME = ( #[CONSTANT]
my @CONTROLNAME = ( #[CONSTANT]
qw( NUL SOH STX ETX EOT ENQ ACK BEL BS HT LF VT FF CR SO SI
DLE DC1 DC2 DC3 DC4 NAK SYN ETB CAN EM SUB ESC FS GS RS US));
# Primarily for error reporting.
sub stringify {
my ($self) = @_;
if ($$self[2]) {
return $$self[2]->stringify() . " (dont expand)"; }
my $string = $self->toString;
# Make the token's char content more printable, since this is for error messages.
if (length($string) == 1) {
Expand Down
3 changes: 1 addition & 2 deletions lib/LaTeXML/Package.pm
Original file line number Diff line number Diff line change
Expand Up @@ -384,8 +384,7 @@ sub Let {
# If strings are given, assume CS tokens (most common case)
$token1 = T_CS($token1) unless ref $token1;
$token2 = T_CS($token2) unless ref $token2;
$STATE->assignMeaning($token1,
($token2->get_dont_expand ? $token2 : $STATE->lookupMeaning($token2)), $scope);
$STATE->assignMeaning($token1, $STATE->lookupMeaning($token2), $scope);
AfterAssignment();
return; }

Expand Down
36 changes: 22 additions & 14 deletions lib/LaTeXML/Package/TeX.pool.ltxml
Original file line number Diff line number Diff line change
Expand Up @@ -809,10 +809,13 @@ DefConditionalI('\iffalse', undef, sub { 0; });
# This makes \relax disappear completely after digestion
# (which seems most TeX like).
DefPrimitive('\relax', sub { (); });
## However, this keeps a box, so it can appear in UnTeX
### However, this keeps a box, so it can appear in UnTeX
### DefPrimitive('\relax',undef);
## But if you do that, you've got to watch out since it usually
## shouldn't be a box; See the isRelax code in handleScripts, below
### shouldn't be a box; See the isRelax code in handleScripts, below
# Internal token produced by Gullet in response to \dont_expand;
# Acts like \relax, but isn't equal to it.
DefPrimitiveI('\special_relax', undef, sub { (); });

DefMacro('\number Number', sub { Explode($_[1]->valueOf); });
# define it here (only approximately), since it's already useful.
Expand Down Expand Up @@ -978,11 +981,20 @@ DefMacro('\expandafter Token Token', sub {
else {
($tok, $xtok); } });

# Replace the next token with it's not-expanded variant
# If next token is expandable, prefix it with the internal marker \dont_expand
# That token is never defined, explicitly handled in Gullet & should never escape the Gullet
DefMacroI('\noexpand', undef, sub {
my $token = $_[0]->readToken;
# Missing token likely the result of "{\noexpand}" for which TeX would be unperturbed
return ($token ? $token->with_dont_expand : ()); });
return ($token
? ((($$token[1] == CC_CS) || ($$token[1] == CC_ACTIVE)) && $STATE->isDontExpandable($token)
? (T_CS('\dont_expand'), $token)
: $token)
: ()); });

# Safety net for the internal marker \dont_expand, which \noexpand emits in
# front of a token it wants left unexpanded. The marker is meant to be consumed
# while tokens are being read; if this primitive is ever invoked, the marker
# leaked through, so report it as a 'misdefined' error instead of ignoring it.
DefPrimitiveI('\dont_expand', undef, sub {
Error('misdefined', '\dont_expand', $_[0],
"The token \\dont_expand should never reach Stomach!"); });

DefMacroI('\topmark', undef, Tokens());
DefMacroI('\firstmark', undef, Tokens());
Expand Down Expand Up @@ -2472,21 +2484,17 @@ DefPrimitive('\lowercase GeneralText', sub {
# Converts $tokens to a string in the fashion of \message and others:
# doubles #, converts to string; optionally adds spaces after control sequences
# in the spirit of the B Book, "show_token_list" routine, in 292.
# [This could be a $tokens->unpackParameters, but for the curious space treatment]
sub writableTokens {
my ($tokens) = @_;
my @tokens = $tokens->unlist;
# unwrap a \noexpand-created \relax to its actual content,
# to avoid confusing users with a \relax dontexpand
@tokens = map {
my $t = ($$_[2] || $_);
my $cc = $$t[1];
if ($cc == CC_CS) { ($t, T_SPACE); }
my $cc = $$_[1];
if ($cc == CC_CS) { ($_, T_SPACE); }
elsif ($cc == CC_SPACE) { (T_SPACE); }
elsif ($cc == CC_PARAM) { ($t, $t); }
elsif ($cc == CC_ARG) {
# B Book, 294. Reduce to param+integer
(T_PARAM, T_OTHER($$t[0])); }
else { $t; }
elsif ($cc == CC_PARAM) { ($_, $_); }
elsif ($cc == CC_ARG) { (T_PARAM, T_OTHER($$_[0])); }
else { $_; }
} @tokens;
return UnTeX(Tokens(@tokens), 1); }

Expand Down

0 comments on commit 072e61f

Please sign in to comment.