From 072e61fd96e87120e3701802a7a67b91f42df186 Mon Sep 17 00:00:00 2001 From: Bruce Miller Date: Fri, 7 Jul 2023 10:45:04 -0400 Subject: [PATCH] More TeX-like: Handle \noexpand by prefixing \dont_expand, rather than wrapping token; Gullet never exposes \dont_expand, but (sometimes) returns a magic internal \special_relax, distinct from \relax --- lib/LaTeXML/Core/Definition/Expandable.pm | 10 +++---- lib/LaTeXML/Core/Gullet.pm | 13 ++++---- lib/LaTeXML/Core/State.pm | 5 ++-- lib/LaTeXML/Core/Token.pm | 23 ++------------- lib/LaTeXML/Package.pm | 3 +- lib/LaTeXML/Package/TeX.pool.ltxml | 36 ++++++++++++++--------- 6 files changed, 39 insertions(+), 51 deletions(-) diff --git a/lib/LaTeXML/Core/Definition/Expandable.pm b/lib/LaTeXML/Core/Definition/Expandable.pm index aaa965a518..72bc4cf393 100644 --- a/lib/LaTeXML/Core/Definition/Expandable.pm +++ b/lib/LaTeXML/Core/Definition/Expandable.pm @@ -29,9 +29,7 @@ sub new { if (ref $expansion eq 'LaTeXML::Core::Tokens') { Fatal('misdefined', $cs, $source, "Expansion of '" . ToString($cs) . "' has unbalanced {}", "Expansion is " . ToString($expansion)) unless $expansion->isBalanced; - # rescan for match tokens and unwrap dont_expand... - $expansion = $expansion->packParameters unless $traits{nopackParameters}; - } + $expansion = $expansion->packParameters unless $traits{nopackParameters}; } elsif (!ref $expansion) { $expansion = TokenizeInternal($expansion)->packParameters; } @@ -55,9 +53,9 @@ sub invoke { no warnings 'recursion'; my ($self, $gullet, $onceonly) = @_; # shortcut for "trivial" macros; but only if not tracing & profiling!!!! 
- my $_tracing = $STATE->lookupValue('TRACING') || 0; - my $tracing = ($_tracing & TRACE_MACROS); - my $profiled = ($_tracing & TRACE_PROFILE) && ($LaTeXML::CURRENT_TOKEN || $$self{cs}); + my $_tracing = $STATE->lookupValue('TRACING') || 0; + my $tracing = ($_tracing & TRACE_MACROS); + my $profiled = ($_tracing & TRACE_PROFILE) && ($LaTeXML::CURRENT_TOKEN || $$self{cs}); my $expansion = $$self{expansion}; my $etype = ref $expansion; my $result; diff --git a/lib/LaTeXML/Core/Gullet.pm b/lib/LaTeXML/Core/Gullet.pm index 141653fdbb..d1448f0a16 100644 --- a/lib/LaTeXML/Core/Gullet.pm +++ b/lib/LaTeXML/Core/Gullet.pm @@ -294,6 +294,9 @@ sub readToken { && $LaTeXML::READING_ALIGNMENT && (($atoken, $atype, $ahidden) = $self->isColumnEnd($token))) { $self->handleTemplate($LaTeXML::READING_ALIGNMENT, $token, $atype, $ahidden); } + elsif ((defined $token) && ($$token[1] == CC_CS) && ($$token[0] eq '\dont_expand')) { + my $unexpanded = $self->readToken; # Replace next token with a special \relax + return T_CS('\special_relax'); } else { last; } } return $token; } @@ -343,9 +346,9 @@ sub readXToken { if (!defined $token) { return unless $autoclose && $$self{autoclose} && @{ $$self{mouthstack} }; $self->closeMouth; } # Next input stream. - elsif (my $unexpanded = $$token[2]) { # Handle \noexpand; Inline get_dont_expand - return ($for_conditional && ($$unexpanded[1] == CC_ACTIVE) ? $unexpanded : T_CS('\relax')); - } + elsif (($cc == CC_CS) && ($$token[0] eq '\dont_expand')) { + my $unexpanded = $self->readToken; + return ($for_conditional && ($$unexpanded[1] == CC_ACTIVE) ? $unexpanded : T_CS('\special_relax')); } ## Wow!!!!! See TeX the Program \S 309 elsif (!$LaTeXML::ALIGN_STATE # SHOULD count nesting of { }!!! when SCANNED (not digested) && $LaTeXML::READING_ALIGNMENT @@ -421,8 +424,8 @@ sub readBalanced { if (!defined $token) { # What's the right error handling now? 
last; } - elsif (my $unexpanded = $$token[2]) { # Inline get_dont_expand - push(@tokens, $unexpanded); } + elsif (($cc == CC_CS) && ($$token[0] eq '\dont_expand')) { + push(@tokens, readToken($self)); } # Pass on NEXT token, unchanged. elsif ($cc == CC_END) { $level--; if (!$level) { diff --git a/lib/LaTeXML/Core/State.pm b/lib/LaTeXML/Core/State.pm index bc6dc8574f..3d8f7c4968 100644 --- a/lib/LaTeXML/Core/State.pm +++ b/lib/LaTeXML/Core/State.pm @@ -347,7 +347,6 @@ sub lookupMeaning { my ($self, $token) = @_; if (my $cs = $token && $CATCODE_ACTIVE_OR_CS[$$token[1]] - && !$$token[2] # return token itself, if \noexpand && $$token[0]) { my $e = $$self{meaning}{$cs}; return $e && $$e[0]; } else { return $token; } } @@ -422,7 +421,7 @@ sub lookupExpandable { return $defn; } return; } -# Whether token must be wrapped as dont_expand +# Whether token is affected by \noexpand sub isDontExpandable { my ($self, $token) = @_; # Basically: a CS or Active token that is either not defined, or is expandable @@ -466,7 +465,7 @@ sub lookupDigestableDefinition { # If a cs has been let to an executable token, lookup ITS defn. if (((ref $defn) eq 'LaTeXML::Core::Token') # If we're digesting an unexpanded, act like \relax - && ($lookupname = ($$defn[2] ? '\relax' : $CATCODE_EXECUTABLE_PRIMITIVE_NAME[$$defn[1]])) + && ($lookupname = $CATCODE_EXECUTABLE_PRIMITIVE_NAME[$$defn[1]]) && ($entry = $$self{meaning}{$lookupname})) { $defn = $$entry[0]; } return $defn; } diff --git a/lib/LaTeXML/Core/Token.pm b/lib/LaTeXML/Core/Token.pm index 0760f6ecd0..7dbb999700 100644 --- a/lib/LaTeXML/Core/Token.pm +++ b/lib/LaTeXML/Core/Token.pm @@ -290,21 +290,6 @@ sub substituteParameters { sub packParameters { return $_[0]; } -# Mark a token as not to be expanded (\noexpand) by hiding itself as the 3rd element of a new token. -# Wonder if this should only have effect on expandable tokens? 
-sub with_dont_expand { - my ($self) = @_; - my $cc = $$self[1]; - return ((($cc == CC_CS) || ($cc == CC_ACTIVE)) && $STATE->isDontExpandable($self)) - ? bless ['\relax', CC_CS, $self], 'LaTeXML::Core::Token' - : $self; } - -# Return the original token of a not-expanded token, -# or undef if it isn't marked as such. -sub get_dont_expand { - my ($self) = @_; - return $$self[2]; } - #====================================================================== # Note that this converts the string to a more `user readable' form using `standard' chars for catcodes. # We'll need to be careful about using string instead of reverting for internal purposes where the @@ -338,18 +323,14 @@ sub equals { (defined $b && (ref $a) eq (ref $b)) && ($$a[1] == $$b[1]) - && (($$a[1] == CC_SPACE) || ($$a[0] eq $$b[0])) - && ((!$$a[2]) == (!$$b[2])) # must have same dont-expand-edness - ; } + && (($$a[1] == CC_SPACE) || ($$a[0] eq $$b[0])); } -my @CONTROLNAME = ( #[CONSTANT] +my @CONTROLNAME = ( #[CONSTANT] qw( NUL SOH STX ETX EOT ENQ ACK BEL BS HT LF VT FF CR SO SI DLE DC1 DC2 DC3 DC4 NAK SYN ETB CAN EM SUB ESC FS GS RS US)); # Primarily for error reporting. sub stringify { my ($self) = @_; - if ($$self[2]) { - return $$self[2]->stringify() . " (dont expand)"; } my $string = $self->toString; # Make the token's char content more printable, since this is for error messages. if (length($string) == 1) { diff --git a/lib/LaTeXML/Package.pm b/lib/LaTeXML/Package.pm index 8b3e0317cf..57b1501ec3 100644 --- a/lib/LaTeXML/Package.pm +++ b/lib/LaTeXML/Package.pm @@ -384,8 +384,7 @@ sub Let { # If strings are given, assume CS tokens (most common case) $token1 = T_CS($token1) unless ref $token1; $token2 = T_CS($token2) unless ref $token2; - $STATE->assignMeaning($token1, - ($token2->get_dont_expand ? 
$token2 : $STATE->lookupMeaning($token2)), $scope); + $STATE->assignMeaning($token1, $STATE->lookupMeaning($token2), $scope); AfterAssignment(); return; } diff --git a/lib/LaTeXML/Package/TeX.pool.ltxml b/lib/LaTeXML/Package/TeX.pool.ltxml index 87ea02c3ba..67fbad11e3 100644 --- a/lib/LaTeXML/Package/TeX.pool.ltxml +++ b/lib/LaTeXML/Package/TeX.pool.ltxml @@ -809,10 +809,13 @@ DefConditionalI('\iffalse', undef, sub { 0; }); # This makes \relax disappear completely after digestion # (which seems most TeX like). DefPrimitive('\relax', sub { (); }); -## However, this keeps a box, so it can appear in UnTeX +### However, this keeps a box, so it can appear in UnTeX ### DefPrimitive('\relax',undef); ## But if you do that, you've got to watch out since it usually -## shouldn't be a box; See the isRelax code in handleScripts, below +### shouldn't be a box; See the isRelax code in handleScripts, below +# Internal token produced by Gullet in response to \dont_expand; +# Acts like \relax, but isn't equal to it. +DefPrimitiveI('\special_relax', undef, sub { (); }); DefMacro('\number Number', sub { Explode($_[1]->valueOf); }); # define it here (only approxmiately), since it's already useful. @@ -978,11 +981,20 @@ DefMacro('\expandafter Token Token', sub { else { ($tok, $xtok); } }); -# Replace the next token with it's not-expanded variant +# If the next token is a CS or active character that is expandable or undefined, prefix it with the internal marker \dont_expand +# That marker has no real meaning (its only definition reports an error); it is handled explicitly in Gullet & should never escape the Gullet DefMacroI('\noexpand', undef, sub { my $token = $_[0]->readToken; # Missing token likely the result of "{\noexpand}" for which TeX would be unperturbed - return ($token ? $token->with_dont_expand : ()); }); + return ($token + ? ((($$token[1] == CC_CS) || ($$token[1] == CC_ACTIVE)) && $STATE->isDontExpandable($token) + ? 
(T_CS('\dont_expand'), $token) + : $token) + : ()); }); + +DefPrimitiveI('\dont_expand', undef, sub { + Error('misdefined', '\dont_expand', $_[0], + "The token \\dont_expand should never reach Stomach!"); }); DefMacroI('\topmark', undef, Tokens()); DefMacroI('\firstmark', undef, Tokens()); @@ -2472,21 +2484,17 @@ DefPrimitive('\lowercase GeneralText', sub { # Converts $tokens to a string in the fashion of \message and others: # doubles #, converts to string; optionally adds spaces after control sequences # in the spirit of the B Book, "show_token_list" routine, in 292. +# [This could be a $tokens->unpackParameters, but for the curious space treatment] sub writableTokens { my ($tokens) = @_; my @tokens = $tokens->unlist; - # unwrap a \noexpand-created \relax to its actual content, - # to avoid confusing users with a \relax dontexpand @tokens = map { - my $t = ($$_[2] || $_); - my $cc = $$t[1]; - if ($cc == CC_CS) { ($t, T_SPACE); } + my $cc = $$_[1]; + if ($cc == CC_CS) { ($_, T_SPACE); } elsif ($cc == CC_SPACE) { (T_SPACE); } - elsif ($cc == CC_PARAM) { ($t, $t); } - elsif ($cc == CC_ARG) { - # B Book, 294. Reduce to param+integer - (T_PARAM, T_OTHER($$t[0])); } - else { $t; } + elsif ($cc == CC_PARAM) { ($_, $_); } + elsif ($cc == CC_ARG) { (T_PARAM, T_OTHER($$_[0])); } + else { $_; } } @tokens; return UnTeX(Tokens(@tokens), 1); }