Skip to content

Commit

Permalink
Introduce caseInsensitive lexer rule option, fixes #3436
Browse files Browse the repository at this point in the history
  • Loading branch information
KvanTTT committed Dec 28, 2021
1 parent 6f41f9c commit 2ee4d10
Show file tree
Hide file tree
Showing 16 changed files with 261 additions and 151 deletions.
13 changes: 13 additions & 0 deletions doc/lexer-rules.md
Original file line number Diff line number Diff line change
Expand Up @@ -308,3 +308,16 @@ As of 4.5, you can also define channel names like enumerations with the followin
```
channels { WSCHANNEL, MYHIDDEN }
```
## Lexer Rule Options
### caseInsensitive
Defines whether the current lexer rule is case-insensitive.
The argument can be `true` or `false`.
The option overrides the value of the grammar-level `caseInsensitive` option, if that option is defined.
```g4
options { caseInsensitive=true; }
STRING options { caseInsensitive=false; } : 'N'? '\'' (~'\'' | '\'\'')* '\''; // lowercase n is not allowed
```
Original file line number Diff line number Diff line change
Expand Up @@ -505,6 +505,26 @@ public void testSetUp() throws Exception {
checkLexerMatches(lg, inputString, "TOKEN, EOF");
}

/**
 * Builds a lexer grammar whose only case-insensitive setting is the
 * rule-level option on TOKEN1 (declared over the lowercase set [a-f]) and
 * expects the upper-case input "ABCDEF" to be reported as "TOKEN1, EOF".
 */
@Test public void testCaseInsensitiveInLexerRule() throws Exception {
    String grammarText =
        "lexer grammar L;\n" +
        "TOKEN1 options { caseInsensitive=true; } : [a-f]+;\n" +
        "WS: [ ]+ -> skip;";
    LexerGrammar grammar = new LexerGrammar(grammarText);

    String upperCaseInput = "ABCDEF";
    String expectedTokens = "TOKEN1, EOF";
    checkLexerMatches(grammar, upperCaseInput, expectedTokens);
}

/**
 * Runs lexer L, whose grammar sets caseInsensitive=true at grammar level but
 * caseInsensitive=false on the STRING rule, against input that starts with a
 * lowercase 'n', and expects a token recognition error for that character.
 */
@Test public void testCaseInsensitiveInLexerRuleOverridesGlobalValue() {
    String lexerSource =
        "lexer grammar L;\n" +
        "options { caseInsensitive=true; }\n" +
        "STRING options { caseInsensitive=false; } : 'N'? '\\'' (~'\\'' | '\\'\\'')* '\\'';\n";
    String expectedErrors = "line 1:0 token recognition error at: 'n'\n";

    // The lexer output itself is not inspected; only the reported errors are.
    execLexer("L.g4", lexerSource, "L", "n'sample'");

    assertEquals(expectedErrors, getParseErrors());
}

protected void checkLexerMatches(LexerGrammar lg, String inputString, String expecting) {
ATN atn = createATN(lg, true);
CharStream input = CharStreams.fromString(inputString);
Expand Down
38 changes: 35 additions & 3 deletions tool-testsuite/test/org/antlr/v4/test/tool/TestSymbolIssues.java
Original file line number Diff line number Diff line change
Expand Up @@ -472,13 +472,45 @@ public void testLabelsForTokensWithMixedTypesLRWithoutLabels() {
testErrors(test, false);
}

@Test public void testIllegalModeOption() {
@Test public void testIllegalCaseInsensitiveOptionValue() {
String[] test = {
"lexer grammar L;\n" +
"options { caseInsensitive = badValue; }\n" +
"DEFAULT_TOKEN: [A-F]+;\n",
"TOKEN_1 options { caseInsensitive = badValue; } : [A-F]+;\n",

"warning(" + ErrorType.ILLEGAL_OPTION_VALUE.code + "): L.g4:2:28: unsupported option value caseInsensitive=badValue\n"
"warning(" + ErrorType.ILLEGAL_OPTION_VALUE.code + "): L.g4:2:28: unsupported option value caseInsensitive=badValue\n" +
"warning(" + ErrorType.ILLEGAL_OPTION_VALUE.code + "): L.g4:3:36: unsupported option value caseInsensitive=badValue\n"
};

testErrors(test, false);
}

/**
 * For each of the values "true" and "false" (in that order), declares the
 * same caseInsensitive value at grammar level and on the TOKEN rule, and
 * expects the REDUNDANT_CASE_INSENSITIVE_LEXER_RULE_OPTION warning at 3:16.
 */
@Test public void testRedundantCaseInsensitiveLexerRuleOption() {
    String[] optionValues = { "true", "false" };
    for (String value : optionValues) {
        String grammarText =
            "lexer grammar L;\n" +
            "options { caseInsensitive = " + value + "; }\n" +
            "TOKEN options { caseInsensitive = " + value + "; } : [A-F]+;\n";

        String expectedWarning =
            "warning(" + ErrorType.REDUNDANT_CASE_INSENSITIVE_LEXER_RULE_OPTION.code + "): L.g4:3:16: " +
            "caseInsensitive lexer rule option is redundant because its value equals to global value (" + value + ")\n";

        // testErrors takes {grammar, expected messages} pairs packed in one array.
        String[] grammarAndExpected = { grammarText, expectedWarning };
        testErrors(grammarAndExpected, false);
    }
}

@Test public void testCaseInsensitiveOptionInParseRule() {
String[] test = {
"grammar G;\n" +
"root options { caseInsensitive=true; } : 'token';",

"warning(" + ErrorType.ILLEGAL_OPTION.code + "): G.g4:2:15: unsupported option caseInsensitive\n"
};

testErrors(test, false);
Expand Down
4 changes: 2 additions & 2 deletions tool/src/org/antlr/v4/Tool.java
Original file line number Diff line number Diff line change
Expand Up @@ -288,8 +288,8 @@ protected void handleOptionSetArg(String arg) {
errMgr.toolError(ErrorType.BAD_OPTION_SET_SYNTAX, arg);
return;
}
if ( Grammar.parserOptions.contains(option) ||
Grammar.lexerOptions.contains(option) )
if ( Grammar.ParserOptions.contains(option) ||
Grammar.LexerOptions.contains(option) )
{
if ( grammarOptions==null ) grammarOptions = new HashMap<String, String>();
grammarOptions.put(option, value);
Expand Down
55 changes: 21 additions & 34 deletions tool/src/org/antlr/v4/automata/LexerATNFactory.java
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,6 @@ public class LexerATNFactory extends ParserATNFactory {

private final List<String> ruleCommands = new ArrayList<String>();

private final boolean caseInsensitive;

/**
* Maps from an action index to a {@link LexerAction} object.
*/
Expand All @@ -92,8 +90,6 @@ public LexerATNFactory(LexerGrammar g) {
super(g);
// use codegen to get correct language templates for lexer commands
String language = g.getOptionString("language");
String caseInsensitiveOption = g.getOptionString("caseInsensitive");
caseInsensitive = caseInsensitiveOption != null && caseInsensitiveOption.equals("true");
CodeGenerator gen = new CodeGenerator(g.tool, null, language);
codegenTemplates = gen.getTemplates();
}
Expand Down Expand Up @@ -207,51 +203,42 @@ public Handle lexerAltCommands(Handle alt, Handle cmds) {

@Override
public Handle lexerCallCommand(GrammarAST ID, GrammarAST arg) {
LexerAction lexerAction = createLexerAction(ID, arg);
if (lexerAction != null) {
return action(ID, lexerAction);
}

// fall back to standard action generation for the command
ST cmdST = codegenTemplates.getInstanceOf("Lexer" +
CharSupport.capitalize(ID.getText())+
"Command");
if (cmdST == null) {
g.tool.errMgr.grammarError(ErrorType.INVALID_LEXER_COMMAND, g.fileName, ID.token, ID.getText());
return epsilon(ID);
}

if (cmdST.impl.formalArguments == null || !cmdST.impl.formalArguments.containsKey("arg")) {
g.tool.errMgr.grammarError(ErrorType.UNWANTED_LEXER_COMMAND_ARGUMENT, g.fileName, ID.token, ID.getText());
return epsilon(ID);
}

cmdST.add("arg", arg.getText());
cmdST.add("grammar", arg.g);
return action(cmdST.render());
return lexerCallCommandOrCommand(ID, arg);
}

@Override
public Handle lexerCommand(GrammarAST ID) {
LexerAction lexerAction = createLexerAction(ID, null);
return lexerCallCommandOrCommand(ID, null);
}

private Handle lexerCallCommandOrCommand(GrammarAST ID, GrammarAST arg) {
LexerAction lexerAction = createLexerAction(ID, arg);
if (lexerAction != null) {
return action(ID, lexerAction);
}

// fall back to standard action generation for the command
ST cmdST = codegenTemplates.getInstanceOf("Lexer" +
CharSupport.capitalize(ID.getText()) +
CharSupport.capitalize(ID.getText())+
"Command");
if (cmdST == null) {
g.tool.errMgr.grammarError(ErrorType.INVALID_LEXER_COMMAND, g.fileName, ID.token, ID.getText());
return epsilon(ID);
}

if (cmdST.impl.formalArguments != null && cmdST.impl.formalArguments.containsKey("arg")) {
g.tool.errMgr.grammarError(ErrorType.MISSING_LEXER_COMMAND_ARGUMENT, g.fileName, ID.token, ID.getText());
boolean callCommand = arg != null;
boolean containsArg = cmdST.impl.formalArguments != null && cmdST.impl.formalArguments.containsKey("arg");
if (callCommand != containsArg) {
ErrorType errorType = callCommand ? ErrorType.UNWANTED_LEXER_COMMAND_ARGUMENT : ErrorType.MISSING_LEXER_COMMAND_ARGUMENT;
g.tool.errMgr.grammarError(errorType, g.fileName, ID.token, ID.getText());
return epsilon(ID);
}

if (callCommand) {
cmdST.add("arg", arg.getText());
cmdST.add("grammar", arg.g);
}

return action(cmdST.render());
}

Expand Down Expand Up @@ -279,7 +266,7 @@ public Handle set(GrammarAST associatedAST, List<GrammarAST> alts, boolean inver
int a = CharSupport.getCharValueFromGrammarCharLiteral(t.getChild(0).getText());
int b = CharSupport.getCharValueFromGrammarCharLiteral(t.getChild(1).getText());
if (checkRange((GrammarAST)t.getChild(0), (GrammarAST)t.getChild(1), a, b)) {
checkRangeAndAddToSet(associatedAST, t, set, a, b, caseInsensitive, null);
checkRangeAndAddToSet(associatedAST, t, set, a, b, currentRule.caseInsensitive, null);
}
}
else if ( t.getType()==ANTLRParser.LEXER_CHAR_SET ) {
Expand Down Expand Up @@ -567,11 +554,11 @@ private void applyPrevState(GrammarAST charSetAST, IntervalSet set, CharSetParse
}

private void checkCharAndAddToSet(GrammarAST ast, IntervalSet set, int c) {
checkRangeAndAddToSet(ast, ast, set, c, c, caseInsensitive, null);
checkRangeAndAddToSet(ast, ast, set, c, c, currentRule.caseInsensitive, null);
}

private void checkRangeAndAddToSet(GrammarAST mainAst, IntervalSet set, int a, int b) {
checkRangeAndAddToSet(mainAst, mainAst, set, a, b, caseInsensitive, null);
checkRangeAndAddToSet(mainAst, mainAst, set, a, b, currentRule.caseInsensitive, null);
}

private CharactersDataCheckStatus checkRangeAndAddToSet(GrammarAST rootAst, GrammarAST ast, IntervalSet set, int a, int b, boolean caseInsensitive, CharactersDataCheckStatus previousStatus) {
Expand Down Expand Up @@ -630,7 +617,7 @@ private CharactersDataCheckStatus checkRangeAndAddToSet(GrammarAST rootAst, Gram

private Transition createTransition(ATNState target, int from, int to, CommonTree tree) {
RangeBorderCharactersData charactersData = RangeBorderCharactersData.getAndCheckCharactersData(from, to, g, tree, true);
if (caseInsensitive) {
if (currentRule.caseInsensitive) {
if (charactersData.isSingleRange()) {
return CodePointTransitions.createWithCodePointRange(target, from, to);
}
Expand Down
24 changes: 18 additions & 6 deletions tool/src/org/antlr/v4/parse/ANTLRLexer.g
Original file line number Diff line number Diff line change
Expand Up @@ -163,14 +163,26 @@ import org.antlr.v4.runtime.misc.Interval;
* Return token or null if for some reason we can't find the start.
*/
public Token getRuleOrSubruleStartToken() {
if ( tokens==null ) return null;
if (tokens == null) return null;
int i = tokens.index();
int n = tokens.size();
if ( i>=n ) i = n-1; // seems index == n as we lex
while ( i>=0 && i<n) {
int n = tokens.size();
if (i >= n) i = n - 1; // seems index == n as we lex
boolean withinOptionsBlock = false;
while (i >= 0 && i < n) {
int ttype = tokens.get(i).getType();
if ( ttype == LPAREN || ttype == TOKEN_REF || ttype == RULE_REF ) {
return tokens.get(i);
if (withinOptionsBlock) {
// Ignore rule options content
if (ttype == OPTIONS) {
withinOptionsBlock = false;
}
}
else {
if (ttype == RBRACE) {
withinOptionsBlock = true;
}
else if (ttype == LPAREN || ttype == TOKEN_REF || ttype == RULE_REF) {
return tokens.get(i);
}
}
i--;
}
Expand Down
8 changes: 6 additions & 2 deletions tool/src/org/antlr/v4/parse/ANTLRParser.g
Original file line number Diff line number Diff line change
Expand Up @@ -520,9 +520,13 @@ lexerRule
paraphrases.pop();
}
: FRAGMENT?
TOKEN_REF COLON lexerRuleBlock SEMI
TOKEN_REF
optionsSpec?
COLON lexerRuleBlock SEMI
-> ^( RULE<RuleAST> TOKEN_REF
^(RULEMODIFIERS FRAGMENT)? lexerRuleBlock
^(RULEMODIFIERS FRAGMENT)? optionsSpec? lexerRuleBlock
)
;
Expand Down
5 changes: 3 additions & 2 deletions tool/src/org/antlr/v4/parse/GrammarTreeVisitor.g
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ public void discoverRule(RuleAST rule, GrammarAST ID, List<GrammarAST> modifiers
List<GrammarAST> actions,
GrammarAST block) { }
public void finishRule(RuleAST rule, GrammarAST ID, GrammarAST block) { }
public void discoverLexerRule(RuleAST rule, GrammarAST ID, List<GrammarAST> modifiers,
public void discoverLexerRule(RuleAST rule, GrammarAST ID, List<GrammarAST> modifiers, GrammarAST options,
GrammarAST block) { }
public void finishLexerRule(RuleAST rule, GrammarAST ID, GrammarAST block) { }
public void ruleCatch(GrammarAST arg, ActionAST action) { }
Expand Down Expand Up @@ -525,7 +525,8 @@ lexerRule
: ^( RULE TOKEN_REF
{currentRuleName=$TOKEN_REF.text; currentRuleAST=$RULE;}
(^(RULEMODIFIERS m=FRAGMENT {mods.add($m);}))?
{discoverLexerRule((RuleAST)$RULE, $TOKEN_REF, mods, (GrammarAST)input.LT(1));}
opts=optionsSpec*
{discoverLexerRule((RuleAST)$RULE, $TOKEN_REF, mods, $opts.start, (GrammarAST)input.LT(1));}
lexerRuleBlock
{
finishLexerRule((RuleAST)$RULE, $TOKEN_REF, $lexerRuleBlock.start);
Expand Down
Loading

0 comments on commit 2ee4d10

Please sign in to comment.