From 632ff01bbb0c56db7ca7041c67a61234b9462d06 Mon Sep 17 00:00:00 2001 From: morrySnow <101034200+morrySnow@users.noreply.github.com> Date: Fri, 1 Jul 2022 21:25:13 +0800 Subject: [PATCH] [enhancement](Nereids) add post porcessor and error listener to prser (#10306) add parser error listener and post processor to parser error listener: - throw exception when parser find unexpected syntax post processor: - throw exception when find error indent - replace '``' with '`' in quoted identifier - replace non reserved key word with normal identifier --- .../nereids/errors/QueryParsingErrors.java | 31 ++++++++ .../nereids/exceptions/AnalysisException.java | 49 ++++++++++++- .../nereids/exceptions/ParseException.java | 72 +++++++++++++++++++ .../doris/nereids/parser/NereidsParser.java | 33 +++++---- .../Origin.java} | 18 +++-- .../nereids/parser/ParseErrorListener.java | 45 ++++++++++++ .../doris/nereids/parser/ParserUtils.java | 12 ++++ .../doris/nereids/parser/PostProcessor.java | 71 ++++++++++++++++++ .../nereids/parser/NereidsParserTest.java | 29 ++++++-- .../rewrite/ExpressionRewriteTest.java | 4 +- .../expressions/ExpressionParserTest.java | 2 +- 11 files changed, 334 insertions(+), 32 deletions(-) create mode 100644 fe/fe-core/src/main/java/org/apache/doris/nereids/errors/QueryParsingErrors.java create mode 100644 fe/fe-core/src/main/java/org/apache/doris/nereids/exceptions/ParseException.java rename fe/fe-core/src/main/java/org/apache/doris/nereids/{exceptions/ParsingException.java => parser/Origin.java} (62%) create mode 100644 fe/fe-core/src/main/java/org/apache/doris/nereids/parser/ParseErrorListener.java create mode 100644 fe/fe-core/src/main/java/org/apache/doris/nereids/parser/PostProcessor.java diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/errors/QueryParsingErrors.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/errors/QueryParsingErrors.java new file mode 100644 index 00000000000000..7d63a84a63a9d1 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/errors/QueryParsingErrors.java @@ -0,0 +1,31 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.errors; + +import org.apache.doris.nereids.DorisParser.ErrorIdentContext; +import org.apache.doris.nereids.exceptions.ParseException; + +/** + * Exception packaging to improve code readability. + */ +public class QueryParsingErrors { + public static ParseException unquotedIdentifierError(String ident, ErrorIdentContext ctx) { + return new ParseException(String.format("Possibly unquoted identifier %s detected. " + + "Please consider quoting it with back-quotes as `%s`", ident, ident), ctx); + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/exceptions/AnalysisException.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/exceptions/AnalysisException.java index 16e0ef8482230a..ce2310f6ddbece 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/exceptions/AnalysisException.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/exceptions/AnalysisException.java @@ -17,15 +17,58 @@ package org.apache.doris.nereids.exceptions; +import org.apache.doris.nereids.trees.plans.logical.LogicalPlan; + +import java.util.Optional; + /** Nereids's AnalysisException. */ public class AnalysisException extends RuntimeException { + private final String message; + private final Optional line; + private final Optional startPosition; + private final Optional plan; - public AnalysisException(String msg, Throwable cause) { - super(msg, cause); + public AnalysisException(String message, Throwable cause, Optional line, + Optional startPosition, Optional plan) { + super(message, cause); + this.message = message; + this.line = line; + this.startPosition = startPosition; + this.plan = plan; } - public AnalysisException(String message) { + public AnalysisException(String message, Optional line, + Optional startPosition, Optional plan) { super(message); + this.message = message; + this.line = line; + this.startPosition = startPosition; + this.plan = plan; + } + + + public AnalysisException(String message, Throwable cause) { + this(message, cause, Optional.empty(), Optional.empty(), Optional.empty()); + } + + public AnalysisException(String message) { + this(message, Optional.empty(), Optional.empty(), Optional.empty()); + } + + @Override + public String getMessage() { + String planAnnotation = plan.map(p -> ";\n" + p.treeString()).orElse(""); + return getSimpleMessage() + planAnnotation; + } + + private String getSimpleMessage() { + if (line.isPresent() || startPosition.isPresent()) { + String lineAnnotation = line.map(l -> "line " + l).orElse(""); + String positionAnnotation = startPosition.map(s -> " pos " + s).orElse(""); + return message + ";" + lineAnnotation + positionAnnotation; + } else { + return message; + } } // TODO: support ErrorCode diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/exceptions/ParseException.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/exceptions/ParseException.java new file mode 100644 index 00000000000000..6eefc492b39b66 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/exceptions/ParseException.java @@ -0,0 +1,72 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.exceptions; + +import org.apache.doris.nereids.parser.Origin; +import org.apache.doris.nereids.parser.ParserUtils; + +import org.antlr.v4.runtime.ParserRuleContext; + +import java.util.Optional; + +/** + * sql parsing exception. + */ +public class ParseException extends AnalysisException { + private final String message; + private final Origin start; + private final Optional command; + + public ParseException(String message, Origin start, Optional command) { + super(message, start.line, start.startPosition, Optional.empty()); + this.message = message; + this.start = start; + this.command = command; + } + + public ParseException(String message, ParserRuleContext ctx) { + this(message, ParserUtils.position(ctx.getStart()), Optional.of(ParserUtils.command(ctx))); + } + + @Override + public String getMessage() { + StringBuilder sb = new StringBuilder(); + sb.append("\n").append(message); + if (start.line.isPresent() && start.startPosition.isPresent()) { + int line = start.line.get(); + int startPosition = start.startPosition.get(); + sb.append("(line ").append(line).append(", pos").append(startPosition).append(")").append("\n"); + if (command.isPresent()) { + sb.append("\n== SQL ==\n"); + String cmd = command.get(); + String[] splitCmd = cmd.split("\n"); + for (int i = 0; i < line; i++) { + sb.append(splitCmd[i]).append("\n"); + } + for (int i = 0; i < startPosition; i++) { + sb.append("-"); + } + sb.append("^^^\n"); + for (int i = line; i < splitCmd.length; i++) { + sb.append(splitCmd[i]).append("\n"); + } + } + } + return sb.toString(); + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/NereidsParser.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/NereidsParser.java index 5f294cafa4cd3e..0b12b65d1cfe01 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/NereidsParser.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/NereidsParser.java @@ -20,7 +20,6 @@ import org.apache.doris.analysis.StatementBase; import org.apache.doris.nereids.DorisLexer; import org.apache.doris.nereids.DorisParser; -import org.apache.doris.nereids.exceptions.ParsingException; import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.plans.logical.LogicalPlan; import org.apache.doris.nereids.trees.plans.logical.LogicalPlanAdapter; @@ -39,6 +38,8 @@ * Sql parser, convert sql DSL to logical plan. */ public class NereidsParser { + private static final ParseErrorListener PARSE_ERROR_LISTENER = new ParseErrorListener(); + private static final PostProcessor POST_PROCESSOR = new PostProcessor(); /** * In MySQL protocol, client could send multi-statement in. @@ -61,31 +62,33 @@ public List parseSQL(String originStr) throws Exception { * @param sql sql string * @return logical plan */ - public LogicalPlan parseSingle(String sql) throws Exception { + public LogicalPlan parseSingle(String sql) { return (LogicalPlan) parse(sql, DorisParser::singleStatement); } - public List parseMultiple(String sql) throws Exception { + public List parseMultiple(String sql) { return (List) parse(sql, DorisParser::multiStatements); } + public Expression parseExpression(String expression) { + return (Expression) parse(expression, DorisParser::expression); + } + private Object parse(String sql, Function parseFunction) { - try { - ParserRuleContext tree = toAst(sql, parseFunction); - LogicalPlanBuilder logicalPlanBuilder = new LogicalPlanBuilder(); - return logicalPlanBuilder.visit(tree); - } catch (StackOverflowError e) { - throw new ParsingException(e.getMessage()); - } + ParserRuleContext tree = toAst(sql, parseFunction); + LogicalPlanBuilder logicalPlanBuilder = new LogicalPlanBuilder(); + return logicalPlanBuilder.visit(tree); } private ParserRuleContext toAst(String sql, Function parseFunction) { DorisLexer lexer = new DorisLexer(new CaseInsensitiveStream(CharStreams.fromString(sql))); CommonTokenStream tokenStream = new CommonTokenStream(lexer); DorisParser parser = new DorisParser(tokenStream); - // parser.addParseListener(PostProcessor) - // parser.removeErrorListeners() - // parser.addErrorListener(ParseErrorListener) + + parser.addParseListener(POST_PROCESSOR); + parser.removeErrorListeners(); + parser.addErrorListener(PARSE_ERROR_LISTENER); + ParserRuleContext tree; try { // first, try parsing with potentially faster SLL mode @@ -101,8 +104,4 @@ private ParserRuleContext toAst(String sql, Function line; + public final Optional startPosition; - public ParsingException(String message) { - super(message); + public Origin(int line, int startPosition) { + this(Optional.of(line), Optional.of(startPosition)); } + public Origin(Optional line, Optional startPosition) { + this.line = line; + this.startPosition = startPosition; + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/ParseErrorListener.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/ParseErrorListener.java new file mode 100644 index 00000000000000..749b37eae0c214 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/ParseErrorListener.java @@ -0,0 +1,45 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.parser; + +import org.apache.doris.nereids.exceptions.ParseException; + +import org.antlr.v4.runtime.BaseErrorListener; +import org.antlr.v4.runtime.CommonToken; +import org.antlr.v4.runtime.RecognitionException; +import org.antlr.v4.runtime.Recognizer; + +import java.util.Optional; + +/** + * Listen parse error, and throw {@link ParseException} with reasonable message. + */ +public class ParseErrorListener extends BaseErrorListener { + @Override + public void syntaxError(Recognizer recognizer, Object offendingSymbol, int line, int charPositionInLine, + String msg, RecognitionException e) { + Origin start; + if (offendingSymbol instanceof CommonToken) { + CommonToken token = (CommonToken) offendingSymbol; + start = new Origin(line, token.getCharPositionInLine()); + } else { + start = new Origin(line, charPositionInLine); + } + throw new ParseException(msg, start, Optional.empty()); + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/ParserUtils.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/ParserUtils.java index 5cb1a488ab2185..0afc7ddce15a5d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/ParserUtils.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/ParserUtils.java @@ -17,7 +17,10 @@ package org.apache.doris.nereids.parser; +import org.antlr.v4.runtime.CharStream; import org.antlr.v4.runtime.ParserRuleContext; +import org.antlr.v4.runtime.Token; +import org.antlr.v4.runtime.misc.Interval; import java.util.function.Supplier; @@ -28,4 +31,13 @@ public class ParserUtils { public static T withOrigin(ParserRuleContext ctx, Supplier f) { return f.get(); } + + public static String command(ParserRuleContext ctx) { + CharStream stream = ctx.getStart().getInputStream(); + return stream.getText(Interval.of(0, stream.size() - 1)); + } + + public static Origin position(Token token) { + return new Origin(token.getLine(), token.getCharPositionInLine()); + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/PostProcessor.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/PostProcessor.java new file mode 100644 index 00000000000000..d94fb545081f08 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/PostProcessor.java @@ -0,0 +1,71 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.parser; + +import org.apache.doris.nereids.DorisParser; +import org.apache.doris.nereids.DorisParser.ErrorIdentContext; +import org.apache.doris.nereids.DorisParser.NonReservedContext; +import org.apache.doris.nereids.DorisParser.QuotedIdentifierContext; +import org.apache.doris.nereids.DorisParserBaseListener; +import org.apache.doris.nereids.errors.QueryParsingErrors; + +import org.antlr.v4.runtime.CommonToken; +import org.antlr.v4.runtime.ParserRuleContext; +import org.antlr.v4.runtime.Token; +import org.antlr.v4.runtime.tree.TerminalNodeImpl; + +import java.util.function.Function; + +/** + * Do some post processor after parse to facilitate subsequent analysis. + */ +public class PostProcessor extends DorisParserBaseListener { + @Override + public void exitErrorIdent(ErrorIdentContext ctx) { + String ident = ctx.getParent().getText(); + throw QueryParsingErrors.unquotedIdentifierError(ident, ctx); + } + + @Override + public void exitQuotedIdentifier(QuotedIdentifierContext ctx) { + replaceTokenByIdentifier(ctx, 1, token -> { + // Remove the double back ticks in the string. + token.setText(token.getText().replace("``", "`")); + return token; + }); + } + + @Override + public void exitNonReserved(NonReservedContext ctx) { + replaceTokenByIdentifier(ctx, 0, i -> i); + } + + private void replaceTokenByIdentifier(ParserRuleContext ctx, int stripMargins, + Function f) { + ParserRuleContext parent = ctx.getParent(); + parent.removeLastChild(); + Token token = (Token) (ctx.getChild(0).getPayload()); + CommonToken newToken = new CommonToken( + new org.antlr.v4.runtime.misc.Pair<>(token.getTokenSource(), token.getInputStream()), + DorisParser.IDENTIFIER, + token.getChannel(), + token.getStartIndex() + stripMargins, + token.getStopIndex() - stripMargins); + parent.addChild(new TerminalNodeImpl(f.apply(newToken))); + } +} diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/parser/NereidsParserTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/parser/NereidsParserTest.java index f1104e97dab370..ff1850921c1bc1 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/parser/NereidsParserTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/parser/NereidsParserTest.java @@ -17,9 +17,10 @@ package org.apache.doris.nereids.parser; +import org.apache.doris.nereids.exceptions.ParseException; +import org.apache.doris.nereids.operators.plans.logical.LogicalProject; import org.apache.doris.nereids.trees.plans.logical.LogicalPlan; -import org.junit.Assert; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; @@ -28,7 +29,7 @@ public class NereidsParserTest { @Test - public void testParseMultiple() throws Exception { + public void testParseMultiple() { NereidsParser nereidsParser = new NereidsParser(); String sql = "SELECT b FROM test;SELECT a FROM test;"; List logicalPlanList = nereidsParser.parseMultiple(sql); @@ -36,7 +37,7 @@ public void testParseMultiple() throws Exception { } @Test - public void testSingle() throws Exception { + public void testSingle() { NereidsParser nereidsParser = new NereidsParser(); String sql = "SELECT * FROM test;"; Exception exceptionOccurred = null; @@ -46,6 +47,26 @@ public void testSingle() throws Exception { exceptionOccurred = e; e.printStackTrace(); } - Assert.assertNull(exceptionOccurred); + Assertions.assertNull(exceptionOccurred); + } + + @Test + public void testErrorListener() { + Exception exception = Assertions.assertThrows(ParseException.class, () -> { + String sql = "select * from t1 where a = 1 illegal_symbol"; + NereidsParser nereidsParser = new NereidsParser(); + nereidsParser.parseSingle(sql); + }); + Assertions.assertEquals("\nextraneous input 'illegal_symbol' expecting {, ';'}(line 1, pos29)\n", + exception.getMessage()); + } + + @Test + public void testPostProcessor() { + String sql = "select `AD``D` from t1 where a = 1"; + NereidsParser nereidsParser = new NereidsParser(); + LogicalPlan logicalPlan = nereidsParser.parseSingle(sql); + LogicalProject logicalProject = (LogicalProject) logicalPlan.getOperator(); + Assertions.assertEquals("AD`D", logicalProject.getProjects().get(0).getName()); } } diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/rewrite/ExpressionRewriteTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/rewrite/ExpressionRewriteTest.java index 0cb2e4c2bea485..5eef34a2704910 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/rewrite/ExpressionRewriteTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/rewrite/ExpressionRewriteTest.java @@ -57,8 +57,8 @@ public void testNormalizeExpressionRewrite() { } private void assertRewrite(String expression, String expected) { - Expression needRewriteExpression = PARSER.createExpression(expression); - Expression expectedExpression = PARSER.createExpression(expected); + Expression needRewriteExpression = PARSER.parseExpression(expression); + Expression expectedExpression = PARSER.parseExpression(expected); Expression rewrittenExpression = executor.rewrite(needRewriteExpression); Assert.assertEquals(expectedExpression, rewrittenExpression); } diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/ExpressionParserTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/ExpressionParserTest.java index b868c695840e16..fc4749970fe845 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/ExpressionParserTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/ExpressionParserTest.java @@ -31,7 +31,7 @@ private void assertSql(String sql) throws Exception { } private void assertExpr(String expr) { - Expression expression = PARSER.createExpression(expr); + Expression expression = PARSER.parseExpression(expr); System.out.println(expression.sql()); }