[ES|QL] adapt to dev mode grammar gating (elastic#192027)

## Summary Pulls in the changes ES team introduced in the ES|QL grammar in elastic/elasticsearch#111995. Nothing should change as far as our functionality except that commands the ES team marks with the `DEV_` prefix will no longer show up in our validation errors. **Before** <img width="859" alt="Screenshot 2024-09-03 at 3 11 21 PM" src="https://github.com/user-attachments/assets/69dee5f1-dd26-4d85-b83b-a0b4689a3c09"> **After** <img width="848" alt="Screenshot 2024-09-03 at 3 10 35 PM" src="https://github.com/user-attachments/assets/31c07a0a-4e59-4e11-af72-a1eb7b7f1235"> Successful ES|QL grammar sync run: https://buildkite.com/elastic/kibana-es-ql-grammar-sync/builds/53
jcger · Sep 4, 2024 · a2dbebc · a2dbebc
1 parent 8b7d965
commit a2dbebc
Show file tree

Hide file tree

Showing 23 changed files with 2,408 additions and 2,190 deletions.
diff --git a/.buildkite/scripts/steps/esql_grammar_sync.sh b/.buildkite/scripts/steps/esql_grammar_sync.sh
@@ -11,15 +11,14 @@ synchronize_lexer_grammar () {
 
   # Insert the license header
   temp_file=$(mktemp)
-  printf "%s\n\n// DO NOT MODIFY THIS FILE BY HAND. IT IS MANAGED BY A CI JOB.\n\n%s" "$license_header" "$(cat $destination_file)" > "$temp_file"
+  printf "// DO NOT MODIFY THIS FILE BY HAND. IT IS MANAGED BY A CI JOB.\n\n%s" "$(cat $destination_file)" > "$temp_file"
   mv "$temp_file" "$destination_file"
 
   # Replace the line containing "lexer grammar" with "lexer grammar esql_lexer;"
   sed -i -e 's/lexer grammar.*$/lexer grammar esql_lexer;/' "$destination_file"
 
-  # Insert "options { caseInsensitive = true; }" one line below
-  sed -i -e '/lexer grammar esql_lexer;/a\
-  options { caseInsensitive = true; }' "$destination_file"
+  # Replace the line containing "superClass" with "superClass=lexer_config;"
+  sed -i -e 's/superClass.*$/superClass=lexer_config;/' "$destination_file"
 
   echo "File copied and modified successfully."
 }
@@ -34,14 +33,17 @@ synchronize_parser_grammar () {
 
   # Insert the license header
   temp_file=$(mktemp)
-  printf "%s\n\n// DO NOT MODIFY THIS FILE BY HAND. IT IS MANAGED BY A CI JOB.\n\n%s" "$license_header" "$(cat ${destination_file})" > "$temp_file"
+  printf "// DO NOT MODIFY THIS FILE BY HAND. IT IS MANAGED BY A CI JOB.\n\n%s" "$(cat ${destination_file})" > "$temp_file"
   mv "$temp_file" "$destination_file"
 
   # Replace the line containing "parser grammar" with "parser grammar esql_parser;"
   sed -i -e 's/parser grammar.*$/parser grammar esql_parser;/' "$destination_file"
 
-  # Replace options {tokenVocab=EsqlBaseLexer;} with options {tokenVocab=esql_lexer;}
-  sed -i -e 's/options {tokenVocab=EsqlBaseLexer;}/options {tokenVocab=esql_lexer;}/' "$destination_file"
+  # Replace tokenVocab=EsqlBaseLexer; with tokenVocab=esql_lexer;
+  sed -i -e 's/tokenVocab=EsqlBaseLexer;/tokenVocab=esql_lexer;/' "$destination_file"
+
+  # Replace the line containing "superClass" with "superClass=parser_config;"
+  sed -i -e 's/superClass.*$/superClass=parser_config;/' "$destination_file"
 
   echo "File copied and modified successfully."
 }

diff --git a/packages/kbn-esql-ast/BUILD.bazel b/packages/kbn-esql-ast/BUILD.bazel
@@ -3,6 +3,7 @@ load("@build_bazel_rules_nodejs//:index.bzl", "js_library")
 SRCS = glob(
   [
     "**/*.ts",
+    "**/*.js",
   ],
   exclude = [
     "**/*.config.js",

diff --git a/packages/kbn-esql-ast/src/antlr/esql_lexer.g4 b/packages/kbn-esql-ast/src/antlr/esql_lexer.g4
@@ -4,11 +4,58 @@
  * 2.0; you may not use this file except in compliance with the Elastic License
  * 2.0.
  */
+lexer grammar esql_lexer;
 
-// DO NOT MODIFY THIS FILE BY HAND. IT IS MANAGED BY A CI JOB.
+@header {
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+}
 
-lexer grammar esql_lexer;
-  options { caseInsensitive = true; }
+options {
+  superClass=lexer_config;
+  caseInsensitive=true;
+}
+
+/*
+ * Before modifying this file, please read the sections below as changes here
+ * have significant impact in the ANTLR generated code and its consumption upstream
+ * (including Kibana).
+ *
+ * A. To add a development token (only available in snapshot/dev builds)
+ *
+ * Since the tokens/modes are in development, simply define them under the
+ * "// in development section" and follow the section comments in that section.
+ * That is, use the DEV_ prefix and the {this.isDevVersion()}? conditional.
+ * They are defined at the end of the file, to minimize the impact on the existing
+ * token types.
+ *
+ * B. To add a new (production-ready) token
+ *
+ * Be sure to go through step A (add a development token).
+ * Make sure to remove the prefix and conditional before promoting the tokens in
+ * production.
+ * Since token types (numbers) are generated by ANTLR in a continuous fashion,
+ * it is desirable to avoid changing these values; hence, where possible,
+ * add them at the end of their respective section.
+ * Note that the use of lexing modes prevents this since any addition to a mode
+ * (regardless where it occurs) shifts all the declarations that follow in other modes.
+ *
+ * C. Renaming a token
+ *
+ * Avoid renaming the token. But if you really have to, please check with the
+ * Kibana team as they might be using the generated ANTLR "dictionary".
+ *
+ * D. To remove a token
+ *
+ * If the tokens haven't made it to production (and make sure to double check),
+ * simply remove them from the grammar.
+ * If the tokens get promoted to release, check with the Kibana team about the impact
+ * they have on the UI (auto-completion, etc...)
+ */
 
 DISSECT : 'dissect'           -> pushMode(EXPRESSION_MODE);
 DROP : 'drop'                 -> pushMode(PROJECT_MODE);
@@ -17,21 +64,36 @@ EVAL : 'eval'                 -> pushMode(EXPRESSION_MODE);
 EXPLAIN : 'explain'           -> pushMode(EXPLAIN_MODE);
 FROM : 'from'                 -> pushMode(FROM_MODE);
 GROK : 'grok'                 -> pushMode(EXPRESSION_MODE);
-INLINESTATS : 'inlinestats'   -> pushMode(EXPRESSION_MODE);
 KEEP : 'keep'                 -> pushMode(PROJECT_MODE);
 LIMIT : 'limit'               -> pushMode(EXPRESSION_MODE);
-LOOKUP : 'lookup'             -> pushMode(LOOKUP_MODE);
 META : 'meta'                 -> pushMode(META_MODE);
-METRICS : 'metrics'           -> pushMode(METRICS_MODE);
 MV_EXPAND : 'mv_expand'       -> pushMode(MVEXPAND_MODE);
 RENAME : 'rename'             -> pushMode(RENAME_MODE);
 ROW : 'row'                   -> pushMode(EXPRESSION_MODE);
 SHOW : 'show'                 -> pushMode(SHOW_MODE);
 SORT : 'sort'                 -> pushMode(EXPRESSION_MODE);
 STATS : 'stats'               -> pushMode(EXPRESSION_MODE);
 WHERE : 'where'               -> pushMode(EXPRESSION_MODE);
-MATCH : 'match'               -> pushMode(EXPRESSION_MODE);
-UNKNOWN_CMD : ~[ \r\n\t[\]/]+ -> pushMode(EXPRESSION_MODE);
+//
+// in development
+//
+// New in-development commands must be gated, to sandbox the behavior when running in production environments.
+//
+// For example: to add myCommand use the following declaration:
+// DEV_MYCOMMAND : {this.isDevVersion()}? 'mycommand' -> ...
+//
+// Once the command has been stabilized, remove the DEV_ prefix and the {}? conditional and move the command to the
+// main section while preserving alphabetical order:
+// MYCOMMAND : 'mycommand' -> ...
+DEV_INLINESTATS : {this.isDevVersion()}? 'inlinestats'   -> pushMode(EXPRESSION_MODE);
+DEV_LOOKUP :      {this.isDevVersion()}? 'lookup'        -> pushMode(LOOKUP_MODE);
+DEV_MATCH :       {this.isDevVersion()}? 'match'         -> pushMode(EXPRESSION_MODE);
+DEV_METRICS :     {this.isDevVersion()}? 'metrics'       -> pushMode(METRICS_MODE);
+
+//
+// Catch-all for unrecognized commands - don't define any beyond this line
+//
+UNKNOWN_CMD : ~[ \r\n\t[\]/]+ -> pushMode(EXPRESSION_MODE) ;
 
 LINE_COMMENT
     : '//' ~[\r\n]* '\r'? '\n'? -> channel(HIDDEN)
@@ -45,27 +107,6 @@ WS
     : [ \r\n\t]+ -> channel(HIDDEN)
     ;
 
-// in 8.14 ` were not allowed
-// this has been relaxed in 8.15 since " is used for quoting
-fragment UNQUOTED_SOURCE_PART
-    : ~[:"=|,[\]/ \t\r\n]
-    | '/' ~[*/] // allow single / but not followed by another / or * which would start a comment -- used in index pattern date spec
-    ;
-
-UNQUOTED_SOURCE
-    : UNQUOTED_SOURCE_PART+
-    ;
-
-//
-// Explain
-//
-mode EXPLAIN_MODE;
-EXPLAIN_OPENING_BRACKET : OPENING_BRACKET -> type(OPENING_BRACKET), pushMode(DEFAULT_MODE);
-EXPLAIN_PIPE : PIPE -> type(PIPE), popMode;
-EXPLAIN_WS : WS -> channel(HIDDEN);
-EXPLAIN_LINE_COMMENT : LINE_COMMENT -> channel(HIDDEN);
-EXPLAIN_MULTILINE_COMMENT : MULTILINE_COMMENT -> channel(HIDDEN);
-
 //
 // Expression - used by most commands
 //
@@ -78,7 +119,7 @@ fragment DIGIT
     ;
 
 fragment LETTER
-    : [A-Za-z]
+    : [a-z]
     ;
 
 fragment ESCAPE_SEQUENCE
@@ -90,7 +131,7 @@ fragment UNESCAPED_CHARS
     ;
 
 fragment EXPONENT
-    : [Ee] [+-]? DIGIT+
+    : [e] [+-]? DIGIT+
     ;
 
 fragment ASPERAND
@@ -146,7 +187,6 @@ IS: 'is';
 LAST : 'last';
 LIKE: 'like';
 LP : '(';
-MATCH_OPERATOR: 'match';
 NOT : 'not';
 NULL : 'null';
 NULLS : 'nulls';
@@ -170,8 +210,11 @@ ASTERISK : '*';
 SLASH : '/';
 PERCENT : '%';
 
+// move it in the main section if the feature gets promoted
+DEV_MATCH_OP : {this.isDevVersion()}? DEV_MATCH -> type(DEV_MATCH);
+
 NAMED_OR_POSITIONAL_PARAM
-    : PARAM LETTER UNQUOTED_ID_BODY*
+    : PARAM (LETTER | UNDERSCORE) UNQUOTED_ID_BODY*
     | PARAM DIGIT+
     ;
 
@@ -209,6 +252,17 @@ EXPR_MULTILINE_COMMENT
 EXPR_WS
     : WS -> channel(HIDDEN)
     ;
+
+//
+// Explain
+//
+mode EXPLAIN_MODE;
+EXPLAIN_OPENING_BRACKET : OPENING_BRACKET -> type(OPENING_BRACKET), pushMode(DEFAULT_MODE);
+EXPLAIN_PIPE : PIPE -> type(PIPE), popMode;
+EXPLAIN_WS : WS -> channel(HIDDEN);
+EXPLAIN_LINE_COMMENT : LINE_COMMENT -> channel(HIDDEN);
+EXPLAIN_MULTILINE_COMMENT : MULTILINE_COMMENT -> channel(HIDDEN);
+
 //
 // FROM command
 //
@@ -221,6 +275,17 @@ FROM_COMMA : COMMA -> type(COMMA);
 FROM_ASSIGN : ASSIGN -> type(ASSIGN);
 METADATA : 'metadata';
 
+// in 8.14 ` were not allowed
+// this has been relaxed in 8.15 since " is used for quoting
+fragment UNQUOTED_SOURCE_PART
+    : ~[:"=|,[\]/ \t\r\n]
+    | '/' ~[*/] // allow single / but not followed by another / or * which would start a comment -- used in index pattern date spec
+    ;
+
+UNQUOTED_SOURCE
+    : UNQUOTED_SOURCE_PART+
+    ;
+
 FROM_UNQUOTED_SOURCE : UNQUOTED_SOURCE -> type(UNQUOTED_SOURCE);
 FROM_QUOTED_SOURCE : QUOTED_STRING -> type(QUOTED_STRING);
 
@@ -358,50 +423,6 @@ ENRICH_FIELD_WS
     : WS -> channel(HIDDEN)
     ;
 
-// LOOKUP ON key
-mode LOOKUP_MODE;
-LOOKUP_PIPE : PIPE -> type(PIPE), popMode;
-LOOKUP_COLON : COLON -> type(COLON);
-LOOKUP_COMMA : COMMA -> type(COMMA);
-LOOKUP_DOT: DOT -> type(DOT);
-LOOKUP_ON : ON -> type(ON), pushMode(LOOKUP_FIELD_MODE);
-
-LOOKUP_UNQUOTED_SOURCE: UNQUOTED_SOURCE -> type(UNQUOTED_SOURCE);
-LOOKUP_QUOTED_SOURCE : QUOTED_STRING -> type(QUOTED_STRING);
-
-LOOKUP_LINE_COMMENT
-    : LINE_COMMENT -> channel(HIDDEN)
-    ;
-
-LOOKUP_MULTILINE_COMMENT
-    : MULTILINE_COMMENT -> channel(HIDDEN)
-    ;
-
-LOOKUP_WS
-    : WS -> channel(HIDDEN)
-    ;
-
-mode LOOKUP_FIELD_MODE;
-LOOKUP_FIELD_PIPE : PIPE -> type(PIPE), popMode, popMode;
-LOOKUP_FIELD_COMMA : COMMA -> type(COMMA);
-LOOKUP_FIELD_DOT: DOT -> type(DOT);
-
-LOOKUP_FIELD_ID_PATTERN
-    : ID_PATTERN -> type(ID_PATTERN)
-    ;
-
-LOOKUP_FIELD_LINE_COMMENT
-    : LINE_COMMENT -> channel(HIDDEN)
-    ;
-
-LOOKUP_FIELD_MULTILINE_COMMENT
-    : MULTILINE_COMMENT -> channel(HIDDEN)
-    ;
-
-LOOKUP_FIELD_WS
-    : WS -> channel(HIDDEN)
-    ;
-
 mode MVEXPAND_MODE;
 MVEXPAND_PIPE : PIPE -> type(PIPE), popMode;
 MVEXPAND_DOT: DOT -> type(DOT);
@@ -487,6 +508,51 @@ SETTING_WS
     : WS -> channel(HIDDEN)
     ;
 
+//
+// LOOKUP ON key
+//
+mode LOOKUP_MODE;
+LOOKUP_PIPE : PIPE -> type(PIPE), popMode;
+LOOKUP_COLON : COLON -> type(COLON);
+LOOKUP_COMMA : COMMA -> type(COMMA);
+LOOKUP_DOT: DOT -> type(DOT);
+LOOKUP_ON : ON -> type(ON), pushMode(LOOKUP_FIELD_MODE);
+
+LOOKUP_UNQUOTED_SOURCE: UNQUOTED_SOURCE -> type(UNQUOTED_SOURCE);
+LOOKUP_QUOTED_SOURCE : QUOTED_STRING -> type(QUOTED_STRING);
+
+LOOKUP_LINE_COMMENT
+    : LINE_COMMENT -> channel(HIDDEN)
+    ;
+
+LOOKUP_MULTILINE_COMMENT
+    : MULTILINE_COMMENT -> channel(HIDDEN)
+    ;
+
+LOOKUP_WS
+    : WS -> channel(HIDDEN)
+    ;
+
+mode LOOKUP_FIELD_MODE;
+LOOKUP_FIELD_PIPE : PIPE -> type(PIPE), popMode, popMode;
+LOOKUP_FIELD_COMMA : COMMA -> type(COMMA);
+LOOKUP_FIELD_DOT: DOT -> type(DOT);
+
+LOOKUP_FIELD_ID_PATTERN
+    : ID_PATTERN -> type(ID_PATTERN)
+    ;
+
+LOOKUP_FIELD_LINE_COMMENT
+    : LINE_COMMENT -> channel(HIDDEN)
+    ;
+
+LOOKUP_FIELD_MULTILINE_COMMENT
+    : MULTILINE_COMMENT -> channel(HIDDEN)
+    ;
+
+LOOKUP_FIELD_WS
+    : WS -> channel(HIDDEN)
+    ;
 
 //
 // METRICS command