Skip to content

Commit

Permalink
[ES|QL] adapt to dev mode grammar gating (elastic#192027)
Browse files Browse the repository at this point in the history
## Summary

Pulls in the changes ES team introduced in the ES|QL grammar in
elastic/elasticsearch#111995. Nothing should
change as far as our functionality except that commands the ES team
marks with the `DEV_` prefix will no longer show up in our validation
errors.

**Before**
<img width="859" alt="Screenshot 2024-09-03 at 3 11 21 PM"
src="https://github.com/user-attachments/assets/69dee5f1-dd26-4d85-b83b-a0b4689a3c09">

**After**
<img width="848" alt="Screenshot 2024-09-03 at 3 10 35 PM"
src="https://github.com/user-attachments/assets/31c07a0a-4e59-4e11-af72-a1eb7b7f1235">

Successful ES|QL grammar sync run:
https://buildkite.com/elastic/kibana-es-ql-grammar-sync/builds/53
  • Loading branch information
drewdaemon committed Sep 4, 2024
1 parent 8b7d965 commit a2dbebc
Show file tree
Hide file tree
Showing 23 changed files with 2,408 additions and 2,190 deletions.
16 changes: 9 additions & 7 deletions .buildkite/scripts/steps/esql_grammar_sync.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,14 @@ synchronize_lexer_grammar () {

# Insert the license header
temp_file=$(mktemp)
printf "%s\n\n// DO NOT MODIFY THIS FILE BY HAND. IT IS MANAGED BY A CI JOB.\n\n%s" "$license_header" "$(cat $destination_file)" > "$temp_file"
printf "%s\n\n// DO NOT MODIFY THIS FILE BY HAND. IT IS MANAGED BY A CI JOB.\n\n%s" "$(cat $destination_file)" > "$temp_file"
mv "$temp_file" "$destination_file"

# Replace the line containing "lexer grammar" with "lexer grammar esql_lexer;"
sed -i -e 's/lexer grammar.*$/lexer grammar esql_lexer;/' "$destination_file"

# Insert "options { caseInsensitive = true; }" one line below
sed -i -e '/lexer grammar esql_lexer;/a\
options { caseInsensitive = true; }' "$destination_file"
# Replace the line containing "superClass" with "superClass=lexer_config;"
sed -i -e 's/superClass.*$/superClass=lexer_config;/' "$destination_file"

echo "File copied and modified successfully."
}
Expand All @@ -34,14 +33,17 @@ synchronize_parser_grammar () {

# Insert the license header
temp_file=$(mktemp)
printf "%s\n\n// DO NOT MODIFY THIS FILE BY HAND. IT IS MANAGED BY A CI JOB.\n\n%s" "$license_header" "$(cat ${destination_file})" > "$temp_file"
printf "%s\n\n// DO NOT MODIFY THIS FILE BY HAND. IT IS MANAGED BY A CI JOB.\n\n%s" "$(cat ${destination_file})" > "$temp_file"
mv "$temp_file" "$destination_file"

# Replace the line containing "parser grammar" with "parser grammar esql_parser;"
sed -i -e 's/parser grammar.*$/parser grammar esql_parser;/' "$destination_file"

# Replace options {tokenVocab=EsqlBaseLexer;} with options {tokenVocab=esql_lexer;}
sed -i -e 's/options {tokenVocab=EsqlBaseLexer;}/options {tokenVocab=esql_lexer;}/' "$destination_file"
# Replace tokenVocab=EsqlBaseLexer; with tokenVocab=esql_lexer;
sed -i -e 's/tokenVocab=EsqlBaseLexer;/tokenVocab=esql_lexer;/' "$destination_file"

# Replace the line containing "superClass" with "superClass=parser_config;"
sed -i -e 's/superClass.*$/superClass=parser_config;/' "$destination_file"

echo "File copied and modified successfully."
}
Expand Down
1 change: 1 addition & 0 deletions packages/kbn-esql-ast/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ load("@build_bazel_rules_nodejs//:index.bzl", "js_library")
SRCS = glob(
[
"**/*.ts",
"**/*.js",
],
exclude = [
"**/*.config.js",
Expand Down
220 changes: 143 additions & 77 deletions packages/kbn-esql-ast/src/antlr/esql_lexer.g4
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,58 @@
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
lexer grammar esql_lexer;

// DO NOT MODIFY THIS FILE BY HAND. IT IS MANAGED BY A CI JOB.
@header {
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
}

lexer grammar esql_lexer;
options { caseInsensitive = true; }
options {
superClass=lexer_config;
caseInsensitive=true;
}

/*
* Before modifying this file, please read the sections below (A-D), as changes here
* have significant impact in the ANTLR generated code and its consumption upstream
* (including Kibana).
*
* A. To add a development token (only available in snapshot/dev builds)
*
* Since the tokens/modes are in development, simply define them under the
* "// in development section" and follow the section comments in that section.
* That is, use the DEV_ prefix and the {this.isDevVersion()}? conditional.
* They are defined at the end of the file, to minimize the impact on the existing
* token types.
*
* B. To add a new (production-ready) token
*
* Be sure to go through step A (add a development token).
* Make sure to remove the prefix and conditional before promoting the tokens to
* production.
* Since token types (numbers) are generated by ANTLR in a continuous fashion,
* it is desirable to avoid changing these values; hence, where possible,
* add them at the end of their respective section.
* Note that the use of lexing modes prevents this since any addition to a mode
* (regardless where it occurs) shifts all the declarations that follow in other modes.
*
* C. Renaming a token
*
* Avoid renaming the token. But if you really have to, please check with the
* Kibana team as they might be using the generated ANTLR "dictionary".
*
* D. To remove a token
*
* If the tokens haven't made it to production (and make sure to double check),
* simply remove them from the grammar.
* If the tokens have been promoted to release, check with the Kibana team about
* the impact their removal has on the UI (auto-completion, etc...)
*/

DISSECT : 'dissect' -> pushMode(EXPRESSION_MODE);
DROP : 'drop' -> pushMode(PROJECT_MODE);
Expand All @@ -17,21 +64,36 @@ EVAL : 'eval' -> pushMode(EXPRESSION_MODE);
EXPLAIN : 'explain' -> pushMode(EXPLAIN_MODE);
FROM : 'from' -> pushMode(FROM_MODE);
GROK : 'grok' -> pushMode(EXPRESSION_MODE);
INLINESTATS : 'inlinestats' -> pushMode(EXPRESSION_MODE);
KEEP : 'keep' -> pushMode(PROJECT_MODE);
LIMIT : 'limit' -> pushMode(EXPRESSION_MODE);
LOOKUP : 'lookup' -> pushMode(LOOKUP_MODE);
META : 'meta' -> pushMode(META_MODE);
METRICS : 'metrics' -> pushMode(METRICS_MODE);
MV_EXPAND : 'mv_expand' -> pushMode(MVEXPAND_MODE);
RENAME : 'rename' -> pushMode(RENAME_MODE);
ROW : 'row' -> pushMode(EXPRESSION_MODE);
SHOW : 'show' -> pushMode(SHOW_MODE);
SORT : 'sort' -> pushMode(EXPRESSION_MODE);
STATS : 'stats' -> pushMode(EXPRESSION_MODE);
WHERE : 'where' -> pushMode(EXPRESSION_MODE);
MATCH : 'match' -> pushMode(EXPRESSION_MODE);
UNKNOWN_CMD : ~[ \r\n\t[\]/]+ -> pushMode(EXPRESSION_MODE);
//
// in development
//
// Declare new in-development commands here so that their behavior is sandboxed when running in production environments.
//
// For example: to add myCommand use the following declaration:
// DEV_MYCOMMAND : {this.isDevVersion()}? 'mycommand' -> ...
//
// Once the command has been stabilized, remove the DEV_ prefix and the {}? conditional and move the command to the
// main section while preserving alphabetical order:
// MYCOMMAND : 'mycommand' -> ...
DEV_INLINESTATS : {this.isDevVersion()}? 'inlinestats' -> pushMode(EXPRESSION_MODE);
DEV_LOOKUP : {this.isDevVersion()}? 'lookup' -> pushMode(LOOKUP_MODE);
DEV_MATCH : {this.isDevVersion()}? 'match' -> pushMode(EXPRESSION_MODE);
DEV_METRICS : {this.isDevVersion()}? 'metrics' -> pushMode(METRICS_MODE);

//
// Catch-all for unrecognized commands - don't define any beyond this line
//
UNKNOWN_CMD : ~[ \r\n\t[\]/]+ -> pushMode(EXPRESSION_MODE) ;

LINE_COMMENT
: '//' ~[\r\n]* '\r'? '\n'? -> channel(HIDDEN)
Expand All @@ -45,27 +107,6 @@ WS
: [ \r\n\t]+ -> channel(HIDDEN)
;

// in 8.14 ` were not allowed
// this has been relaxed in 8.15 since " is used for quoting
fragment UNQUOTED_SOURCE_PART
: ~[:"=|,[\]/ \t\r\n]
| '/' ~[*/] // allow single / but not followed by another / or * which would start a comment -- used in index pattern date spec
;
UNQUOTED_SOURCE
: UNQUOTED_SOURCE_PART+
;
//
// Explain
//
mode EXPLAIN_MODE;
EXPLAIN_OPENING_BRACKET : OPENING_BRACKET -> type(OPENING_BRACKET), pushMode(DEFAULT_MODE);
EXPLAIN_PIPE : PIPE -> type(PIPE), popMode;
EXPLAIN_WS : WS -> channel(HIDDEN);
EXPLAIN_LINE_COMMENT : LINE_COMMENT -> channel(HIDDEN);
EXPLAIN_MULTILINE_COMMENT : MULTILINE_COMMENT -> channel(HIDDEN);
//
// Expression - used by most commands
//
Expand All @@ -78,7 +119,7 @@ fragment DIGIT
;

fragment LETTER
: [A-Za-z]
: [a-z]
;

fragment ESCAPE_SEQUENCE
Expand All @@ -90,7 +131,7 @@ fragment UNESCAPED_CHARS
;

fragment EXPONENT
: [Ee] [+-]? DIGIT+
: [e] [+-]? DIGIT+
;

fragment ASPERAND
Expand Down Expand Up @@ -146,7 +187,6 @@ IS: 'is';
LAST : 'last';
LIKE: 'like';
LP : '(';
MATCH_OPERATOR: 'match';
NOT : 'not';
NULL : 'null';
NULLS : 'nulls';
Expand All @@ -170,8 +210,11 @@ ASTERISK : '*';
SLASH : '/';
PERCENT : '%';

// move it in the main section if the feature gets promoted
DEV_MATCH_OP : {this.isDevVersion()}? DEV_MATCH -> type(DEV_MATCH);

NAMED_OR_POSITIONAL_PARAM
: PARAM LETTER UNQUOTED_ID_BODY*
: PARAM (LETTER | UNDERSCORE) UNQUOTED_ID_BODY*
| PARAM DIGIT+
;

Expand Down Expand Up @@ -209,6 +252,17 @@ EXPR_MULTILINE_COMMENT
EXPR_WS
: WS -> channel(HIDDEN)
;

//
// Explain
//
mode EXPLAIN_MODE;
EXPLAIN_OPENING_BRACKET : OPENING_BRACKET -> type(OPENING_BRACKET), pushMode(DEFAULT_MODE);
EXPLAIN_PIPE : PIPE -> type(PIPE), popMode;
EXPLAIN_WS : WS -> channel(HIDDEN);
EXPLAIN_LINE_COMMENT : LINE_COMMENT -> channel(HIDDEN);
EXPLAIN_MULTILINE_COMMENT : MULTILINE_COMMENT -> channel(HIDDEN);

//
// FROM command
//
Expand All @@ -221,6 +275,17 @@ FROM_COMMA : COMMA -> type(COMMA);
FROM_ASSIGN : ASSIGN -> type(ASSIGN);
METADATA : 'metadata';

// in 8.14 ` were not allowed
// this has been relaxed in 8.15 since " is used for quoting
fragment UNQUOTED_SOURCE_PART
: ~[:"=|,[\]/ \t\r\n]
| '/' ~[*/] // allow single / but not followed by another / or * which would start a comment -- used in index pattern date spec
;
UNQUOTED_SOURCE
: UNQUOTED_SOURCE_PART+
;
FROM_UNQUOTED_SOURCE : UNQUOTED_SOURCE -> type(UNQUOTED_SOURCE);
FROM_QUOTED_SOURCE : QUOTED_STRING -> type(QUOTED_STRING);
Expand Down Expand Up @@ -358,50 +423,6 @@ ENRICH_FIELD_WS
: WS -> channel(HIDDEN)
;

// LOOKUP ON key
mode LOOKUP_MODE;
LOOKUP_PIPE : PIPE -> type(PIPE), popMode;
LOOKUP_COLON : COLON -> type(COLON);
LOOKUP_COMMA : COMMA -> type(COMMA);
LOOKUP_DOT: DOT -> type(DOT);
LOOKUP_ON : ON -> type(ON), pushMode(LOOKUP_FIELD_MODE);

LOOKUP_UNQUOTED_SOURCE: UNQUOTED_SOURCE -> type(UNQUOTED_SOURCE);
LOOKUP_QUOTED_SOURCE : QUOTED_STRING -> type(QUOTED_STRING);

LOOKUP_LINE_COMMENT
: LINE_COMMENT -> channel(HIDDEN)
;

LOOKUP_MULTILINE_COMMENT
: MULTILINE_COMMENT -> channel(HIDDEN)
;

LOOKUP_WS
: WS -> channel(HIDDEN)
;

mode LOOKUP_FIELD_MODE;
LOOKUP_FIELD_PIPE : PIPE -> type(PIPE), popMode, popMode;
LOOKUP_FIELD_COMMA : COMMA -> type(COMMA);
LOOKUP_FIELD_DOT: DOT -> type(DOT);

LOOKUP_FIELD_ID_PATTERN
: ID_PATTERN -> type(ID_PATTERN)
;

LOOKUP_FIELD_LINE_COMMENT
: LINE_COMMENT -> channel(HIDDEN)
;

LOOKUP_FIELD_MULTILINE_COMMENT
: MULTILINE_COMMENT -> channel(HIDDEN)
;

LOOKUP_FIELD_WS
: WS -> channel(HIDDEN)
;

mode MVEXPAND_MODE;
MVEXPAND_PIPE : PIPE -> type(PIPE), popMode;
MVEXPAND_DOT: DOT -> type(DOT);
Expand Down Expand Up @@ -487,6 +508,51 @@ SETTING_WS
: WS -> channel(HIDDEN)
;

//
// LOOKUP ON key
//
mode LOOKUP_MODE;
LOOKUP_PIPE : PIPE -> type(PIPE), popMode;
LOOKUP_COLON : COLON -> type(COLON);
LOOKUP_COMMA : COMMA -> type(COMMA);
LOOKUP_DOT: DOT -> type(DOT);
LOOKUP_ON : ON -> type(ON), pushMode(LOOKUP_FIELD_MODE);

LOOKUP_UNQUOTED_SOURCE: UNQUOTED_SOURCE -> type(UNQUOTED_SOURCE);
LOOKUP_QUOTED_SOURCE : QUOTED_STRING -> type(QUOTED_STRING);

LOOKUP_LINE_COMMENT
: LINE_COMMENT -> channel(HIDDEN)
;

LOOKUP_MULTILINE_COMMENT
: MULTILINE_COMMENT -> channel(HIDDEN)
;

LOOKUP_WS
: WS -> channel(HIDDEN)
;

mode LOOKUP_FIELD_MODE;
LOOKUP_FIELD_PIPE : PIPE -> type(PIPE), popMode, popMode;
LOOKUP_FIELD_COMMA : COMMA -> type(COMMA);
LOOKUP_FIELD_DOT: DOT -> type(DOT);

LOOKUP_FIELD_ID_PATTERN
: ID_PATTERN -> type(ID_PATTERN)
;

LOOKUP_FIELD_LINE_COMMENT
: LINE_COMMENT -> channel(HIDDEN)
;

LOOKUP_FIELD_MULTILINE_COMMENT
: MULTILINE_COMMENT -> channel(HIDDEN)
;

LOOKUP_FIELD_WS
: WS -> channel(HIDDEN)
;

//
// METRICS command
Expand Down
Loading

0 comments on commit a2dbebc

Please sign in to comment.