From 9e2a9ff179344a13ed9344c6d3e74954b6adc7b6 Mon Sep 17 00:00:00 2001 From: seankao-az <107227027+seankao-az@users.noreply.github.com> Date: Tue, 28 Jun 2022 10:20:38 -0700 Subject: [PATCH] PPL describe command (#646) Signed-off-by: Sean Kao --- docs/category.json | 1 + docs/user/ppl/cmd/describe.rst | 65 ++++++++++++++ docs/user/ppl/cmd/syntax.rst | 2 +- docs/user/ppl/index.rst | 2 + .../opensearch/sql/ppl/DescribeCommandIT.java | 90 +++++++++++++++++++ ppl/src/main/antlr/OpenSearchPPLLexer.g4 | 1 + ppl/src/main/antlr/OpenSearchPPLParser.g4 | 19 +++- .../opensearch/sql/ppl/parser/AstBuilder.java | 22 ++++- .../sql/ppl/antlr/PPLSyntaxParserTest.java | 26 ++++++ .../sql/ppl/parser/AstBuilderTest.java | 33 +++++++ 10 files changed, 255 insertions(+), 6 deletions(-) create mode 100644 docs/user/ppl/cmd/describe.rst create mode 100644 integ-test/src/test/java/org/opensearch/sql/ppl/DescribeCommandIT.java diff --git a/docs/category.json b/docs/category.json index 0ecd91b495..7375e773d2 100644 --- a/docs/category.json +++ b/docs/category.json @@ -8,6 +8,7 @@ ], "ppl_cli": [ "user/ppl/cmd/dedup.rst", + "user/ppl/cmd/describe.rst", "user/ppl/cmd/eval.rst", "user/ppl/cmd/fields.rst", "user/ppl/cmd/head.rst", diff --git a/docs/user/ppl/cmd/describe.rst b/docs/user/ppl/cmd/describe.rst new file mode 100644 index 0000000000..0abd569684 --- /dev/null +++ b/docs/user/ppl/cmd/describe.rst @@ -0,0 +1,65 @@ +============= +describe +============= + +.. rubric:: Table of contents + +.. contents:: + :local: + :depth: 2 + + +Description +============ +| Use the ``describe`` command to query the metadata of an index. The ``describe`` command can only be used as the first command in a PPL query. + + +Syntax +============ +describe <index> + +* index: mandatory. The ``describe`` command must specify which index to query. + + +Example 1: Fetch all the metadata +================================= + +The example describes the accounts index. 
+ +PPL query:: + + os> describe accounts; + fetched rows / total rows = 11/11 + +----------------+---------------+--------------+----------------+-------------+-------------+---------------+-----------------+------------------+------------------+------------+-----------+--------------+-----------------+--------------------+---------------------+--------------------+---------------+-----------------+----------------+---------------+--------------------+--------------------+----------------------+ + | TABLE_CAT | TABLE_SCHEM | TABLE_NAME | COLUMN_NAME | DATA_TYPE | TYPE_NAME | COLUMN_SIZE | BUFFER_LENGTH | DECIMAL_DIGITS | NUM_PREC_RADIX | NULLABLE | REMARKS | COLUMN_DEF | SQL_DATA_TYPE | SQL_DATETIME_SUB | CHAR_OCTET_LENGTH | ORDINAL_POSITION | IS_NULLABLE | SCOPE_CATALOG | SCOPE_SCHEMA | SCOPE_TABLE | SOURCE_DATA_TYPE | IS_AUTOINCREMENT | IS_GENERATEDCOLUMN | + |----------------+---------------+--------------+----------------+-------------+-------------+---------------+-----------------+------------------+------------------+------------+-----------+--------------+-----------------+--------------------+---------------------+--------------------+---------------+-----------------+----------------+---------------+--------------------+--------------------+----------------------| + | docTestCluster | null | accounts | account_number | null | long | null | null | null | 10 | 2 | null | null | null | null | null | 0 | | null | null | null | null | NO | | + | docTestCluster | null | accounts | firstname | null | text | null | null | null | 10 | 2 | null | null | null | null | null | 1 | | null | null | null | null | NO | | + | docTestCluster | null | accounts | address | null | text | null | null | null | 10 | 2 | null | null | null | null | null | 2 | | null | null | null | null | NO | | + | docTestCluster | null | accounts | balance | null | long | null | null | null | 10 | 2 | null | null | null | null | null | 3 | | null | null | null | null | NO | | + | docTestCluster 
| null | accounts | gender | null | text | null | null | null | 10 | 2 | null | null | null | null | null | 4 | | null | null | null | null | NO | | + | docTestCluster | null | accounts | city | null | text | null | null | null | 10 | 2 | null | null | null | null | null | 5 | | null | null | null | null | NO | | + | docTestCluster | null | accounts | employer | null | text | null | null | null | 10 | 2 | null | null | null | null | null | 6 | | null | null | null | null | NO | | + | docTestCluster | null | accounts | state | null | text | null | null | null | 10 | 2 | null | null | null | null | null | 7 | | null | null | null | null | NO | | + | docTestCluster | null | accounts | age | null | long | null | null | null | 10 | 2 | null | null | null | null | null | 8 | | null | null | null | null | NO | | + | docTestCluster | null | accounts | email | null | text | null | null | null | 10 | 2 | null | null | null | null | null | 9 | | null | null | null | null | NO | | + | docTestCluster | null | accounts | lastname | null | text | null | null | null | 10 | 2 | null | null | null | null | null | 10 | | null | null | null | null | NO | | + +----------------+---------------+--------------+----------------+-------------+-------------+---------------+-----------------+------------------+------------------+------------+-----------+--------------+-----------------+--------------------+---------------------+--------------------+---------------+-----------------+----------------+---------------+--------------------+--------------------+----------------------+ + +Example 2: Fetch metadata with condition and filter +=================================================== + +The example retrieves columns with type long in accounts index. 
+ +PPL query:: + + os> describe accounts | where TYPE_NAME="long" | fields COLUMN_NAME; + fetched rows / total rows = 3/3 + +----------------+ + | COLUMN_NAME | + |----------------| + | account_number | + | balance | + | age | + +----------------+ + diff --git a/docs/user/ppl/cmd/syntax.rst b/docs/user/ppl/cmd/syntax.rst index 7d4a58b386..45ffea8ff6 100644 --- a/docs/user/ppl/cmd/syntax.rst +++ b/docs/user/ppl/cmd/syntax.rst @@ -10,7 +10,7 @@ Syntax Command Order ============= -The PPL query started with ``search`` command to reference a table search from. All the following command could be in any order. In the following example, ``search`` command refer the accounts index as the source, then using fields and where command to do the further processing. +The PPL query starts with either the ``search`` command to reference a table to search from, or the ``describe`` command to reference a table to get its metadata. All subsequent commands can be in any order. In the following example, the ``search`` command refers to the accounts index as the source, then the ``fields`` and ``where`` commands do the further processing. .. 
code-block:: diff --git a/docs/user/ppl/index.rst b/docs/user/ppl/index.rst index 015797895d..39adfa0902 100644 --- a/docs/user/ppl/index.rst +++ b/docs/user/ppl/index.rst @@ -40,6 +40,8 @@ The query start with search command and then flowing a set of command delimited - `dedup command <cmd/dedup.rst>`_ + - `describe command <cmd/describe.rst>`_ + - `eval command <cmd/eval.rst>`_ - `fields command <cmd/fields.rst>`_ diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/DescribeCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/DescribeCommandIT.java new file mode 100644 index 0000000000..c06ef3bc21 --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/DescribeCommandIT.java @@ -0,0 +1,90 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + + +package org.opensearch.sql.ppl; + +import org.json.JSONObject; +import org.junit.jupiter.api.Test; +import org.opensearch.client.Request; +import org.opensearch.client.ResponseException; + +import java.io.IOException; + +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_DOG; +import static org.opensearch.sql.util.MatcherUtils.columnName; +import static org.opensearch.sql.util.MatcherUtils.verifyColumn; +import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; + +public class DescribeCommandIT extends PPLIntegTestCase { + + @Override + public void init() throws IOException { + loadIndex(Index.DOG); + } + + @Test + public void testDescribeAllFields() throws IOException { + JSONObject result = executeQuery(String.format("describe %s", TEST_INDEX_DOG)); + verifyColumn( + result, + columnName("TABLE_CAT"), + columnName("TABLE_SCHEM"), + columnName("TABLE_NAME"), + columnName("COLUMN_NAME"), + columnName("DATA_TYPE"), + columnName("TYPE_NAME"), + columnName("COLUMN_SIZE"), + columnName("BUFFER_LENGTH"), + columnName("DECIMAL_DIGITS"), + columnName("NUM_PREC_RADIX"), + columnName("NULLABLE"), + columnName("REMARKS"), + columnName("COLUMN_DEF"), + columnName("SQL_DATA_TYPE"), + 
columnName("SQL_DATETIME_SUB"), + columnName("CHAR_OCTET_LENGTH"), + columnName("ORDINAL_POSITION"), + columnName("IS_NULLABLE"), + columnName("SCOPE_CATALOG"), + columnName("SCOPE_SCHEMA"), + columnName("SCOPE_TABLE"), + columnName("SOURCE_DATA_TYPE"), + columnName("IS_AUTOINCREMENT"), + columnName("IS_GENERATEDCOLUMN") + ); + } + + @Test + public void testDescribeFilterFields() throws IOException { + JSONObject result = executeQuery(String.format("describe %s | fields TABLE_NAME, COLUMN_NAME, TYPE_NAME", TEST_INDEX_DOG)); + verifyColumn( + result, + columnName("TABLE_NAME"), + columnName("COLUMN_NAME"), + columnName("TYPE_NAME") + ); + } + + @Test + public void testDescribeWithSpecialIndexName() throws IOException { + executeRequest(new Request("PUT", "/logs-2021.01.11")); + verifyDataRows(executeQuery("describe logs-2021.01.11")); + + executeRequest(new Request("PUT", "/logs-7.10.0-2021.01.11")); + verifyDataRows(executeQuery("describe logs-7.10.0-2021.01.11")); + } + + @Test + public void describeCommandWithoutIndexShouldFailToParse() throws IOException { + try { + executeQuery("describe"); + fail(); + } catch (ResponseException e) { + assertTrue(e.getMessage().contains("RuntimeException")); + assertTrue(e.getMessage().contains("Failed to parse query due to offending symbol")); + } + } +} diff --git a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 index 68fb402a2a..ae3949f246 100644 --- a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 @@ -11,6 +11,7 @@ channels { WHITESPACE, ERRORCHANNEL } // COMMAND KEYWORDS SEARCH: 'SEARCH'; +DESCRIBE: 'DESCRIBE'; FROM: 'FROM'; WHERE: 'WHERE'; FIELDS: 'FIELDS'; diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index c0ab5ebb84..332bf4e463 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -14,10 +14,15 @@ root /** statement */ pplStatement - : 
searchCommand (PIPE commands)* + : pplCommands (PIPE commands)* ; /** commands */ +pplCommands + : searchCommand + | describeCommand + ; + commands : whereCommand | fieldsCommand | renameCommand | statsCommand | dedupCommand | sortCommand | evalCommand | headCommand | topCommand | rareCommand | parseCommand | kmeansCommand | adCommand; @@ -28,6 +33,10 @@ searchCommand | (SEARCH)? logicalExpression fromClause #searchFilterFrom ; +describeCommand + : DESCRIBE tableSourceClause + ; + whereCommand : WHERE logicalExpression ; @@ -119,8 +128,12 @@ adParameter /** clauses */ fromClause - : SOURCE EQUAL tableSource (COMMA tableSource)* - | INDEX EQUAL tableSource (COMMA tableSource)* + : SOURCE EQUAL tableSourceClause + | INDEX EQUAL tableSourceClause + ; + +tableSourceClause + : tableSource (COMMA tableSource)* ; renameClasue diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java index 7683002de8..d7f97e3d35 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java @@ -6,7 +6,9 @@ package org.opensearch.sql.ppl.parser; +import static org.opensearch.sql.ast.dsl.AstDSL.qualifiedName; import static org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.DedupCommandContext; +import static org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.DescribeCommandContext; import static org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.EvalCommandContext; import static org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.FieldsCommandContext; import static org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.FromClauseContext; @@ -19,8 +21,10 @@ import static org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.SearchFromFilterContext; import static org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.SortCommandContext; import static 
org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.StatsCommandContext; +import static org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.TableSourceClauseContext; import static org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.TopCommandContext; import static org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.WhereCommandContext; +import static org.opensearch.sql.utils.SystemIndexUtils.mappingTable; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; @@ -79,11 +83,11 @@ public class AstBuilder extends OpenSearchPPLParserBaseVisitor { @Override public UnresolvedPlan visitPplStatement(PplStatementContext ctx) { - UnresolvedPlan search = visit(ctx.searchCommand()); + UnresolvedPlan pplCommand = visit(ctx.pplCommands()); return ctx.commands() .stream() .map(this::visit) - .reduce(search, (r, e) -> e.attach(r)); + .reduce(pplCommand, (r, e) -> e.attach(r)); } /** @@ -106,6 +110,15 @@ public UnresolvedPlan visitSearchFilterFrom(SearchFilterFromContext ctx) { visit(ctx.fromClause())); } + /** + * Describe command. + */ + @Override + public UnresolvedPlan visitDescribeCommand(DescribeCommandContext ctx) { + final Relation table = (Relation) visitTableSourceClause(ctx.tableSourceClause()); + return new Relation(qualifiedName(mappingTable(table.getTableName()))); + } + /** * Where command. 
*/ @@ -286,6 +299,11 @@ public UnresolvedPlan visitTopCommand(TopCommandContext ctx) { */ @Override public UnresolvedPlan visitFromClause(FromClauseContext ctx) { + return visitTableSourceClause(ctx.tableSourceClause()); + } + + @Override + public UnresolvedPlan visitTableSourceClause(TableSourceClauseContext ctx) { return new Relation(ctx.tableSource() .stream().map(this::internalVisitExpression) .collect(Collectors.toList())); diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/antlr/PPLSyntaxParserTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/antlr/PPLSyntaxParserTest.java index 47285751dc..a593bd5000 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/antlr/PPLSyntaxParserTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/antlr/PPLSyntaxParserTest.java @@ -145,5 +145,31 @@ public void can_parse_simple_query_string_relevance_function() { "SOURCE=test | WHERE simple_query_string([\"Tags\" ^ 1.5, Title, `Body` 4.2], 'query'," + "analyzer=keyword, quote_field_suffix=\".exact\", fuzzy_prefix_length = 4)")); } + + @Test + public void testDescribeCommandShouldPass() { + ParseTree tree = new PPLSyntaxParser().analyzeSyntax("describe t"); + assertNotEquals(null, tree); + } + + @Test + public void testDescribeCommandWithMultipleIndicesShouldPass() { + ParseTree tree = new PPLSyntaxParser().analyzeSyntax("describe t,u"); + assertNotEquals(null, tree); + } + + @Test + public void testDescribeFieldsCommandShouldPass() { + ParseTree tree = new PPLSyntaxParser().analyzeSyntax("describe t | fields a,b"); + assertNotEquals(null, tree); + } + + @Test + public void testDescribeCommandWithSourceShouldFail() { + exceptionRule.expect(RuntimeException.class); + exceptionRule.expectMessage("Failed to parse query due to offending symbol"); + + new PPLSyntaxParser().analyzeSyntax("describe source=t"); + } } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java index 
df8ca769d1..ce5f8f9ec5 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java @@ -30,6 +30,7 @@ import static org.opensearch.sql.ast.dsl.AstDSL.map; import static org.opensearch.sql.ast.dsl.AstDSL.nullLiteral; import static org.opensearch.sql.ast.dsl.AstDSL.parse; +import static org.opensearch.sql.ast.dsl.AstDSL.project; import static org.opensearch.sql.ast.dsl.AstDSL.projectWithArg; import static org.opensearch.sql.ast.dsl.AstDSL.qualifiedName; import static org.opensearch.sql.ast.dsl.AstDSL.rareTopN; @@ -38,6 +39,7 @@ import static org.opensearch.sql.ast.dsl.AstDSL.sort; import static org.opensearch.sql.ast.dsl.AstDSL.span; import static org.opensearch.sql.ast.dsl.AstDSL.stringLiteral; +import static org.opensearch.sql.utils.SystemIndexUtils.mappingTable; import com.google.common.collect.ImmutableMap; import org.junit.Ignore; @@ -45,6 +47,7 @@ import org.junit.Test; import org.junit.rules.ExpectedException; import org.opensearch.sql.ast.Node; +import org.opensearch.sql.ast.expression.AllFields; import org.opensearch.sql.ast.expression.DataType; import org.opensearch.sql.ast.expression.Literal; import org.opensearch.sql.ast.expression.SpanUnit; @@ -447,30 +450,42 @@ public void testIndexName() { relation("log.2020.04.20."), compare("=", field("a"), intLiteral(1)) )); + assertEqual("describe `log.2020.04.20.`", + relation(mappingTable("log.2020.04.20."))); } @Test public void testIdentifierAsIndexNameStartWithDot() { assertEqual("source=.opensearch_dashboards", relation(".opensearch_dashboards")); + assertEqual("describe .opensearch_dashboards", + relation(mappingTable(".opensearch_dashboards"))); } @Test public void testIdentifierAsIndexNameWithDotInTheMiddle() { assertEqual("source=log.2020.10.10", relation("log.2020.10.10")); assertEqual("source=log-7.10-2020.10.10", relation("log-7.10-2020.10.10")); + assertEqual("describe log.2020.10.10", + 
relation(mappingTable("log.2020.10.10"))); + assertEqual("describe log-7.10-2020.10.10", + relation(mappingTable("log-7.10-2020.10.10"))); } @Test public void testIdentifierAsIndexNameWithSlashInTheMiddle() { assertEqual("source=log-2020", relation("log-2020")); + assertEqual("describe log-2020", + relation(mappingTable("log-2020"))); } @Test public void testIdentifierAsIndexNameContainStar() { assertEqual("source=log-2020-10-*", relation("log-2020-10-*")); + assertEqual("describe log-2020-10-*", + relation(mappingTable("log-2020-10-*"))); } @Test @@ -478,6 +493,12 @@ public void testIdentifierAsIndexNameContainStarAndDots() { assertEqual("source=log-2020.10.*", relation("log-2020.10.*")); assertEqual("source=log-2020.*.01", relation("log-2020.*.01")); assertEqual("source=log-2020.*.*", relation("log-2020.*.*")); + assertEqual("describe log-2020.10.*", + relation(mappingTable("log-2020.10.*"))); + assertEqual("describe log-2020.*.01", + relation(mappingTable("log-2020.*.01"))); + assertEqual("describe log-2020.*.*", + relation(mappingTable("log-2020.*.*"))); } @Test @@ -603,6 +624,18 @@ public void testKmeansCommandWithoutParameter() { new Kmeans(relation("t"), ImmutableMap.of())); } + @Test + public void testDescribeCommand() { + assertEqual("describe t", + relation(mappingTable("t"))); + } + + @Test + public void testDescribeCommandWithMultipleIndices() { + assertEqual("describe t,u", + relation(mappingTable("t,u"))); + } + @Test public void test_fitRCFADCommand_withoutDataFormat() { assertEqual("source=t | AD shingle_size=10 time_decay=0.0001 time_field='timestamp' "