ESQL: COALESCE function (elastic#98542)

This adds a `COALESCE` function that returns the first non-null value.
nik9000 · Aug 17, 2023 · 44e6134 · 44e6134
1 parent 805372c
commit 44e6134
Show file tree

Hide file tree

Showing 14 changed files with 532 additions and 68 deletions.
diff --git a/docs/reference/esql/esql-functions.asciidoc b/docs/reference/esql/esql-functions.asciidoc
@@ -16,6 +16,7 @@ these functions:
 * <<esql-auto_bucket>>
 * <<esql-case>>
 * <<esql-cidr_match>>
+* <<esql-coalesce>>
 * <<esql-concat>>
 * <<esql-cos>>
 * <<esql-cosh>>
@@ -73,6 +74,7 @@ include::functions/atan2.asciidoc[]
 include::functions/auto_bucket.asciidoc[]
 include::functions/case.asciidoc[]
 include::functions/cidr_match.asciidoc[]
+include::functions/coalesce.asciidoc[]
 include::functions/concat.asciidoc[]
 include::functions/cos.asciidoc[]
 include::functions/cosh.asciidoc[]

diff --git a/docs/reference/esql/esql-syntax.asciidoc b/docs/reference/esql/esql-syntax.asciidoc
@@ -120,15 +120,23 @@ The following boolean operators are supported:
 
 For NULL comparison use the `IS NULL` and `IS NOT NULL` predicates:
 
-[source,esql]
+[source.merge.styled,esql]
 ----
-include::{esql-specs}/conditional.csv-spec[tag=is-null]
+include::{esql-specs}/null.csv-spec[tag=is-null]
 ----
+[%header.monospaced.styled,format=dsv,separator=|]
+|===
+include::{esql-specs}/null.csv-spec[tag=is-null-result]
+|===
 
-[source,esql]
+[source.merge.styled,esql]
 ----
-include::{esql-specs}/conditional.csv-spec[tag=is-not-null]
+include::{esql-specs}/null.csv-spec[tag=is-not-null]
 ----
+[%header.monospaced.styled,format=dsv,separator=|]
+|===
+include::{esql-specs}/null.csv-spec[tag=is-not-null-result]
+|===
 
 [discrete]
 [[esql-timespan-literals]]

diff --git a/docs/reference/esql/functions/coalesce.asciidoc b/docs/reference/esql/functions/coalesce.asciidoc
@@ -0,0 +1,13 @@
+[[esql-coalesce]]
+=== `COALESCE`
+
+Returns the first non-null value.
+
+[source.merge.styled,esql]
+----
+include::{esql-specs}/null.csv-spec[tag=coalesce]
+----
+[%header.monospaced.styled,format=dsv,separator=|]
+|===
+include::{esql-specs}/null.csv-spec[tag=coalesce-result]
+|===
diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/conditional.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/conditional.csv-spec
@@ -89,61 +89,3 @@ M              |10
 M              |10           
 M              |10           
 ;
-
-isNull
-from employees
-| where gender is null
-| sort first_name
-| keep first_name, gender
-| limit 3;
-
-first_name:keyword|gender:keyword
-Berni             |null
-Cristinel         |null
-Duangkaew         |null
-;
-
-notIsNull
-from employees
-| where gender is not null
-| sort first_name
-| keep first_name, gender
-| limit 3;
-
-first_name:keyword|gender:keyword
-Alejandro         |F
-Amabile           |M
-Anneke            |F
-;
-
-isNullForDocs
-// tag::is-null[]
-FROM employees
-| WHERE birth_date IS NULL
-| KEEP first_name, last_name
-| SORT first_name
-| LIMIT 3
-// end::is-null[]
-;
-
-// tag::is-null-result[]
-first_name:keyword|last_name:keyword   
-Basil             |Tramer         
-Florian           |Syrotiuk       
-Lucien            |Rosenbaum
-// end::is-null-result[]
-;
-
-isNotNullForDocs
-// tag::is-not-null[]
-FROM employees
-| WHERE is_rehired IS NOT NULL
-| STATS count(emp_no)
-// end::is-not-null[]
-;
-
-// tag::is-not-null-result[]
-count(emp_no):long 
-84
-// end::is-not-null-result[]
-;
diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/null.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/null.csv-spec
@@ -0,0 +1,137 @@
+isNull
+from employees
+| where gender is null
+| sort first_name
+| keep first_name, gender
+| limit 3;
+
+first_name:keyword|gender:keyword
+Berni             |null
+Cristinel         |null
+Duangkaew         |null
+;
+
+notIsNull
+from employees
+| where gender is not null
+| sort first_name
+| keep first_name, gender
+| limit 3;
+
+first_name:keyword|gender:keyword
+Alejandro         |F
+Amabile           |M
+Anneke            |F
+;
+
+isNullForDocs
+// tag::is-null[]
+FROM employees
+| WHERE birth_date IS NULL
+| KEEP first_name, last_name
+| SORT first_name
+| LIMIT 3
+// end::is-null[]
+;
+
+// tag::is-null-result[]
+first_name:keyword|last_name:keyword   
+Basil             |Tramer         
+Florian           |Syrotiuk       
+Lucien            |Rosenbaum
+// end::is-null-result[]
+;
+
+isNotNullForDocs
+// tag::is-not-null[]
+FROM employees
+| WHERE is_rehired IS NOT NULL
+| STATS count(emp_no)
+// end::is-not-null[]
+;
+
+// tag::is-not-null-result[]
+count(emp_no):long 
+84
+// end::is-not-null-result[]
+;
+
+coalesceSimple
+// tag::coalesce[]
+ROW a=null, b="b"
+| EVAL COALESCE(a, b)
+// end::coalesce[]
+;
+
+// tag::coalesce-result[]
+a:null | b:keyword | COALESCE(a,b):keyword
+  null |         b | b
+// end::coalesce-result[]
+;
+
+coalesce
+FROM employees
+| EVAL first_name = COALESCE(first_name, "X")
+| SORT first_name DESC, emp_no ASC
+| KEEP emp_no, first_name
+| limit 10;
+
+emp_no:integer | first_name:keyword
+         10047 | Zvonko
+         10081 | Zhongwei
+         10026 | Yongqiao
+         10043 | Yishay
+         10050 | Yinghua
+         10087 | Xinglin
+         10030 | X
+         10031 | X
+         10032 | X
+         10033 | X
+;
+
+coalesceBackwards
+FROM employees
+| EVAL first_name = COALESCE("X", first_name)
+| SORT first_name DESC, emp_no ASC
+| KEEP emp_no, first_name
+| limit 10;
+
+emp_no:integer | first_name:keyword
+         10001 | X
+         10002 | X
+         10003 | X
+         10004 | X
+         10005 | X
+         10006 | X
+         10007 | X
+         10008 | X
+         10009 | X
+         10010 | X
+;
+
+coalesceEndsInNull
+# ending in null is silly because it'll noop but it shouldn't break things.
+FROM employees
+| EVAL first_name = COALESCE(first_name, last_name, null)
+| SORT first_name DESC, emp_no ASC
+| KEEP emp_no, first_name
+| limit 3;
+
+emp_no:integer | first_name:keyword
+         10047 | Zvonko
+         10081 | Zhongwei
+         10026 | Yongqiao
+;
+
+coalesceFolding
+FROM employees
+| EVAL foo=COALESCE(true, false, null)
+| SORT emp_no ASC
+| KEEP emp_no, first_name, foo
+| limit 3;
+
+emp_no:integer | first_name:keyword | foo:boolean
+         10001 | Georgi             | true
+         10002 | Bezalel            | true
+         10003 | Parto              | true
+;
diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/show.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/show.csv-spec
@@ -18,6 +18,7 @@ auto_bucket              |auto_bucket(arg1, arg2, arg3, arg4)
 avg                      |avg(arg1)
 case                     |case(arg1...)
 cidr_match               |cidr_match(arg1, arg2...)
+coalesce                 |coalesce(arg1...)
 concat                   |concat(arg1, arg2...)
 cos                      |cos(arg1)
 cosh                     |cosh(arg1)

diff --git a/.../src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java b/.../src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java
@@ -65,6 +65,7 @@
 import org.elasticsearch.xpack.esql.expression.function.scalar.multivalue.MvMedian;
 import org.elasticsearch.xpack.esql.expression.function.scalar.multivalue.MvMin;
 import org.elasticsearch.xpack.esql.expression.function.scalar.multivalue.MvSum;
+import org.elasticsearch.xpack.esql.expression.function.scalar.nulls.Coalesce;
 import org.elasticsearch.xpack.esql.expression.function.scalar.string.Concat;
 import org.elasticsearch.xpack.esql.expression.function.scalar.string.LTrim;
 import org.elasticsearch.xpack.esql.expression.function.scalar.string.Length;
@@ -144,6 +145,8 @@ private FunctionDefinition[][] functions() {
                 def(Now.class, Now::new, "now") },
             // conditional
             new FunctionDefinition[] { def(Case.class, Case::new, "case") },
+            // null
+            new FunctionDefinition[] { def(Coalesce.class, Coalesce::new, "coalesce"), },
             // IP
             new FunctionDefinition[] { def(CIDRMatch.class, CIDRMatch::new, "cidr_match") },
             // conversion functions

diff --git a/...c/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/conditional/Case.java b/...c/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/conditional/Case.java
@@ -181,7 +181,15 @@ private record CaseEvaluator(ElementType resultType, List<ConditionEvaluator> co
             EvalOperator.ExpressionEvaluator {
         @Override
         public Block eval(Page page) {
-            // Evaluate row at a time for now because its simpler. Much slower. But simpler.
+            /*
+             * We have to evaluate lazily so any errors or warnings that would be
+             * produced by the right hand side are avoided. And so if anything
+             * on the right hand side is slow we skip it.
+             *
+             * And it'd be good if that lazy evaluation were fast. But this
+             * implementation isn't. It's fairly simple - running position at
+             * a time - but it's not at all fast.
+             */
             int positionCount = page.getPositionCount();
             Block.Builder result = resultType.newBlockBuilder(positionCount);
             position: for (int p = 0; p < positionCount; p++) {