Skip to content

Commit

Permalink
ESQL: Disable optimizations with bad null handling (elastic#99434)
Browse files Browse the repository at this point in the history
* ESQL: Disable optimizations with bad null handling

We have optimizations that kick in when aggregating on the following
pairs of field types:
* `long`, `long`
* `keyword`, `long`
* `long`, `keyword`

These optimizations don't have proper support for `null` valued fields
but will grow that after elastic#98749. In the meantime this disables them in
a way that prevents them from bit-rotting.

* Update docs/changelog/99434.yaml
  • Loading branch information
nik9000 authored Sep 11, 2023
1 parent 6ab6b23 commit f9107e3
Show file tree
Hide file tree
Showing 10 changed files with 79 additions and 41 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ private static Operator operator(String grouping, String op, String dataType) {
};
return new HashAggregationOperator(
List.of(supplier(op, dataType, groups.size()).groupingAggregatorFactory(AggregatorMode.SINGLE)),
() -> BlockHash.build(groups, BIG_ARRAYS, 16 * 1024),
() -> BlockHash.build(groups, BIG_ARRAYS, 16 * 1024, false),
new DriverContext()
);
}
Expand Down
5 changes: 5 additions & 0 deletions docs/changelog/99434.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 99434
summary: "ESQL: Disable optimizations with bad null handling"
area: ES|QL
type: bug
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -62,12 +62,21 @@ public abstract sealed class BlockHash implements Releasable, SeenGroupIds //
* Creates a specialized hash table that maps one or more {@link Block}s to ids.
* @param emitBatchSize maximum batch size to be emitted when handling combinatorial
* explosion of groups caused by multivalued fields
* @param allowBrokenOptimizations true to allow optimizations with bad null handling. We will fix their
* null handling and remove this flag, but we need to disable these in
* production until we can. And this lets us continue to compile and
* test them.
*/
public static BlockHash build(List<HashAggregationOperator.GroupSpec> groups, BigArrays bigArrays, int emitBatchSize) {
public static BlockHash build(
List<HashAggregationOperator.GroupSpec> groups,
BigArrays bigArrays,
int emitBatchSize,
boolean allowBrokenOptimizations
) {
if (groups.size() == 1) {
return newForElementType(groups.get(0).channel(), groups.get(0).elementType(), bigArrays);
}
if (groups.size() == 2) {
if (allowBrokenOptimizations && groups.size() == 2) {
var g1 = groups.get(0);
var g2 = groups.get(1);
if (g1.elementType() == ElementType.LONG && g2.elementType() == ElementType.LONG) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ public record HashAggregationOperatorFactory(
) implements OperatorFactory {
/**
 * Builds the {@link HashAggregationOperator} for this factory's group specs.
 * <p>
 * Passes {@code allowBrokenOptimizations=false} to {@code BlockHash.build} so that
 * production code never takes the specialized two-key hash paths whose {@code null}
 * handling is known to be broken (disabled pending elastic#98749; see elastic#99434).
 * The {@code BlockHash} is created lazily via the supplier so each driver gets its own.
 */
@Override
public Operator get(DriverContext driverContext) {
return new HashAggregationOperator(aggregators, () -> BlockHash.build(groups, bigArrays, maxPageSize, false), driverContext);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -422,7 +422,7 @@ private static class ValuesAggregator implements Releasable {
this.extractor = new ValuesSourceReaderOperator(sources, docChannel, groupingField);
this.aggregator = new HashAggregationOperator(
aggregatorFactories,
() -> BlockHash.build(List.of(new GroupSpec(channelIndex, sources.get(0).elementType())), bigArrays, maxPageSize),
() -> BlockHash.build(List.of(new GroupSpec(channelIndex, sources.get(0).elementType())), bigArrays, maxPageSize, false),
driverContext
);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,8 @@ public String toString() {
() -> BlockHash.build(
List.of(new HashAggregationOperator.GroupSpec(0, ElementType.BYTES_REF)),
bigArrays,
randomPageSize()
randomPageSize(),
false
),
driverContext
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ private BlockHash newBlockHash(int emitBatchSize, List<ElementType> types) {
MockBigArrays bigArrays = new MockBigArrays(PageCacheRecycler.NON_RECYCLING_INSTANCE, new NoneCircuitBreakerService());
return forcePackedHash
? new PackedValuesBlockHash(specs, bigArrays, emitBatchSize)
: BlockHash.build(specs, bigArrays, emitBatchSize);
: BlockHash.build(specs, bigArrays, emitBatchSize, true);
}

private static class KeyComparator implements Comparator<List<?>> {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1032,7 +1032,7 @@ private void hash(Consumer<OrdsAndKeys> callback, int emitBatchSize, Block... va
try (
BlockHash blockHash = forcePackedHash
? new PackedValuesBlockHash(specs, bigArrays, emitBatchSize)
: BlockHash.build(specs, bigArrays, emitBatchSize)
: BlockHash.build(specs, bigArrays, emitBatchSize, true)
) {
hash(true, blockHash, callback, values);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -290,23 +290,35 @@ null
;

byStringAndLong
from employees | eval trunk_worked_seconds = avg_worked_seconds / 100000000 * 100000000 | stats c = count(gender) by gender, trunk_worked_seconds | sort c desc;
FROM employees
| EVAL trunk_worked_seconds = avg_worked_seconds / 100000000 * 100000000
| STATS c = COUNT(gender) by gender, trunk_worked_seconds
| SORT c desc;

c:long | gender:keyword | trunk_worked_seconds:long
30 | M | 300000000
27 | M | 200000000
22 | F | 300000000
11 | F | 200000000
30 | M | 300000000
27 | M | 200000000
22 | F | 300000000
11 | F | 200000000
0 | null | 200000000
0 | null | 300000000
;

byStringAndLongWithAlias
from employees | eval trunk_worked_seconds = avg_worked_seconds / 100000000 * 100000000 | rename gender as g, trunk_worked_seconds as tws | keep g, tws | stats c = count(g) by g, tws | sort c desc;
FROM employees
| EVAL trunk_worked_seconds = avg_worked_seconds / 100000000 * 100000000
| RENAME gender as g, trunk_worked_seconds as tws
| KEEP g, tws
| STATS c = count(g) by g, tws
| SORT c desc;

c:long | g:keyword | tws:long
30 | M | 300000000
27 | M | 200000000
22 | F | 300000000
11 | F | 200000000
30 | M | 300000000
27 | M | 200000000
22 | F | 300000000
11 | F | 200000000
0 | null | 200000000
0 | null | 300000000
;

byStringAndString
Expand All @@ -324,35 +336,45 @@ c:long | gender:keyword | hire_year_str:keyword
;

byLongAndLong
from employees | eval trunk_worked_seconds = avg_worked_seconds / 100000000 * 100000000 | stats c = count(languages.long) by languages.long, trunk_worked_seconds | sort c desc, languages.long, trunk_worked_seconds;
FROM employees
| EVAL trunk_worked_seconds = avg_worked_seconds / 100000000 * 100000000
| STATS c = COUNT(languages.long) BY languages.long, trunk_worked_seconds
| SORT c DESC, languages.long, trunk_worked_seconds;

c:long | languages.long:long | trunk_worked_seconds:long
15 |5 |300000000
11 |2 |300000000
10 |4 |300000000
9 |3 |200000000
8 |1 |200000000
8 |2 |200000000
8 |3 |300000000
8 |4 |200000000
7 |1 |300000000
6 |5 |200000000
15 |5 |300000000
11 |2 |300000000
10 |4 |300000000
9 |3 |200000000
8 |1 |200000000
8 |2 |200000000
8 |3 |300000000
8 |4 |200000000
7 |1 |300000000
6 |5 |200000000
0 |null |200000000
0 |null |300000000
;

byUnmentionedLongAndLong
from employees | eval trunk_worked_seconds = avg_worked_seconds / 100000000 * 100000000 | stats c = count(gender) by languages.long, trunk_worked_seconds | sort c desc, trunk_worked_seconds;
FROM employees
| EVAL trunk_worked_seconds = avg_worked_seconds / 100000000 * 100000000
| STATS c = count(gender) by languages.long, trunk_worked_seconds
| SORT c desc, trunk_worked_seconds;

c:long | languages.long:long | trunk_worked_seconds:long
13 |5 |300000000
10 |2 |300000000
9 |3 |200000000
9 |4 |300000000
8 |4 |200000000
8 |3 |300000000
7 |1 |200000000
6 |2 |200000000
6 |1 |300000000
4 |5 |200000000
13 |5 |300000000
10 |2 |300000000
9 |3 |200000000
9 |4 |300000000
8 |4 |200000000
8 |3 |300000000
7 |1 |200000000
6 |2 |200000000
6 |null |300000000
6 |1 |300000000
4 |null |200000000
4 |5 |200000000
;

byUnmentionedIntAndLong
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,8 @@ public Operator get(DriverContext driverContext) {
() -> BlockHash.build(
List.of(new HashAggregationOperator.GroupSpec(groupByChannel, groupElementType)),
bigArrays,
pageSize
pageSize,
false
),
columnName,
driverContext
Expand Down

0 comments on commit f9107e3

Please sign in to comment.