Fix NaN-handling for vectorized aggregation

The vector agg functions didn't handle NaN-floats properly for min/max functions, which produced the wrong `min()` output when NaN values were presents (e.g., picking NaN over -Inf). NaN-checks are different for min and max so the checks are moved to the predicate macro instead of being defined in the template function. The previously erroneous handling of NaN is evident by some of the changes in the test output. However, some queries didn't run any actual vectorized agg plans when they should have, thus "accidentally" producing the correct min result in the test file. In those cases, instead of the vectorized plan, the test ran an init plan doing a sort with a limit of 1 to find the min. Disabling sort in the test ensures the plan is vectorized, and thus producing the erroneous result when the fix is not present.
timescale · Jan 10, 2025 · 3170014 · 3170014
1 parent a03b43a
commit 3170014
Show file tree

Hide file tree

Showing 5 changed files with 19 additions and 7 deletions.
diff --git a/.unreleased/pr_7584 b/.unreleased/pr_7584
@@ -0,0 +1 @@
+Fixes: #7584 Fix NaN-handling for vectorized aggregation
diff --git a/tsl/src/nodes/vector_agg/function/minmax_arithmetic_single.c b/tsl/src/nodes/vector_agg/function/minmax_arithmetic_single.c
@@ -26,8 +26,7 @@ FUNCTION_NAME(vector_impl)(void *agg_state, int n, const CTYPE *values, const ui
 		 * Note that we have to properly handle NaNs and Infinities for floats.
 		 */
 		const bool do_replace =
-			new_value_ok && (unlikely(!outer_isvalid) || PREDICATE(outer_result, new_value) ||
-							 isnan((double) new_value));
+			new_value_ok && (unlikely(!outer_isvalid) || PREDICATE(outer_result, new_value));
 
 		outer_result = do_replace ? new_value : outer_result;
 		outer_isvalid = outer_isvalid || do_replace;

diff --git a/tsl/src/nodes/vector_agg/function/minmax_templates.c b/tsl/src/nodes/vector_agg/function/minmax_templates.c
@@ -47,13 +47,17 @@ minmax_emit(void *agg_state, Datum *out_result, bool *out_isnull)
 
 /*
  * Templated parts for vectorized min(), max().
+ *
+ * NaN handled similar to equivalent PG functions.
  */
 #define AGG_NAME MIN
-#define PREDICATE(CURRENT, NEW) ((CURRENT) > (NEW))
+#define PREDICATE(CURRENT, NEW)                                                                    \
+	(unlikely(!isnan((double) (NEW))) && (isnan((double) (CURRENT)) || (CURRENT) > (NEW)))
 #include "minmax_arithmetic_types.c"
 
 #define AGG_NAME MAX
-#define PREDICATE(CURRENT, NEW) ((CURRENT) < (NEW))
+#define PREDICATE(CURRENT, NEW)                                                                    \
+	(unlikely(!isnan((double) (CURRENT))) && (isnan((double) (NEW)) || (CURRENT) < (NEW)))
 #include "minmax_arithmetic_types.c"
 
 #undef AGG_NAME
diff --git a/tsl/test/expected/vector_agg_functions.out b/tsl/test/expected/vector_agg_functions.out
@@ -116,6 +116,10 @@ set timescaledb.debug_require_vector_agg = :'guc_value';
 ---- on float4 due to different numeric stability in our and PG implementations.
 --set timescaledb.enable_chunkwise_aggregation to off; set timescaledb.enable_vectorized_aggregation to off; set timescaledb.debug_require_vector_agg = 'forbid';
 set max_parallel_workers_per_gather = 0;
+-- Disable sorting to force vectorized agg plans for min and max,
+-- which otherwise can produce a non-vectorized init-plan that does a
+-- sort with limit 1.
+set enable_sort = false;
 select
     format('%sselect %s%s(%s) from aggfns%s%s%s;',
             explain,
@@ -378,13 +382,13 @@ select ss, min(cfloat4) from aggfns group by ss order by min(cfloat4), ss limit
   3 | -Infinity
   4 |  -49.9999
   6 |  -49.9995
+ 11 |  -49.9991
   7 |  -49.9984
   8 |  -49.9969
   0 |  -49.9949
   5 |  -49.9942
   9 |  -49.9911
     |  -45.4083
- 11 |       NaN
 (10 rows)
 
 select stddev(cfloat4) from aggfns;
@@ -1997,14 +2001,14 @@ select ss, min(cfloat4) from aggfns where cfloat8 > 0 group by ss order by min(c
 ----+-----------
   3 | -Infinity
   4 |  -49.9993
+ 11 |  -49.9974
   8 |  -49.9969
   7 |  -49.9969
   0 |  -49.9915
   9 |  -49.9911
   5 |  -49.9892
   6 |  -49.9891
     |  -41.6131
- 11 |       NaN
 (10 rows)
 
 select stddev(cfloat4) from aggfns where cfloat8 > 0;
@@ -5193,13 +5197,13 @@ select ss, min(cfloat4) from aggfns where cfloat8 < 1000 group by ss order by mi
   3 | -Infinity
   4 |  -49.9999
   6 |  -49.9995
+ 11 |  -49.9991
   7 |  -49.9984
   8 |  -49.9969
   0 |  -49.9949
   5 |  -49.9942
   9 |  -49.9911
     |  -45.4083
- 11 |       NaN
 (10 rows)
 
 select stddev(cfloat4) from aggfns where cfloat8 < 1000;

diff --git a/tsl/test/sql/vector_agg_functions.sql b/tsl/test/sql/vector_agg_functions.sql
@@ -101,6 +101,10 @@ set timescaledb.debug_require_vector_agg = :'guc_value';
 --set timescaledb.enable_chunkwise_aggregation to off; set timescaledb.enable_vectorized_aggregation to off; set timescaledb.debug_require_vector_agg = 'forbid';
 
 set max_parallel_workers_per_gather = 0;
+-- Disable sorting to force vectorized agg plans for min and max,
+-- which otherwise can produce a non-vectorized init-plan that does a
+-- sort with limit 1.
+set enable_sort = false;
 
 select
     format('%sselect %s%s(%s) from aggfns%s%s%s;',
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		Fixes: #7584 Fix NaN-handling for vectorized aggregation