Skip to content

Commit

Permalink
Fix groupby pathkeys for gapfill in PG16
Browse files Browse the repository at this point in the history
With PG16, group pathkeys can include column which are
not in the GROUP BY clause when dealing with ordered
aggregates. This means we have to exclude those columns
when checking and creating the gapfill custom scan subpath.

Fixes #6396

(cherry picked from commit 0e2ff92)
  • Loading branch information
antekresic committed Dec 18, 2023
1 parent bb870c9 commit a671faf
Show file tree
Hide file tree
Showing 7 changed files with 289 additions and 10 deletions.
2 changes: 2 additions & 0 deletions .unreleased/fix_6408
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Fixes: #6408 Fix groupby pathkeys for gapfill in PG16
Thanks: @MA-MacDonald for reporting an issue with gapfill in PG16
39 changes: 29 additions & 10 deletions tsl/src/nodes/gapfill/gapfill_plan.c
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,18 @@ window_function_walker(Node *node, gapfill_walker_context *context)
static bool
gapfill_correct_order(PlannerInfo *root, Path *subpath, FuncExpr *func)
{
if (list_length(subpath->pathkeys) != list_length(root->group_pathkeys))
int num_groupby_pathkeys;
#if PG16_LT
num_groupby_pathkeys = list_length(root->group_pathkeys);
#else
/* In PG16 group_pathkeys can contain additional pathkeys
* used for optimization on ordered aggregates.
* We only want to deal with group by elements only here.
*/
num_groupby_pathkeys = root->num_groupby_pathkeys;
#endif

if (list_length(subpath->pathkeys) != num_groupby_pathkeys)
return false;

if (list_length(subpath->pathkeys) > 0)
Expand All @@ -147,12 +158,11 @@ gapfill_correct_order(PlannerInfo *root, Path *subpath, FuncExpr *func)
if (BTLessStrategyNumber == pk->pk_strategy && IsA(em->em_expr, FuncExpr) &&
((FuncExpr *) em->em_expr)->funcid == func->funcid)
{
ListCell *lc;

/* check all groups are part of subpath pathkeys */
foreach (lc, root->group_pathkeys)
int i;
/* check all groupby pathkeys are part of subpath pathkeys */
for (i = 0; i < num_groupby_pathkeys; i++)
{
if (!list_member(subpath->pathkeys, lfirst(lc)))
if (!list_member(subpath->pathkeys, list_nth(root->group_pathkeys, i)))
return false;
}
return true;
Expand Down Expand Up @@ -358,13 +368,22 @@ gapfill_path_create(PlannerInfo *root, Path *subpath, FuncExpr *func)
if (!gapfill_correct_order(root, subpath, func))
{
List *new_order = NIL;
ListCell *lc;
PathKey *pk_func = NULL;

int num_groupby_pathkeys;
#if PG16_LT
num_groupby_pathkeys = list_length(root->group_pathkeys);
#else
/* In PG16 group_pathkeys can contain additional pathkeys
* used for optimization on ordered aggregates.
* We only want to deal with group by elements only here.
*/
num_groupby_pathkeys = root->num_groupby_pathkeys;
#endif
int i;
/* subpath does not have correct order */
foreach (lc, root->group_pathkeys)
for (i = 0; i < num_groupby_pathkeys; i++)
{
PathKey *pk = lfirst(lc);
PathKey *pk = list_nth(root->group_pathkeys, i);
EquivalenceMember *em = linitial(pk->pk_eclass->ec_members);

if (!pk_func && IsA(em->em_expr, FuncExpr) &&
Expand Down
56 changes: 56 additions & 0 deletions tsl/test/shared/expected/gapfill-13.out
Original file line number Diff line number Diff line change
Expand Up @@ -3392,3 +3392,59 @@ GROUP BY 1;
(7 rows)

DROP TABLE month_timezone;
-- Test gapfill with additional group pathkeys added for optimization (#6396)
CREATE TABLE stocks_real_time (
time TIMESTAMPTZ NOT NULL,
symbol TEXT NOT NULL,
price DOUBLE PRECISION NULL,
day_volume INT NULL
);
INSERT INTO stocks_real_time
VALUES
(NOW(), 's1', 70.0, 50),
(NOW(), 's2', 66.5, 60),
(NOW(), 's3', 77.0, 65);
SELECT
time_bucket_gapfill('1 day', time) AS day,
COUNT(DISTINCT symbol) AS symbol_count
FROM stocks_real_time
WHERE time > '2023-12-01'
AND time < '2023-12-10'
GROUP BY 1;
day | symbol_count
------------------------------+--------------
Thu Nov 30 16:00:00 2023 PST |
Fri Dec 01 16:00:00 2023 PST |
Sat Dec 02 16:00:00 2023 PST |
Sun Dec 03 16:00:00 2023 PST |
Mon Dec 04 16:00:00 2023 PST |
Tue Dec 05 16:00:00 2023 PST |
Wed Dec 06 16:00:00 2023 PST |
Thu Dec 07 16:00:00 2023 PST |
Fri Dec 08 16:00:00 2023 PST |
Sat Dec 09 16:00:00 2023 PST |
(10 rows)

SELECT
time_bucket_gapfill('1 day', time) AS day,
COUNT(DISTINCT symbol) AS symbol_count
FROM stocks_real_time
WHERE time > '2023-12-01'
AND time < '2023-12-10'
GROUP BY 1
ORDER BY 1 DESC;
day | symbol_count
------------------------------+--------------
Sat Dec 09 16:00:00 2023 PST |
Fri Dec 08 16:00:00 2023 PST |
Thu Dec 07 16:00:00 2023 PST |
Wed Dec 06 16:00:00 2023 PST |
Tue Dec 05 16:00:00 2023 PST |
Mon Dec 04 16:00:00 2023 PST |
Sun Dec 03 16:00:00 2023 PST |
Sat Dec 02 16:00:00 2023 PST |
Fri Dec 01 16:00:00 2023 PST |
Thu Nov 30 16:00:00 2023 PST |
(10 rows)

DROP TABLE stocks_real_time;
56 changes: 56 additions & 0 deletions tsl/test/shared/expected/gapfill-14.out
Original file line number Diff line number Diff line change
Expand Up @@ -3392,3 +3392,59 @@ GROUP BY 1;
(7 rows)

DROP TABLE month_timezone;
-- Test gapfill with additional group pathkeys added for optimization (#6396)
CREATE TABLE stocks_real_time (
time TIMESTAMPTZ NOT NULL,
symbol TEXT NOT NULL,
price DOUBLE PRECISION NULL,
day_volume INT NULL
);
INSERT INTO stocks_real_time
VALUES
(NOW(), 's1', 70.0, 50),
(NOW(), 's2', 66.5, 60),
(NOW(), 's3', 77.0, 65);
SELECT
time_bucket_gapfill('1 day', time) AS day,
COUNT(DISTINCT symbol) AS symbol_count
FROM stocks_real_time
WHERE time > '2023-12-01'
AND time < '2023-12-10'
GROUP BY 1;
day | symbol_count
------------------------------+--------------
Thu Nov 30 16:00:00 2023 PST |
Fri Dec 01 16:00:00 2023 PST |
Sat Dec 02 16:00:00 2023 PST |
Sun Dec 03 16:00:00 2023 PST |
Mon Dec 04 16:00:00 2023 PST |
Tue Dec 05 16:00:00 2023 PST |
Wed Dec 06 16:00:00 2023 PST |
Thu Dec 07 16:00:00 2023 PST |
Fri Dec 08 16:00:00 2023 PST |
Sat Dec 09 16:00:00 2023 PST |
(10 rows)

SELECT
time_bucket_gapfill('1 day', time) AS day,
COUNT(DISTINCT symbol) AS symbol_count
FROM stocks_real_time
WHERE time > '2023-12-01'
AND time < '2023-12-10'
GROUP BY 1
ORDER BY 1 DESC;
day | symbol_count
------------------------------+--------------
Sat Dec 09 16:00:00 2023 PST |
Fri Dec 08 16:00:00 2023 PST |
Thu Dec 07 16:00:00 2023 PST |
Wed Dec 06 16:00:00 2023 PST |
Tue Dec 05 16:00:00 2023 PST |
Mon Dec 04 16:00:00 2023 PST |
Sun Dec 03 16:00:00 2023 PST |
Sat Dec 02 16:00:00 2023 PST |
Fri Dec 01 16:00:00 2023 PST |
Thu Nov 30 16:00:00 2023 PST |
(10 rows)

DROP TABLE stocks_real_time;
56 changes: 56 additions & 0 deletions tsl/test/shared/expected/gapfill-15.out
Original file line number Diff line number Diff line change
Expand Up @@ -3392,3 +3392,59 @@ GROUP BY 1;
(7 rows)

DROP TABLE month_timezone;
-- Test gapfill with additional group pathkeys added for optimization (#6396)
CREATE TABLE stocks_real_time (
time TIMESTAMPTZ NOT NULL,
symbol TEXT NOT NULL,
price DOUBLE PRECISION NULL,
day_volume INT NULL
);
INSERT INTO stocks_real_time
VALUES
(NOW(), 's1', 70.0, 50),
(NOW(), 's2', 66.5, 60),
(NOW(), 's3', 77.0, 65);
SELECT
time_bucket_gapfill('1 day', time) AS day,
COUNT(DISTINCT symbol) AS symbol_count
FROM stocks_real_time
WHERE time > '2023-12-01'
AND time < '2023-12-10'
GROUP BY 1;
day | symbol_count
------------------------------+--------------
Thu Nov 30 16:00:00 2023 PST |
Fri Dec 01 16:00:00 2023 PST |
Sat Dec 02 16:00:00 2023 PST |
Sun Dec 03 16:00:00 2023 PST |
Mon Dec 04 16:00:00 2023 PST |
Tue Dec 05 16:00:00 2023 PST |
Wed Dec 06 16:00:00 2023 PST |
Thu Dec 07 16:00:00 2023 PST |
Fri Dec 08 16:00:00 2023 PST |
Sat Dec 09 16:00:00 2023 PST |
(10 rows)

SELECT
time_bucket_gapfill('1 day', time) AS day,
COUNT(DISTINCT symbol) AS symbol_count
FROM stocks_real_time
WHERE time > '2023-12-01'
AND time < '2023-12-10'
GROUP BY 1
ORDER BY 1 DESC;
day | symbol_count
------------------------------+--------------
Sat Dec 09 16:00:00 2023 PST |
Fri Dec 08 16:00:00 2023 PST |
Thu Dec 07 16:00:00 2023 PST |
Wed Dec 06 16:00:00 2023 PST |
Tue Dec 05 16:00:00 2023 PST |
Mon Dec 04 16:00:00 2023 PST |
Sun Dec 03 16:00:00 2023 PST |
Sat Dec 02 16:00:00 2023 PST |
Fri Dec 01 16:00:00 2023 PST |
Thu Nov 30 16:00:00 2023 PST |
(10 rows)

DROP TABLE stocks_real_time;
56 changes: 56 additions & 0 deletions tsl/test/shared/expected/gapfill-16.out
Original file line number Diff line number Diff line change
Expand Up @@ -3394,3 +3394,59 @@ GROUP BY 1;
(7 rows)

DROP TABLE month_timezone;
-- Test gapfill with additional group pathkeys added for optimization (#6396)
CREATE TABLE stocks_real_time (
time TIMESTAMPTZ NOT NULL,
symbol TEXT NOT NULL,
price DOUBLE PRECISION NULL,
day_volume INT NULL
);
INSERT INTO stocks_real_time
VALUES
(NOW(), 's1', 70.0, 50),
(NOW(), 's2', 66.5, 60),
(NOW(), 's3', 77.0, 65);
SELECT
time_bucket_gapfill('1 day', time) AS day,
COUNT(DISTINCT symbol) AS symbol_count
FROM stocks_real_time
WHERE time > '2023-12-01'
AND time < '2023-12-10'
GROUP BY 1;
day | symbol_count
------------------------------+--------------
Thu Nov 30 16:00:00 2023 PST |
Fri Dec 01 16:00:00 2023 PST |
Sat Dec 02 16:00:00 2023 PST |
Sun Dec 03 16:00:00 2023 PST |
Mon Dec 04 16:00:00 2023 PST |
Tue Dec 05 16:00:00 2023 PST |
Wed Dec 06 16:00:00 2023 PST |
Thu Dec 07 16:00:00 2023 PST |
Fri Dec 08 16:00:00 2023 PST |
Sat Dec 09 16:00:00 2023 PST |
(10 rows)

SELECT
time_bucket_gapfill('1 day', time) AS day,
COUNT(DISTINCT symbol) AS symbol_count
FROM stocks_real_time
WHERE time > '2023-12-01'
AND time < '2023-12-10'
GROUP BY 1
ORDER BY 1 DESC;
day | symbol_count
------------------------------+--------------
Sat Dec 09 16:00:00 2023 PST |
Fri Dec 08 16:00:00 2023 PST |
Thu Dec 07 16:00:00 2023 PST |
Wed Dec 06 16:00:00 2023 PST |
Tue Dec 05 16:00:00 2023 PST |
Mon Dec 04 16:00:00 2023 PST |
Sun Dec 03 16:00:00 2023 PST |
Sat Dec 02 16:00:00 2023 PST |
Fri Dec 01 16:00:00 2023 PST |
Thu Nov 30 16:00:00 2023 PST |
(10 rows)

DROP TABLE stocks_real_time;
34 changes: 34 additions & 0 deletions tsl/test/shared/sql/gapfill.sql.in
Original file line number Diff line number Diff line change
Expand Up @@ -1537,3 +1537,37 @@ FROM
GROUP BY 1;

DROP TABLE month_timezone;

-- Test gapfill with additional group pathkeys added for optimization (#6396)
CREATE TABLE stocks_real_time (
time TIMESTAMPTZ NOT NULL,
symbol TEXT NOT NULL,
price DOUBLE PRECISION NULL,
day_volume INT NULL
);

INSERT INTO stocks_real_time
VALUES
(NOW(), 's1', 70.0, 50),
(NOW(), 's2', 66.5, 60),
(NOW(), 's3', 77.0, 65);

SELECT
time_bucket_gapfill('1 day', time) AS day,
COUNT(DISTINCT symbol) AS symbol_count
FROM stocks_real_time
WHERE time > '2023-12-01'
AND time < '2023-12-10'
GROUP BY 1;

SELECT
time_bucket_gapfill('1 day', time) AS day,
COUNT(DISTINCT symbol) AS symbol_count
FROM stocks_real_time
WHERE time > '2023-12-01'
AND time < '2023-12-10'
GROUP BY 1
ORDER BY 1 DESC;

DROP TABLE stocks_real_time;

0 comments on commit a671faf

Please sign in to comment.