From 234c8cff4c52bb1c7e1a60b8529de11ace666dc1 Mon Sep 17 00:00:00 2001 From: Jeevan Chalke Date: Tue, 29 Nov 2022 19:38:37 +0530 Subject: [PATCH] Check the shippability of sort clauses properly. We were pushing the ORDER BY clauses to the remote side without verifying whether the sort operator was safe to ship. It resulted in a wrong output when the sort operator isn't default for the sort expression's type. Fix the same. FDW-564, Vaibhav Dalvi, reviewed by Suraj Kharage and Jeevan Chalke. --- deparse.c | 70 +++++++++++++++++++++ expected/pushdown.out | 62 +++++++++++++++++++ expected/pushdown_1.out | 58 +++++++++++++++++ mongo_fdw.c | 134 ++++++++++++++++++++++++++++------------ mongo_fdw.h | 8 +++ sql/pushdown.sql | 43 +++++++++++++ 6 files changed, 336 insertions(+), 39 deletions(-) diff --git a/deparse.c b/deparse.c index 9bb3f3c..7ab3c0d 100644 --- a/deparse.c +++ b/deparse.c @@ -29,6 +29,7 @@ #include "mongo.h" #endif #include "mongo_query.h" +#include "nodes/nodeFuncs.h" #if PG_VERSION_NUM < 120000 #include "nodes/relation.h" #include "optimizer/var.h" @@ -623,3 +624,72 @@ mongo_add_null_check(Var *column, BSON *expr, pipeline_cxt *context) bsonAppendNull(&ne_expr, "1"); bsonAppendFinishArray(expr, &ne_expr); } + +/* + * mongo_is_foreign_pathkey + * Returns true if it's safe to push down the sort expression described by + * 'pathkey' to the foreign server. + */ +bool +mongo_is_foreign_pathkey(PlannerInfo *root, RelOptInfo *baserel, + PathKey *pathkey) +{ + EquivalenceMember *em; + EquivalenceClass *pathkey_ec = pathkey->pk_eclass; + Expr *em_expr; + + /* + * mongo_is_foreign_expr would detect volatile expressions as well, + * but checking ec_has_volatile here saves some cycles. + */ + if (pathkey_ec->ec_has_volatile) + return false; + + /* can push if a suitable EC member exists */ + if (!(em = mongo_find_em_for_rel(root, pathkey_ec, baserel))) + return false; + + /* Ignore binary-compatible relabeling */ + em_expr = em->em_expr; + while (em_expr && IsA(em_expr, RelabelType)) + em_expr = ((RelabelType *) em_expr)->arg; + + /* Only Vars are allowed per MongoDB. */ + if (!IsA(em_expr, Var)) + return false; + + /* Check for sort operator pushability. */ + if (!mongo_is_default_sort_operator(em, pathkey)) + return false; + + return true; +} + +/* + * mongo_is_builtin + * Return true if given object is one of PostgreSQL's built-in objects. + * + * We use FirstBootstrapObjectId as the cutoff, so that we only consider + * objects with hand-assigned OIDs to be "built in", not for instance any + * function or type defined in the information_schema. + * + * Our constraints for dealing with types are tighter than they are for + * functions or operators: we want to accept only types that are in pg_catalog, + * else format_type might incorrectly fail to schema-qualify their names. + * (This could be fixed with some changes to format_type, but for now there's + * no need.) Thus we must exclude information_schema types. + * + * XXX there is a problem with this, which is that the set of built-in + * objects expands over time. Something that is built-in to us might not + * be known to the remote server, if it's of an older version. But keeping + * track of that would be a huge exercise. + */ +bool +mongo_is_builtin(Oid oid) +{ +#if PG_VERSION_NUM >= 120000 + return (oid < FirstGenbkiObjectId); +#else + return (oid < FirstBootstrapObjectId); +#endif +} diff --git a/expected/pushdown.out b/expected/pushdown.out index 20e6574..dcd27ab 100644 --- a/expected/pushdown.out +++ b/expected/pushdown.out @@ -700,6 +700,62 @@ SELECT _id, a01, a31, a32, a33, a34, a35 FROM f_test_large ORDER BY 2 | 1 | 31 | 132 | 133 | 134 | 135 (5 rows) +-- FDW-564: Test ORDER BY with user defined operators. Create the operator +-- family required for the test. +CREATE OPERATOR PUBLIC.<^ ( + LEFTARG = INT4, + RIGHTARG = INT4, + PROCEDURE = INT4EQ +); +CREATE OPERATOR PUBLIC.=^ ( + LEFTARG = INT4, + RIGHTARG = INT4, + PROCEDURE = INT4LT +); +CREATE OPERATOR PUBLIC.>^ ( + LEFTARG = INT4, + RIGHTARG = INT4, + PROCEDURE = INT4GT +); +CREATE OPERATOR FAMILY my_op_family USING btree; +CREATE FUNCTION MY_OP_CMP(A INT, B INT) RETURNS INT AS + $$ BEGIN RETURN BTINT4CMP(A, B); END $$ LANGUAGE PLPGSQL; +CREATE OPERATOR CLASS my_op_class FOR TYPE INT USING btree FAMILY my_op_family AS + OPERATOR 1 PUBLIC.<^, + OPERATOR 3 PUBLIC.=^, + OPERATOR 5 PUBLIC.>^, + FUNCTION 1 my_op_cmp(INT, INT); +-- FDW-564: User defined operators are not pushed down. +EXPLAIN (COSTS FALSE, VERBOSE) +SELECT * FROM f_mongo_test ORDER BY a USING OPERATOR(public.<^); + QUERY PLAN +--------------------------------------------------------- + Sort + Output: _id, a, b + Sort Key: f_mongo_test.a USING <^ + -> Foreign Scan on public.f_mongo_test + Output: _id, a, b + Foreign Namespace: mongo_fdw_regress.mongo_test +(6 rows) + +EXPLAIN (COSTS FALSE, VERBOSE) +SELECT MIN(a) FROM f_mongo_test ORDER BY 1 USING OPERATOR(public.<^); + QUERY PLAN +----------------------------------------------------------------- + Sort + Output: ($0) + Sort Key: ($0) USING <^ + InitPlan 1 (returns $0) + -> Limit + Output: f_mongo_test.a + -> Foreign Scan on public.f_mongo_test + Output: f_mongo_test.a + Filter: (f_mongo_test.a IS NOT NULL) + Foreign Namespace: mongo_fdw_regress.mongo_test + -> Result + Output: $0 +(12 rows) + -- Cleanup DELETE FROM f_mongo_test WHERE a != 0; DELETE FROM f_test_tbl2 WHERE c1 IS NULL; @@ -708,6 +764,12 @@ DROP FOREIGN TABLE f_test_tbl1; DROP FOREIGN TABLE f_test_tbl2; DROP FOREIGN TABLE f_test_tbl3; DROP FOREIGN TABLE f_test_large; +DROP OPERATOR CLASS my_op_class USING btree; +DROP FUNCTION my_op_cmp(a INT, b INT); +DROP OPERATOR FAMILY my_op_family USING btree; +DROP OPERATOR public.>^(INT, INT); +DROP OPERATOR public.=^(INT, INT); +DROP OPERATOR public.<^(INT, INT); DROP USER MAPPING FOR public SERVER mongo_server; DROP SERVER mongo_server; DROP EXTENSION mongo_fdw; diff --git a/expected/pushdown_1.out b/expected/pushdown_1.out index e3c80d3..aa11550 100644 --- a/expected/pushdown_1.out +++ b/expected/pushdown_1.out @@ -744,6 +744,58 @@ SELECT _id, a01, a31, a32, a33, a34, a35 FROM f_test_large ORDER BY 2 | 1 | 31 | 132 | 133 | 134 | 135 (5 rows) +-- FDW-564: Test ORDER BY with user defined operators. Create the operator +-- family required for the test. +CREATE OPERATOR PUBLIC.<^ ( + LEFTARG = INT4, + RIGHTARG = INT4, + PROCEDURE = INT4EQ +); +CREATE OPERATOR PUBLIC.=^ ( + LEFTARG = INT4, + RIGHTARG = INT4, + PROCEDURE = INT4LT +); +CREATE OPERATOR PUBLIC.>^ ( + LEFTARG = INT4, + RIGHTARG = INT4, + PROCEDURE = INT4GT +); +CREATE OPERATOR FAMILY my_op_family USING btree; +CREATE FUNCTION MY_OP_CMP(A INT, B INT) RETURNS INT AS + $$ BEGIN RETURN BTINT4CMP(A, B); END $$ LANGUAGE PLPGSQL; +CREATE OPERATOR CLASS my_op_class FOR TYPE INT USING btree FAMILY my_op_family AS + OPERATOR 1 PUBLIC.<^, + OPERATOR 3 PUBLIC.=^, + OPERATOR 5 PUBLIC.>^, + FUNCTION 1 my_op_cmp(INT, INT); +-- FDW-564: User defined operators are not pushed down. +EXPLAIN (COSTS FALSE, VERBOSE) +SELECT * FROM f_mongo_test ORDER BY a USING OPERATOR(public.<^); + QUERY PLAN +--------------------------------------------------------- + Sort + Output: _id, a, b + Sort Key: f_mongo_test.a USING <^ + -> Foreign Scan on public.f_mongo_test + Output: _id, a, b + Foreign Namespace: mongo_fdw_regress.mongo_test +(6 rows) + +EXPLAIN (COSTS FALSE, VERBOSE) +SELECT MIN(a) FROM f_mongo_test ORDER BY 1 USING OPERATOR(public.<^); + QUERY PLAN +--------------------------------------------------------------- + Sort + Output: (min(a)) + Sort Key: (min(f_mongo_test.a)) USING <^ + -> Aggregate + Output: min(a) + -> Foreign Scan on public.f_mongo_test + Output: _id, a, b + Foreign Namespace: mongo_fdw_regress.mongo_test +(8 rows) + -- Cleanup DELETE FROM f_mongo_test WHERE a != 0; DELETE FROM f_test_tbl2 WHERE c1 IS NULL; @@ -752,6 +804,12 @@ DROP FOREIGN TABLE f_test_tbl1; DROP FOREIGN TABLE f_test_tbl2; DROP FOREIGN TABLE f_test_tbl3; DROP FOREIGN TABLE f_test_large; +DROP OPERATOR CLASS my_op_class USING btree; +DROP FUNCTION my_op_cmp(a INT, b INT); +DROP OPERATOR FAMILY my_op_family USING btree; +DROP OPERATOR public.>^(INT, INT); +DROP OPERATOR public.=^(INT, INT); +DROP OPERATOR public.<^(INT, INT); DROP USER MAPPING FOR public SERVER mongo_server; DROP SERVER mongo_server; DROP EXTENSION mongo_fdw; diff --git a/mongo_fdw.c b/mongo_fdw.c index ec26592..6ab6dff 100644 --- a/mongo_fdw.c +++ b/mongo_fdw.c @@ -55,6 +55,7 @@ #include "utils/rel.h" #include "utils/selfuncs.h" #include "utils/syscache.h" +#include "utils/typcache.h" /* Declarations for dynamic loading */ PG_MODULE_MAGIC; @@ -267,11 +268,9 @@ static void mongo_add_paths_with_pathkeys(PlannerInfo *root, Path *epq_path, Cost base_startup_cost, Cost base_total_cost); -static Expr *mongo_find_em_expr_for_input_target(PlannerInfo *root, - EquivalenceClass *ec, - RelOptInfo *rel); -static Expr *mongo_find_em_expr_for_rel(PlannerInfo *root, - EquivalenceClass *ec, RelOptInfo *rel); +static EquivalenceMember *mongo_find_em_for_rel_target(PlannerInfo *root, + EquivalenceClass *ec, + RelOptInfo *rel); #if PG_VERSION_NUM >= 120000 static void mongo_add_foreign_ordered_paths(PlannerInfo *root, RelOptInfo *input_rel, @@ -987,6 +986,7 @@ mongoGetForeignPlan(PlannerInfo *root, #ifdef META_DRIVER foreach(lc, best_path->path.pathkeys) { + EquivalenceMember *em; PathKey *pathkey = lfirst(lc); Expr *em_expr; @@ -996,16 +996,23 @@ mongoGetForeignPlan(PlannerInfo *root, * By construction, foreignrel is the input relation to the final * sort. */ - em_expr = mongo_find_em_expr_for_input_target(root, - pathkey->pk_eclass, - foreignrel); + em = mongo_find_em_for_rel_target(root, pathkey->pk_eclass, + foreignrel); } else - em_expr = mongo_find_em_expr_for_rel(root, pathkey->pk_eclass, - qual_info->foreignRel); + em = mongo_find_em_for_rel(root, pathkey->pk_eclass, + qual_info->foreignRel); + + /* + * We don't expect any error here; it would mean that shippability + * wasn't verified earlier. For the same reason, we don't recheck + * shippability of the sort operator. + */ + if (em == NULL) + elog(ERROR, "could not find pathkey item to sort"); - Assert(em_expr != NULL); /* Ignore binary-compatible relabeling */ + em_expr = em->em_expr; while (IsA(em_expr, RelabelType)) em_expr = ((RelabelType *) em_expr)->arg; @@ -4121,8 +4128,6 @@ mongo_get_useful_pathkeys_for_relation(PlannerInfo *root, RelOptInfo *rel) foreach(lc, root->query_pathkeys) { PathKey *pathkey = (PathKey *) lfirst(lc); - EquivalenceClass *pathkey_ec = pathkey->pk_eclass; - Expr *em_expr; /* Only ASC NULLS FIRST and DESC NULLS LAST can be pushed down */ if (!IS_PATHKEY_PUSHABLE(pathkey)) @@ -4138,18 +4143,7 @@ mongo_get_useful_pathkeys_for_relation(PlannerInfo *root, RelOptInfo *rel) * end up resorting the entire data set. So, unless we can push * down all of the query pathkeys, forget it. */ - if (!(em_expr = mongo_find_em_expr_for_rel(root, pathkey_ec, rel))) - { - query_pathkeys_ok = false; - break; - } - - /* Ignore binary-compatible relabeling */ - while (em_expr && IsA(em_expr, RelabelType)) - em_expr = ((RelabelType *) em_expr)->arg; - - /* Only Vars are allowed per MongoDB. */ - if (!IsA(em_expr, Var)) + if (!mongo_is_foreign_pathkey(root, rel, pathkey)) { query_pathkeys_ok = false; break; @@ -4184,6 +4178,7 @@ mongo_get_useful_pathkeys_for_relation(PlannerInfo *root, RelOptInfo *rel) foreach(lc, useful_eclass_list) { EquivalenceClass *cur_ec = lfirst(lc); + EquivalenceMember *em; Expr *em_expr; PathKey *pathkey; @@ -4191,11 +4186,16 @@ mongo_get_useful_pathkeys_for_relation(PlannerInfo *root, RelOptInfo *rel) if (cur_ec == query_ec) continue; + /* Can't push down the sort if the EC's opfamily is not shippable. */ + if (!mongo_is_builtin(linitial_oid(cur_ec->ec_opfamilies))) + continue; + /* If no pushable expression for this rel, skip it. */ - if (!(em_expr = mongo_find_em_expr_for_rel(root, cur_ec, rel))) + if (!(em = mongo_find_em_for_rel(root, cur_ec, rel))) continue; /* Ignore binary-compatible relabeling */ + em_expr = em->em_expr; while (em_expr && IsA(em_expr, RelabelType)) em_expr = ((RelabelType *) em_expr)->arg; @@ -4211,6 +4211,10 @@ mongo_get_useful_pathkeys_for_relation(PlannerInfo *root, RelOptInfo *rel) if (!IS_PATHKEY_PUSHABLE(pathkey)) continue; + /* Check for sort operator pushability. */ + if (!mongo_is_default_sort_operator(em, pathkey)) + continue; + useful_pathkeys_list = lappend(useful_pathkeys_list, list_make1(pathkey)); } @@ -4312,13 +4316,12 @@ mongo_add_paths_with_pathkeys(PlannerInfo *root, RelOptInfo *rel, } /* - * mongo_find_em_expr_for_rel + * mongo_find_em_for_rel * Find an equivalence class member expression, all of whose Vars, come * from the indicated relation. */ -static Expr * -mongo_find_em_expr_for_rel(PlannerInfo *root, EquivalenceClass *ec, - RelOptInfo *rel) +EquivalenceMember * +mongo_find_em_for_rel(PlannerInfo *root, EquivalenceClass *ec, RelOptInfo *rel) { ListCell *lc_em; @@ -4326,6 +4329,10 @@ mongo_find_em_expr_for_rel(PlannerInfo *root, EquivalenceClass *ec, { EquivalenceMember *em = (EquivalenceMember *) lfirst(lc_em); + /* + * Note we require !bms_is_empty, else we'd accept constant + * expressions which are not suitable for the purpose. + */ if (bms_is_subset(em->em_relids, rel->relids) && !bms_is_empty(em->em_relids) && mongo_is_foreign_expr(root, rel, em->em_expr, false)) @@ -4335,7 +4342,7 @@ mongo_find_em_expr_for_rel(PlannerInfo *root, EquivalenceClass *ec, * taken entirely from this relation, we'll be content to choose * any one of those. */ - return em->em_expr; + return em; } } @@ -4419,6 +4426,7 @@ mongo_add_foreign_ordered_paths(PlannerInfo *root, RelOptInfo *input_rel, { PathKey *pathkey = (PathKey *) lfirst(lc); EquivalenceClass *pathkey_ec = pathkey->pk_eclass; + EquivalenceMember *em = NULL; Expr *sort_expr; /* @@ -4436,12 +4444,14 @@ mongo_add_foreign_ordered_paths(PlannerInfo *root, RelOptInfo *input_rel, * shippable EM that is computed in input_rel's reltarget, else we * can't push down the sort. */ - sort_expr = mongo_find_em_expr_for_input_target(root, pathkey_ec, - input_rel); - if (!sort_expr) + em = mongo_find_em_for_rel_target(root, pathkey_ec, input_rel); + + /* Check for sort operator pushability. */ + if (!mongo_is_default_sort_operator(em, pathkey)) return; /* Ignore binary-compatible relabeling */ + sort_expr = em->em_expr; while (sort_expr && IsA(sort_expr, RelabelType)) sort_expr = ((RelabelType *) sort_expr)->arg; @@ -4707,13 +4717,13 @@ mongo_add_foreign_final_paths(PlannerInfo *root, RelOptInfo *input_rel, #endif /* PG_VERSION_NUM >= 120000 */ /* - * mongo_find_em_expr_for_input_target + * mongo_find_em_for_rel_target * Find an equivalence class member expression to be computed as a sort * column in the given target. */ -static Expr * -mongo_find_em_expr_for_input_target(PlannerInfo *root, EquivalenceClass *ec, - RelOptInfo *rel) +static EquivalenceMember * +mongo_find_em_for_rel_target(PlannerInfo *root, EquivalenceClass *ec, + RelOptInfo *rel) { PathTarget *target = rel->reltarget; ListCell *lc1; @@ -4766,7 +4776,7 @@ mongo_find_em_expr_for_input_target(PlannerInfo *root, EquivalenceClass *ec, * it's unsafe to remote, we cannot push down the final sort. */ if (mongo_is_foreign_expr(root, rel, em->em_expr, false)) - return em->em_expr; + return em; } i++; @@ -4774,4 +4784,50 @@ mongo_find_em_expr_for_input_target(PlannerInfo *root, EquivalenceClass *ec, return NULL; /* keep compiler quiet */ } + +/* + * mongo_is_default_sort_operator + * Returns true if default sort operator is provided. + */ +bool +mongo_is_default_sort_operator(EquivalenceMember *em, PathKey *pathkey) +{ + Oid oprid; + char *oprname; + TypeCacheEntry *typentry; + + if (em == NULL) + return false; + + /* Can't push down the sort if pathkey's opfamily is not shippable. */ + if (!mongo_is_builtin(pathkey->pk_opfamily)) + return NULL; + + oprid = get_opfamily_member(pathkey->pk_opfamily, + em->em_datatype, + em->em_datatype, + pathkey->pk_strategy); + if (!OidIsValid(oprid)) + elog(ERROR, "missing operator %d(%u,%u) in opfamily %u", + pathkey->pk_strategy, em->em_datatype, em->em_datatype, + pathkey->pk_opfamily); + + /* Can't push down the sort if the operator is not shippable. */ + oprname = get_opname(oprid); + if (!((strncmp(oprname, "<", NAMEDATALEN) == 0) || + (strncmp(oprname, ">", NAMEDATALEN) == 0))) + return false; + + /* + * See whether the operator is default < or > for sort expr's datatype. + * Here we need to use the expression's actual type to discover whether + * the desired operator will be the default or not. + */ + typentry = lookup_type_cache(exprType((Node *)em->em_expr), + TYPECACHE_LT_OPR | TYPECACHE_GT_OPR); + if (oprid == typentry->lt_opr || oprid == typentry->gt_opr) + return true; + + return false; +} #endif /* End of META_DRIVER */ diff --git a/mongo_fdw.h b/mongo_fdw.h index 3b89079..3bd080d 100644 --- a/mongo_fdw.h +++ b/mongo_fdw.h @@ -502,5 +502,13 @@ extern Datum mongo_fdw_validator(PG_FUNCTION_ARGS); /* deparse.c headers */ extern void mongo_check_qual(Expr *node, MongoRelQualInfo *qual_info); extern const char *mongo_get_jointype_name(JoinType jointype); +extern EquivalenceMember *mongo_find_em_for_rel(PlannerInfo *root, + EquivalenceClass *ec, + RelOptInfo *rel); +extern bool mongo_is_builtin(Oid oid); +extern bool mongo_is_default_sort_operator(EquivalenceMember *em, + PathKey *pathkey); +extern bool mongo_is_foreign_pathkey(PlannerInfo *root, RelOptInfo *baserel, + PathKey *pathkey); #endif /* MONGO_FDW_H */ diff --git a/sql/pushdown.sql b/sql/pushdown.sql index 9171b9a..480881c 100644 --- a/sql/pushdown.sql +++ b/sql/pushdown.sql @@ -283,6 +283,43 @@ SELECT _id, a01, a31, a32, a33, a34, a35 FROM f_test_large ORDER BY a26 ASC NULLS FIRST, a27 ASC NULLS FIRST, a28 ASC NULLS FIRST, a29 ASC NULLS FIRST, a30 ASC NULLS FIRST, a31 ASC NULLS FIRST, a32 ASC NULLS FIRST; +-- FDW-564: Test ORDER BY with user defined operators. Create the operator +-- family required for the test. +CREATE OPERATOR PUBLIC.<^ ( + LEFTARG = INT4, + RIGHTARG = INT4, + PROCEDURE = INT4EQ +); + +CREATE OPERATOR PUBLIC.=^ ( + LEFTARG = INT4, + RIGHTARG = INT4, + PROCEDURE = INT4LT +); + +CREATE OPERATOR PUBLIC.>^ ( + LEFTARG = INT4, + RIGHTARG = INT4, + PROCEDURE = INT4GT +); + +CREATE OPERATOR FAMILY my_op_family USING btree; + +CREATE FUNCTION MY_OP_CMP(A INT, B INT) RETURNS INT AS + $$ BEGIN RETURN BTINT4CMP(A, B); END $$ LANGUAGE PLPGSQL; + +CREATE OPERATOR CLASS my_op_class FOR TYPE INT USING btree FAMILY my_op_family AS + OPERATOR 1 PUBLIC.<^, + OPERATOR 3 PUBLIC.=^, + OPERATOR 5 PUBLIC.>^, + FUNCTION 1 my_op_cmp(INT, INT); + +-- FDW-564: User defined operators are not pushed down. +EXPLAIN (COSTS FALSE, VERBOSE) +SELECT * FROM f_mongo_test ORDER BY a USING OPERATOR(public.<^); +EXPLAIN (COSTS FALSE, VERBOSE) +SELECT MIN(a) FROM f_mongo_test ORDER BY 1 USING OPERATOR(public.<^); + -- Cleanup DELETE FROM f_mongo_test WHERE a != 0; DELETE FROM f_test_tbl2 WHERE c1 IS NULL; @@ -291,6 +328,12 @@ DROP FOREIGN TABLE f_test_tbl1; DROP FOREIGN TABLE f_test_tbl2; DROP FOREIGN TABLE f_test_tbl3; DROP FOREIGN TABLE f_test_large; +DROP OPERATOR CLASS my_op_class USING btree; +DROP FUNCTION my_op_cmp(a INT, b INT); +DROP OPERATOR FAMILY my_op_family USING btree; +DROP OPERATOR public.>^(INT, INT); +DROP OPERATOR public.=^(INT, INT); +DROP OPERATOR public.<^(INT, INT); DROP USER MAPPING FOR public SERVER mongo_server; DROP SERVER mongo_server; DROP EXTENSION mongo_fdw;