From 492464d4e6671618a0cab6a58fd0c5780f169eaf Mon Sep 17 00:00:00 2001 From: Richard Guo Date: Tue, 26 May 2026 15:27:21 +0900 Subject: [PATCH v3] Fix qual pushdown past grouping with mismatched equivalence The planner has two optimizations that move a qual clause across a grouping boundary: subquery_planner transfers HAVING clauses to WHERE so they can be evaluated before aggregation, and qual_is_pushdown_safe pushes outer restriction clauses into a subquery past its DISTINCT, DISTINCT ON, window PARTITION BY, or set-operation grouping layer. Both produce wrong results when the moved clause's equivalence relation disagrees with the grouping's, since the clause then filters rows the grouping would have merged. The disagreement has two forms. A type may belong to multiple btree opfamilies whose equality operators disagree (e.g. record_ops vs record_image_ops); or the grouping may use a nondeterministic collation, where comparing the column under a different collation, or wrapping it in a function or operator, can distinguish values the collation considers equal. Because we cannot prove an arbitrary expression preserves that equality, a grouping column with a nondeterministic collation is safe to push only as a direct operand of a comparison under its own collation. Fix both call sites through a shared walker parameterized by a callback that maps each Var to the grouping equality operator for its column (or InvalidOid for non-grouping Vars). For HAVING, the callback recovers the SortGroupClause's eqop via the GROUP Var's varattno, which requires running before flatten_group_exprs while havingQual still contains GROUP Vars. For subquery pushdown, the callback recovers the eqop from subquery->distinctClause, a window's partitionClause, or any grouping node in the SetOperationStmt tree. The walker fires only when there is an equivalence boundary to cross, gated by either the existing UNSAFE_NOTIN_DISTINCTON_CLAUSE and UNSAFE_NOTIN_PARTITIONBY_CLAUSE flags or by a recursive check for any grouping node in the set-op tree. Back-patch to v18 only. The HAVING half relies on the RTE_GROUP mechanism introduced in v18 (commit 247dea89f), which is what lets us identify grouping expressions via GROUP Vars on pre-flatten havingQual. Pre-v18 branches lack that machinery, so a back-patch there would need a different approach. Given the absence of field reports of these bugs on back branches, the risk of carrying a different fix on stable branches is not justified. --- src/backend/optimizer/path/allpaths.c | 179 +++++++++++ src/backend/optimizer/plan/planner.c | 263 +++++----------- src/backend/optimizer/util/clauses.c | 268 ++++++++++++++++ src/backend/utils/cache/lsyscache.c | 24 +- src/include/optimizer/clauses.h | 14 + src/test/regress/expected/aggregates.out | 74 ++++- .../regress/expected/collate.icu.utf8.out | 286 ++++++++++++++++-- src/test/regress/expected/subselect.out | 214 +++++++++++++ src/test/regress/sql/aggregates.sql | 42 ++- src/test/regress/sql/collate.icu.utf8.sql | 129 +++++++- src/test/regress/sql/subselect.sql | 105 +++++++ src/tools/pgindent/typedefs.list | 4 +- 12 files changed, 1365 insertions(+), 237 deletions(-) diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index c134594a21a..9c040ecefc8 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -162,6 +162,10 @@ static bool targetIsInAllPartitionLists(TargetEntry *tle, Query *query); static pushdown_safe_type qual_is_pushdown_safe(Query *subquery, Index rti, RestrictInfo *rinfo, pushdown_safety_info *safetyInfo); +static Oid pushdown_var_grouping_eqop(Var *var, void *context); +static Oid subquery_column_grouping_eqop(Query *subquery, AttrNumber attno); +static Oid setop_column_grouping_eqop(Node *setop, AttrNumber attno); +static bool setop_has_grouping(Node *setop); static void subquery_push_qual(Query *subquery, RangeTblEntry *rte, Index rti, Node *qual); static void recurse_push_qual(Node *setOp, Query *topquery, @@ -4440,6 +4444,16 @@ targetIsInAllPartitionLists(TargetEntry *tle, Query *query) * * 5. rinfo's clause must not refer to any subquery output columns that were * found to be unsafe to reference by subquery_is_pushdown_safe(). + * + * 6. If the subquery has a grouping layer (DISTINCT, DISTINCT ON, window + * PARTITION BY, or a set operation that groups rows by equality), rinfo's + * clause must not apply a different equivalence relation to a grouping column + * than the grouping uses; otherwise it would distinguish rows the grouping + * considers equal, and pushing such a clause past the grouping would drop + * members of a group and change which row becomes the group's representative + * (or, for window functions, change per-partition values such as ranks and + * counts). See expression_has_grouping_conflict for the kinds of conflict + * detected. */ static pushdown_safe_type qual_is_pushdown_safe(Query *subquery, Index rti, RestrictInfo *rinfo, @@ -4536,9 +4550,174 @@ qual_is_pushdown_safe(Query *subquery, Index rti, RestrictInfo *rinfo, list_free(vars); + /* Check point 6 */ + if (safe == PUSHDOWN_SAFE && + (subquery->hasWindowFuncs || + subquery->distinctClause != NIL || + (subquery->setOperations != NULL && + setop_has_grouping(subquery->setOperations)))) + { + if (expression_has_grouping_conflict(qual, pushdown_var_grouping_eqop, + subquery)) + safe = PUSHDOWN_UNSAFE; + } + return safe; } +/* + * pushdown_var_grouping_eqop + * grouping_eqop_callback for qual_is_pushdown_safe. + * + * Returns the grouping equality operator for 'var' if it references a subquery + * output column that participates in the subquery's grouping layer; InvalidOid + * otherwise. + * + * 'context' is the subquery Query whose pushdown safety we're checking. + */ +static Oid +pushdown_var_grouping_eqop(Var *var, void *context) +{ + Query *subquery = (Query *) context; + Oid eqop; + + if (var->varlevelsup != 0) + return InvalidOid; + + eqop = subquery_column_grouping_eqop(subquery, var->varattno); + + /* + * qual_is_pushdown_safe ensures any level-0 subquery Var that reaches us + * references a grouping column. + */ + Assert(OidIsValid(eqop)); + + return eqop; +} + +/* + * subquery_column_grouping_eqop + * Return the equality operator that the subquery uses to group rows on + * the given output column, or InvalidOid if the column doesn't + * participate in any grouping mechanism. + * + * A subquery output column is grouping-relevant if it appears in + * subquery->distinctClause (covering both DISTINCT and DISTINCT ON), in every + * window's PARTITION BY clause, or is grouped by some node in a set-operation + * tree. In all of these cases the parser builds the SortGroupClause with the + * column's type-default equality operator via get_sort_group_operators, so any + * matching SortGroupClause carries the correct eqop. + */ +static Oid +subquery_column_grouping_eqop(Query *subquery, AttrNumber attno) +{ + TargetEntry *tle; + ListCell *lc; + + if (attno <= 0 || attno > list_length(subquery->targetList)) + return InvalidOid; + + tle = list_nth_node(TargetEntry, subquery->targetList, attno - 1); + + /* DISTINCT or DISTINCT ON */ + foreach(lc, subquery->distinctClause) + { + SortGroupClause *sgc = lfirst_node(SortGroupClause, lc); + + if (sgc->tleSortGroupRef == tle->ressortgroupref) + return sgc->eqop; + } + + /* Window function PARTITION BY: must appear in every window's list. */ + if (subquery->hasWindowFuncs && subquery->windowClause != NIL) + { + Oid eqop = InvalidOid; + + foreach(lc, subquery->windowClause) + { + WindowClause *wc = (WindowClause *) lfirst(lc); + ListCell *lc2; + + foreach(lc2, wc->partitionClause) + { + SortGroupClause *sgc = lfirst_node(SortGroupClause, lc2); + + if (sgc->tleSortGroupRef == tle->ressortgroupref) + break; + } + if (lc2 == NULL) + break; /* not present in this window's list */ + eqop = lfirst_node(SortGroupClause, lc2)->eqop; + } + if (lc == NULL) + return eqop; /* matched in every window */ + } + + /* Set operation */ + if (subquery->setOperations != NULL) + return setop_column_grouping_eqop(subquery->setOperations, attno); + + return InvalidOid; +} + +/* + * setop_column_grouping_eqop + * Recursively search a SetOperationStmt tree for any node that groups + * rows by equality, and return the equality operator used for the given + * output column. Returns InvalidOid if no node in the tree groups (i.e., + * an entirely-UNION-ALL tree). + * + * For any set operation other than UNION ALL, groupClauses is a positional + * list of SortGroupClauses, with element N-1 corresponding to output column N + * (see makeSortGroupClauseForSetOp). + */ +static Oid +setop_column_grouping_eqop(Node *setop, AttrNumber attno) +{ + SetOperationStmt *op; + Oid eqop; + + if (setop == NULL || !IsA(setop, SetOperationStmt)) + return InvalidOid; + + op = (SetOperationStmt *) setop; + + if (op->groupClauses != NIL && + attno >= 1 && attno <= list_length(op->groupClauses)) + { + SortGroupClause *sgc = list_nth_node(SortGroupClause, + op->groupClauses, attno - 1); + + return sgc->eqop; + } + + /* Recurse into children to find any inner grouping */ + eqop = setop_column_grouping_eqop(op->larg, attno); + if (OidIsValid(eqop)) + return eqop; + return setop_column_grouping_eqop(op->rarg, attno); +} + +/* + * setop_has_grouping + * Return true if any node in the SetOperationStmt tree groups rows by + * equality (i.e., has non-NIL groupClauses). + */ +static bool +setop_has_grouping(Node *setop) +{ + SetOperationStmt *op; + + if (setop == NULL || !IsA(setop, SetOperationStmt)) + return false; + + op = (SetOperationStmt *) setop; + if (op->groupClauses != NIL) + return true; + + return setop_has_grouping(op->larg) || setop_has_grouping(op->rarg); +} + /* * subquery_push_qual - push down a qual that we have determined is safe */ diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 846bd7c1fbe..cac2ebb86c6 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -135,26 +135,21 @@ typedef struct } standard_qp_extra; /* - * Context for the find_having_collation_conflicts walker. - * - * ancestor_collids is a stack of inputcollids contributed by collation-aware - * ancestors of the current node. Entries are pushed before recursing into a - * node's children and popped afterwards, so the stack reflects exactly the - * inputcollids on the current root-to-node path. + * Context for find_having_conflicts. This is the callback context passed to + * expression_has_grouping_conflict in clauses.c. */ typedef struct { + Query *parse; Index group_rtindex; - List *ancestor_collids; -} having_collation_ctx; +} having_grouping_ctx; /* Local functions */ static Node *preprocess_expression(PlannerInfo *root, Node *expr, int kind); static void preprocess_qual_conditions(PlannerInfo *root, Node *jtnode); -static Bitmapset *find_having_collation_conflicts(Query *parse, - Index group_rtindex); -static bool having_collation_conflict_walker(Node *node, - having_collation_ctx *ctx); +static Bitmapset *find_having_conflicts(Query *parse, Index group_rtindex); +static Oid having_var_grouping_eqop(Var *var, void *context); +static Oid group_var_eqop(Query *parse, Var *var); static void grouping_planner(PlannerInfo *root, double tuple_fraction, SetOperationStmt *setops); static grouping_sets_data *preprocess_grouping_sets(PlannerInfo *root); @@ -780,7 +775,7 @@ subquery_planner(PlannerGlobal *glob, Query *parse, char *plan_name, PlannerInfo *root; List *newWithCheckOptions; List *newHaving; - Bitmapset *havingCollationConflicts; + Bitmapset *havingPushdownConflicts; int havingIdx; bool hasOuterJoins; bool hasResultRTEs; @@ -1208,25 +1203,14 @@ subquery_planner(PlannerGlobal *glob, Query *parse, char *plan_name, } /* - * Before we flatten GROUP Vars, check which HAVING clauses have collation - * conflicts. When GROUP BY uses a nondeterministic collation, values - * that are "equal" for grouping may be distinguishable under a different - * collation. If such a HAVING clause were moved to WHERE, it would - * filter individual rows before grouping, potentially eliminating some - * members of a group and thereby changing aggregate results. - * - * We do this check before flatten_group_exprs because we can easily - * identify grouping expressions by checking whether a Var references - * RTE_GROUP, and such Vars directly carry the GROUP BY collation as their - * varcollid. After flattening, these Vars are replaced by the underlying - * expressions, and we would have to match expressions in the HAVING - * clause back to grouping expressions, which is much more complex. + * Before we flatten GROUP Vars, identify HAVING clauses whose equality + * semantics disagree with the GROUP BY's. See find_having_conflicts. */ if (parse->hasGroupRTE) - havingCollationConflicts = - find_having_collation_conflicts(parse, root->group_rtindex); + havingPushdownConflicts = find_having_conflicts(parse, + root->group_rtindex); else - havingCollationConflicts = NULL; + havingPushdownConflicts = NULL; /* * Replace any Vars in the subquery's targetlist and havingQual that @@ -1272,13 +1256,13 @@ subquery_planner(PlannerGlobal *glob, Query *parse, char *plan_name, * but it's okay: it's just an optimization to avoid running pull_varnos * when there cannot be any Vars in the HAVING clause.) * - * We also cannot do this if the HAVING clause uses a different collation - * than the GROUP BY for any grouping expression whose GROUP BY collation - * is nondeterministic. This is detected before flatten_group_exprs (see - * find_having_collation_conflicts above) and recorded in the - * havingCollationConflicts bitmapset. The bitmapset indexes remain valid - * here because flatten_group_exprs uses expression_tree_mutator, which - * preserves the list length and ordering of havingQual. + * We also cannot do this for HAVING clauses that conflict with GROUP BY + * on collation or operator family. Both kinds of conflict are detected + * before flatten_group_exprs (see find_having_conflicts above) and + * recorded in the havingPushdownConflicts bitmapset. The bitmapset + * indexes remain valid here because flatten_group_exprs uses + * expression_tree_mutator, which preserves the list length and ordering + * of havingQual. * * Also, it may be that the clause is so expensive to execute that we're * better off doing it only once per group, despite the loss of @@ -1320,7 +1304,7 @@ subquery_planner(PlannerGlobal *glob, Query *parse, char *plan_name, if (contain_agg_clause(havingclause) || contain_volatile_functions(havingclause) || contain_subplans(havingclause) || - bms_is_member(havingIdx, havingCollationConflicts) || + bms_is_member(havingIdx, havingPushdownConflicts) || (parse->groupClause && parse->groupingSets && bms_is_member(root->group_rtindex, pull_varnos(root, havingclause)))) { @@ -1551,192 +1535,99 @@ preprocess_qual_conditions(PlannerInfo *root, Node *jtnode) } /* - * find_having_collation_conflicts - * Identify HAVING clauses that must not be moved to WHERE due to collation - * mismatches with GROUP BY. + * find_having_conflicts + * Identify HAVING clauses that must not be moved to WHERE because they + * apply a different equivalence relation than GROUP BY. Pushing such a + * clause to WHERE would filter individual rows before grouping happens, + * eliminating rows that GROUP BY would have merged into a single group + * and thereby changing aggregate results. + * + * The actual walking is done by expression_has_grouping_conflict; see that + * function for the kinds of conflict it looks for. We just iterate over + * havingQual and supply a HAVING-specific callback that identifies GROUP + * Vars. * * This must be called before flatten_group_exprs, while the HAVING clause * still contains GROUP Vars (Vars referencing RTE_GROUP). These GROUP Vars - * carry the GROUP BY collation as their varcollid. A GROUP Var with a - * nondeterministic varcollid conflicts whenever some collation-aware ancestor - * on its path applies a different inputcollid: that operator would distinguish - * values which the GROUP BY considers equal, so the clause is unsafe to push - * to WHERE. + * carry the GROUP BY collation as their varcollid and let us recover the + * grouping eqop via varattno. After flattening, those Vars are replaced by + * the underlying expressions, and matching back to grouping expressions is + * much harder. * * Returns a Bitmapset of zero-based indexes into the havingQual list for - * clauses that have collation conflicts and must stay in HAVING. + * clauses that conflict and must stay in HAVING. */ static Bitmapset * -find_having_collation_conflicts(Query *parse, Index group_rtindex) +find_having_conflicts(Query *parse, Index group_rtindex) { Bitmapset *result = NULL; - having_collation_ctx ctx; + having_grouping_ctx ctx; int idx; if (parse->havingQual == NULL) return NULL; + ctx.parse = parse; ctx.group_rtindex = group_rtindex; - ctx.ancestor_collids = NIL; idx = 0; foreach_ptr(Node, clause, (List *) parse->havingQual) { - if (having_collation_conflict_walker(clause, &ctx)) + if (expression_has_grouping_conflict(clause, having_var_grouping_eqop, + &ctx)) result = bms_add_member(result, idx); idx++; - Assert(ctx.ancestor_collids == NIL); } return result; } /* - * Walker function for find_having_collation_conflicts. - * - * Walk the clause top-down, maintaining a stack of inputcollids contributed - * by collation-aware ancestors. At each GROUP Var with a nondeterministic - * varcollid, the clause has a conflict if any ancestor's inputcollid differs - * from the GROUP Var's varcollid. Most collation-aware nodes expose their - * inputcollid through exprInputCollation(). Two structural exceptions need - * special handling: - * - * - RowCompareExpr carries one inputcollid per column in inputcollids[], so we - * descend into its (largs[i], rargs[i]) pairs explicitly with the matching - * collation pushed onto the stack. - * - * - A simple CASE (CaseExpr with a non-NULL arg) holds the arg outside the - * WHEN's OpExpr, even though the WHEN's OpExpr is the place where the - * comparison's inputcollid lives. Parse analysis builds each WHEN as - * "OpExpr(CaseTestExpr op val)" -- the CaseTestExpr is a placeholder for - * the arg. Before walking cexpr->arg we therefore push every WHEN's - * inputcollid onto the ancestor stack, so a GROUP Var at the arg is - * checked against the same collations the WHEN comparisons would apply. - * The WHEN bodies and defresult are then walked under the unchanged stack - * so their own collation contexts are picked up by the default path. + * having_var_grouping_eqop + * grouping_eqop_callback for find_having_conflicts. + * + * Returns the GROUP BY equality operator for 'var' if it references the + * query's RTE_GROUP, or InvalidOid otherwise. */ -static bool -having_collation_conflict_walker(Node *node, having_collation_ctx *ctx) +static Oid +having_var_grouping_eqop(Var *var, void *context) { - Oid this_collid; - bool result; - - if (node == NULL) - return false; - - if (IsA(node, Var)) - { - Var *var = (Var *) node; - - /* We should not see any upper-level Vars here */ - Assert(var->varlevelsup == 0); - - if (var->varno == ctx->group_rtindex && - OidIsValid(var->varcollid) && - !get_collation_isdeterministic(var->varcollid)) - { - foreach_oid(collid, ctx->ancestor_collids) - { - if (collid != var->varcollid) - return true; - } - } - return false; - } + having_grouping_ctx *ctx = (having_grouping_ctx *) context; - if (IsA(node, RowCompareExpr)) - { - RowCompareExpr *rcexpr = (RowCompareExpr *) node; - ListCell *lc_l; - ListCell *lc_r; - ListCell *lc_c; + if (var->varno != ctx->group_rtindex || var->varlevelsup != 0) + return InvalidOid; - /* - * Each column of a row comparison is compared under its own - * inputcollids[i]. Walk each (largs[i], rargs[i]) pair with that - * collation pushed, so a Var in column i is checked against the - * collation that actually applies to it. - */ - forthree(lc_l, rcexpr->largs, - lc_r, rcexpr->rargs, - lc_c, rcexpr->inputcollids) - { - Oid collid = lfirst_oid(lc_c); - bool found; - - if (OidIsValid(collid)) - ctx->ancestor_collids = lappend_oid(ctx->ancestor_collids, - collid); - - found = having_collation_conflict_walker((Node *) lfirst(lc_l), - ctx) || - having_collation_conflict_walker((Node *) lfirst(lc_r), - ctx); + return group_var_eqop(ctx->parse, var); +} - if (OidIsValid(collid)) - ctx->ancestor_collids = - list_delete_last(ctx->ancestor_collids); +/* + * group_var_eqop + * Return the equality operator that GROUP BY uses for the given GROUP Var. + * + * A GROUP Var's varattno is its 1-based position in the RTE_GROUP's groupexprs + * list, which addRangeTableEntryForGroup built by iterating parse->groupClause + * and including every SortGroupClause whose TLE was present in the targetlist. + * Replay that traversal here to recover the SortGroupClause for the given + * varattno. + */ +static Oid +group_var_eqop(Query *parse, Var *var) +{ + int counter = 0; - if (found) - return true; - } - return false; - } + Assert(var->varlevelsup == 0); - if (IsA(node, CaseExpr) && ((CaseExpr *) node)->arg != NULL) + foreach_node(SortGroupClause, sgc, parse->groupClause) { - CaseExpr *cexpr = (CaseExpr *) node; - int saved_len = list_length(ctx->ancestor_collids); - bool found; - - /* - * Push every WHEN's inputcollid before walking cexpr->arg, since each - * WHEN implicitly compares the arg under that inputcollid. - */ - foreach_node(CaseWhen, cw, cexpr->args) - { - Oid collid = exprInputCollation((Node *) cw->expr); - - if (OidIsValid(collid)) - ctx->ancestor_collids = lappend_oid(ctx->ancestor_collids, - collid); - } - - found = having_collation_conflict_walker((Node *) cexpr->arg, ctx); - - ctx->ancestor_collids = list_truncate(ctx->ancestor_collids, - saved_len); - - if (found) - return true; - - /* - * Walk the WHEN bodies and defresult under the unchanged ancestor - * stack; any inputcollids inside them are picked up by the default - * path. - */ - foreach_node(CaseWhen, cw, cexpr->args) - { - if (having_collation_conflict_walker((Node *) cw->expr, ctx) || - having_collation_conflict_walker((Node *) cw->result, ctx)) - return true; - } - return having_collation_conflict_walker((Node *) cexpr->defresult, - ctx); + if (get_sortgroupclause_tle(sgc, parse->targetList) == NULL) + continue; + if (++counter == var->varattno) + return sgc->eqop; } - this_collid = exprInputCollation(node); - if (OidIsValid(this_collid)) - ctx->ancestor_collids = lappend_oid(ctx->ancestor_collids, - this_collid); - - result = expression_tree_walker(node, having_collation_conflict_walker, - ctx); - - if (OidIsValid(this_collid)) - ctx->ancestor_collids = list_delete_last(ctx->ancestor_collids); - - return result; + elog(ERROR, "could not find GROUP clause for GROUP Var attno %d", + var->varattno); + return InvalidOid; /* keep compiler quiet */ } /* diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c index 01997e22266..aa8886ec210 100644 --- a/src/backend/optimizer/util/clauses.c +++ b/src/backend/optimizer/util/clauses.c @@ -100,6 +100,17 @@ typedef struct List *safe_param_ids; /* PARAM_EXEC Param IDs to treat as safe */ } max_parallel_hazard_context; +/* + * Walker context for expression_has_grouping_conflict. get_eqop is a callback + * that returns the equality operator used for grouping. cb_context is opaque + * to the walker and is forwarded to get_eqop unchanged. + */ +typedef struct +{ + grouping_eqop_callback get_eqop; + void *cb_context; +} grouping_walker_ctx; + static bool contain_agg_clause_walker(Node *node, void *context); static bool find_window_functions_walker(Node *node, WindowFuncLists *lists); static bool contain_subplans_walker(Node *node, void *context); @@ -118,6 +129,11 @@ static List *find_nonnullable_vars_walker(Node *node, bool top_level); static void find_subquery_safe_quals(Node *jtnode, List **safe_quals); static bool is_strict_saop(ScalarArrayOpExpr *expr, bool falseOK); static bool convert_saop_to_hashed_saop_walker(Node *node, void *context); +static bool grouping_conflict_walker(Node *node, grouping_walker_ctx *ctx); +static bool grouping_check_operands(Oid opno, Oid inputcollid, + List *args, grouping_walker_ctx *ctx); +static bool grouping_check_operand(Node *arg, Oid opno, Oid inputcollid, + grouping_walker_ctx *ctx); static Node *eval_const_expressions_mutator(Node *node, eval_const_expressions_context *context); static bool contain_non_const_walker(Node *node, void *context); @@ -6264,6 +6280,258 @@ pull_paramids_walker(Node *node, Bitmapset **context) return expression_tree_walker(node, pull_paramids_walker, context); } +/* + * expression_has_grouping_conflict + * Detect whether 'expr' would distinguish rows that a grouping mechanism + * (GROUP BY, DISTINCT, DISTINCT ON, window PARTITION BY, or set operation) + * considers equal. + * + * The caller supplies a get_eqop callback (see clauses.h) so the same walker + * serves every grouping context. The callback identifies a grouping column by + * returning a valid eqop for its Var. A grouping column is safe to reference + * only if the reference yields the same result for every value the grouping + * treats as equal. Otherwise, pushing the clause past the grouping could + * discard rows that the grouping would have combined into a single group. + * + * The reference is provably safe only when the grouping column is a direct + * operand of a comparison that tests the grouping's own equality. Such an + * operand is rejected when the comparison's operator does not have equality + * semantics compatible with the grouping eqop, or, for a nondeterministic + * collation, when the comparison applies a collation other than the column's. + * + * For a nondeterministic collation, every other reference is rejected: a + * comparison under a different collation, and any function or operator over + * the column, because we cannot tell whether the function yields the same + * result for values the grouping treats as equal, and many do not. A column + * with a deterministic collation is not restricted this way. + * + * This leaves one case uncaught: with a deterministic collation, a function + * over the column can still feed a finer comparison than the direct-operand + * check sees, for example record_image_ops over a rebuilt record, or scale() + * over numeric where two equal values differ in scale. Catching it would + * require knowing that a type's equality is bitwise, which we do not test + * here. + * + * Returns true if any such conflict exists. + */ +bool +expression_has_grouping_conflict(Node *expr, + grouping_eqop_callback get_eqop, + void *context) +{ + grouping_walker_ctx ctx; + + if (expr == NULL) + return false; + + ctx.get_eqop = get_eqop; + ctx.cb_context = context; + + return grouping_conflict_walker(expr, &ctx); +} + +/* + * Walker function for expression_has_grouping_conflict. + * + * A comparison node checks its direct operands with grouping_check_operand, + * which does not recurse into a grouping-column operand. A grouping column + * therefore reaches the Var branch only when it is referenced in some other + * way: wrapped in a function or other expression, used as the whole qual (a + * bare boolean column), or used as an operand of an operator that is not a + * btree/hash member and so is not treated as a comparison here. + * + * Comparison nodes are OpExpr/ScalarArrayOpExpr whose operator is a btree/hash + * member, and RowCompareExpr (one operator and collation per column). A + * simple CASE (CaseExpr with a non-NULL arg) is a comparison in disguise: + * parse analysis builds each WHEN as "OpExpr(CaseTestExpr op val)", with the + * CaseTestExpr standing in for the arg, so the arg is effectively an operand + * of each WHEN's comparison. Those WHEN operators are always the type-default + * "=", matching the grouping eqop, so only a collation conflict is possible + * there. + */ +static bool +grouping_conflict_walker(Node *node, grouping_walker_ctx *ctx) +{ + if (node == NULL) + return false; + + if (IsA(node, Var)) + { + Var *var = (Var *) node; + + /* + * A grouping column reaches here when it was not handled as a direct + * operand by a comparison node above (see the function header). That + * is safe for a deterministic collation, but not for a + * nondeterministic one, where the reference may distinguish values + * the grouping considers equal. A bare boolean qual is safe too: + * boolean is not collatable, so it takes the deterministic path here. + */ + if (OidIsValid(ctx->get_eqop(var, ctx->cb_context)) && + OidIsValid(var->varcollid) && + !get_collation_isdeterministic(var->varcollid)) + return true; + return false; + } + else if (IsA(node, OpExpr)) + { + OpExpr *opexpr = (OpExpr *) node; + + if (op_is_safe_index_member(opexpr->opno)) + return grouping_check_operands(opexpr->opno, opexpr->inputcollid, + opexpr->args, ctx); + /* fall through */ + } + else if (IsA(node, ScalarArrayOpExpr)) + { + ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) node; + + if (op_is_safe_index_member(saop->opno)) + return grouping_check_operands(saop->opno, saop->inputcollid, + saop->args, ctx); + /* fall through */ + } + else if (IsA(node, RowCompareExpr)) + { + RowCompareExpr *rcexpr = (RowCompareExpr *) node; + ListCell *lc_l; + ListCell *lc_r; + ListCell *lc_o; + ListCell *lc_c; + + /* Each column is compared under its own operator and inputcollid. */ + forfour(lc_l, rcexpr->largs, + lc_r, rcexpr->rargs, + lc_o, rcexpr->opnos, + lc_c, rcexpr->inputcollids) + { + Oid opno = lfirst_oid(lc_o); + Oid collid = lfirst_oid(lc_c); + + if (grouping_check_operand((Node *) lfirst(lc_l), opno, collid, ctx) || + grouping_check_operand((Node *) lfirst(lc_r), opno, collid, ctx)) + return true; + } + return false; + } + else if (IsA(node, CaseExpr) && ((CaseExpr *) node)->arg != NULL) + { + CaseExpr *cexpr = (CaseExpr *) node; + Node *arg = (Node *) cexpr->arg; + + /* Look through RelabelType to find a direct Var arg. */ + while (arg && IsA(arg, RelabelType)) + arg = (Node *) ((RelabelType *) arg)->arg; + + if (arg && IsA(arg, Var)) + { + Var *var = (Var *) arg; + + /* + * The arg is a grouping column compared by every WHEN. For a + * nondeterministic collation, reject if any WHEN applies a + * different collation. + */ + if (OidIsValid(ctx->get_eqop(var, ctx->cb_context)) && + OidIsValid(var->varcollid) && + !get_collation_isdeterministic(var->varcollid)) + { + foreach_node(CaseWhen, cw, cexpr->args) + { + Oid collid = exprInputCollation((Node *) cw->expr); + + if (OidIsValid(collid) && collid != var->varcollid) + return true; + } + } + } + else if (grouping_conflict_walker((Node *) cexpr->arg, ctx)) + { + /* arg is a complex expression; walked as a non-operand */ + return true; + } + + /* + * Walk the WHEN conditions, their results, and the default result as + * non-operands. The WHEN conditions hold a CaseTestExpr in place of + * the arg, so they contribute no grouping operand of their own, but + * the condition expression or the substitution result may reference + * another grouping column. + */ + foreach_node(CaseWhen, cw, cexpr->args) + { + if (grouping_conflict_walker((Node *) cw->expr, ctx) || + grouping_conflict_walker((Node *) cw->result, ctx)) + return true; + } + return grouping_conflict_walker((Node *) cexpr->defresult, ctx); + } + + return expression_tree_walker(node, grouping_conflict_walker, ctx); +} + +/* + * grouping_check_operands + * Check every argument of a comparison node as a direct operand of the + * comparison's operator 'opno' and collation 'inputcollid'. + */ +static bool +grouping_check_operands(Oid opno, Oid inputcollid, List *args, + grouping_walker_ctx *ctx) +{ + ListCell *lc; + + foreach(lc, args) + { + if (grouping_check_operand((Node *) lfirst(lc), opno, inputcollid, ctx)) + return true; + } + return false; +} + +/* + * grouping_check_operand + * Handle one operand 'arg' of a comparison with operator 'opno' and + * collation 'inputcollid'. + * + * If 'arg' is a grouping column (after looking through RelabelType), verify + * that comparison's operator has equality semantics compatible with the + * grouping eqop and, for a nondeterministic collation, that it uses the same + * collation; such a direct operand is then fully handled and is not recursed + * into. Any other operand is walked normally, so a grouping column buried + * inside it is seen as a non-operand reference. + */ +static bool +grouping_check_operand(Node *arg, Oid opno, Oid inputcollid, + grouping_walker_ctx *ctx) +{ + Node *node = arg; + + while (node && IsA(node, RelabelType)) + node = (Node *) ((RelabelType *) node)->arg; + + if (node && IsA(node, Var)) + { + Var *var = (Var *) node; + Oid grouping_eqop = ctx->get_eqop(var, ctx->cb_context); + + if (OidIsValid(grouping_eqop)) + { + /* incompatible equality semantics */ + if (!equality_ops_are_compatible(opno, grouping_eqop)) + return true; + /* nondeterministic collation compared under a different collation */ + if (OidIsValid(var->varcollid) && + !get_collation_isdeterministic(var->varcollid) && + inputcollid != var->varcollid) + return true; + } + return false; /* direct operand handled; do not recurse */ + } + + return grouping_conflict_walker(arg, ctx); +} + /* * Build ScalarArrayOpExpr on top of 'exprs.' 'haveNonConst' indicates * whether at least one of the expressions is not Const. When it's false, diff --git a/src/backend/utils/cache/lsyscache.c b/src/backend/utils/cache/lsyscache.c index 036de5f79ef..cc6f05a0aa7 100644 --- a/src/backend/utils/cache/lsyscache.c +++ b/src/backend/utils/cache/lsyscache.c @@ -820,15 +820,22 @@ get_op_index_interpretation(Oid opno) /* * equality_ops_are_compatible - * Return true if the two given equality operators have compatible + * Return true if the two given operators have compatible equality * semantics. * * This is trivially true if they are the same operator. Otherwise, * we look to see if they both belong to an opfamily that guarantees * compatible semantics for equality. Either finding allows us to assume - * that they have compatible notions of equality. (The reason we need - * to do these pushups is that one might be a cross-type operator; for - * instance int24eq vs int4eq.) + * that they have compatible notions of equality. + * + * The typical use is to compare two equality operators (for instance the + * cross-type operators int24eq vs int4eq), but the test is meaningful for + * any pair of operators in a btree/hash opfamily. Btree marks its + * opfamilies as amconsistentequality, which guarantees that every member + * of the family (=, <, <=, >, >=) agrees on the equivalence relation + * defined by the family's "=". So a non-equality operator and an + * equality operator from the same opfamily are also "compatible" in this + * sense. */ bool equality_ops_are_compatible(Oid opno1, Oid opno2) @@ -963,10 +970,11 @@ collations_agree_on_equality(Oid coll1, Oid coll2) * op_is_safe_index_member * Check if the operator is a member of a B-tree or Hash operator family. * - * We use this check as a proxy for "null-safety": if an operator is trusted by - * the btree or hash opfamily, it implies that the operator adheres to standard - * boolean behavior, and would not return NULL when given valid non-null - * inputs, as doing so would break index integrity. + * Membership in such an opfamily has several useful implications: the operator + * returns non-null for non-null inputs (i.e. "null-safety", required so that + * the operator doesn't break index integrity), and it agrees with other + * members of the same opfamily on equality semantics. Callers use this check + * as a proxy for any of those properties. */ bool op_is_safe_index_member(Oid opno) diff --git a/src/include/optimizer/clauses.h b/src/include/optimizer/clauses.h index 853a28c0007..0e5a7b07404 100644 --- a/src/include/optimizer/clauses.h +++ b/src/include/optimizer/clauses.h @@ -23,6 +23,16 @@ typedef struct List **windowFuncs; /* lists of WindowFuncs for each winref */ } WindowFuncLists; +/* + * Callback used by expression_has_grouping_conflict below. Given a Var, the + * callback returns the equality operator that the relevant grouping mechanism + * (GROUP BY, DISTINCT, DISTINCT ON, window PARTITION BY, or set operation) + * uses for the column the Var references, or InvalidOid if the Var does not + * participate in that grouping. Returning InvalidOid signals "not a grouping + * column" to both the opfamily and collation checks. + */ +typedef Oid (*grouping_eqop_callback) (Var *var, void *context); + extern bool contain_agg_clause(Node *clause); extern bool contain_window_function(Node *clause); @@ -56,4 +66,8 @@ extern Query *inline_function_in_from(PlannerInfo *root, extern Bitmapset *pull_paramids(Expr *expr); +extern bool expression_has_grouping_conflict(Node *expr, + grouping_eqop_callback get_eqop, + void *context); + #endif /* CLAUSES_H */ diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out index 89e051ee824..8d1fae262a5 100644 --- a/src/test/regress/expected/aggregates.out +++ b/src/test/regress/expected/aggregates.out @@ -1675,11 +1675,20 @@ explain (costs off) select y,z from t2 group by y,z; -> Seq Scan on t2 (3 rows) +drop table t1 cascade; +NOTICE: drop cascades to table t1c +drop table t2; +drop table t3; +drop table p_t1; +-- A composite type used by the tests below to exercise the asymmetry +-- between record_ops (per-field equality, the default) and record_image_ops +-- (bytewise equality): values like row(1.0) and row(1.00) are field-equal +-- but byte-distinct. +create type t_rec as (x numeric); -- A unique index proves uniqueness only under its own opfamily. When the -- GROUP BY's eqop comes from a different opfamily with looser equality, -- rows the index regards as distinct can collapse into one GROUP BY group, -- so the index is not usable for removing redundant columns. -create type t_rec as (x numeric); create temp table t_opf (a t_rec not null, b text); create unique index on t_opf (a record_image_ops); -- (1.0) and (1.00) are bytewise distinct but logically equal as records; @@ -1705,12 +1714,65 @@ select a, b from t_opf group by a, b order by b; (2 rows) drop table t_opf; +-- A HAVING clause that uses an equality operator from a different opfamily +-- than the GROUP BY's eqop must NOT be pushed down to WHERE. +create temp table t_having (id int, a t_rec); +insert into t_having values + (1, row(1.0)::t_rec), + (2, row(1.00)::t_rec), + (3, row(2)::t_rec); +-- the clause must stay in HAVING +explain (costs off) +select a, count(*) from t_having group by a having a *= row(1.0)::t_rec; + QUERY PLAN +--------------------------------- + HashAggregate + Group Key: a + Filter: (a *= '(1.0)'::t_rec) + -> Seq Scan on t_having +(4 rows) + +select a, count(*) from t_having group by a having a *= row(1.0)::t_rec; + a | count +-------+------- + (1.0) | 2 +(1 row) + +-- the clause must stay in HAVING +explain (costs off) +select a, count(*) from t_having group by a having a *= any (array[row(1.0)::t_rec]); + QUERY PLAN +------------------------------------------- + HashAggregate + Group Key: a + Filter: (a *= ANY ('{(1.0)}'::t_rec[])) + -> Seq Scan on t_having +(4 rows) + +select a, count(*) from t_having group by a having a *= any (array[row(1.0)::t_rec]); + a | count +-------+------- + (1.0) | 2 +(1 row) + +-- the clause can be pushed down to WHERE +explain (costs off) +select a, count(*) from t_having group by a having a = row(1.0)::t_rec; + QUERY PLAN +-------------------------------------- + GroupAggregate + -> Seq Scan on t_having + Filter: (a = '(1.0)'::t_rec) +(3 rows) + +select a, count(*) from t_having group by a having a = row(1.0)::t_rec; + a | count +-------+------- + (1.0) | 2 +(1 row) + +drop table t_having; drop type t_rec; -drop table t1 cascade; -NOTICE: drop cascades to table t1c -drop table t2; -drop table t3; -drop table p_t1; -- -- Test GROUP BY ALL -- diff --git a/src/test/regress/expected/collate.icu.utf8.out b/src/test/regress/expected/collate.icu.utf8.out index 04e2f6df037..cf2c55ba92e 100644 --- a/src/test/regress/expected/collate.icu.utf8.out +++ b/src/test/regress/expected/collate.icu.utf8.out @@ -2122,7 +2122,7 @@ SELECT x, count(*) FROM test3ci GROUP BY x HAVING x = 'abc' COLLATE case_insensi abc | 2 (1 row) --- Negative: function applied to grouped column with conflicting collation +-- Negative: function over the grouping column, conflicting collation EXPLAIN (COSTS OFF) SELECT x, count(*) FROM test3ci GROUP BY x HAVING upper(x) = 'ABC' COLLATE case_sensitive; QUERY PLAN @@ -2139,18 +2139,36 @@ SELECT x, count(*) FROM test3ci GROUP BY x HAVING upper(x) = 'ABC' COLLATE case_ abc | 2 (1 row) --- Positive: function with same collation as GROUP BY +-- Negative: function over the grouping column whose result is compared as an +-- integer, under no collation +EXPLAIN (COSTS OFF) +SELECT x, count(*) FROM test3ci GROUP BY x HAVING ascii(x) = 97; + QUERY PLAN +--------------------------- + HashAggregate + Group Key: x + Filter: (ascii(x) = 97) + -> Seq Scan on test3ci +(4 rows) + +SELECT x, count(*) FROM test3ci GROUP BY x HAVING ascii(x) = 97; + x | count +-----+------- + abc | 2 +(1 row) + +-- Negative: a function wrapping the grouping column is not provably safe even +-- when compared under the matching collation, since the function need not +-- preserve the collation's equality EXPLAIN (COSTS OFF) SELECT x, count(*) FROM test3ci GROUP BY x HAVING upper(x) = 'ABC' COLLATE case_insensitive; - QUERY PLAN -------------------------------------------------------------------------- - GroupAggregate + QUERY PLAN +------------------------------------------------------------- + HashAggregate Group Key: x - -> Sort - Sort Key: x COLLATE case_insensitive - -> Seq Scan on test3ci - Filter: (upper(x) = 'ABC'::text COLLATE case_insensitive) -(6 rows) + Filter: (upper(x) = 'ABC'::text COLLATE case_insensitive) + -> Seq Scan on test3ci +(4 rows) SELECT x, count(*) FROM test3ci GROUP BY x HAVING upper(x) = 'ABC' COLLATE case_insensitive; x | count @@ -2158,8 +2176,8 @@ SELECT x, count(*) FROM test3ci GROUP BY x HAVING upper(x) = 'ABC' COLLATE case_ abc | 2 (1 row) --- Negative: inner function has conflicting collation, even though outer --- operator's collation matches GROUP BY due to a COLLATE override +-- Negative: same, with the grouping column wrapped in a function whose input +-- collation is overridden; still not a direct operand, so it stays in HAVING EXPLAIN (COSTS OFF) SELECT x, count(*) FROM test3ci GROUP BY x HAVING upper(x COLLATE case_sensitive) COLLATE case_insensitive = 'ABC'; QUERY PLAN @@ -2178,17 +2196,17 @@ SELECT x, count(*) FROM test3ci GROUP BY x HAVING upper(x COLLATE case_sensitive -- Mixed AND: conflicting clause stays in HAVING, safe clause pushed to WHERE EXPLAIN (COSTS OFF) -SELECT x, count(*) FROM test3ci GROUP BY x HAVING x = 'abc' COLLATE case_sensitive AND length(x) > 1; - QUERY PLAN ----------------------------------------------------- +SELECT x, count(*) FROM test3ci GROUP BY x HAVING x = 'abc' COLLATE case_sensitive AND x >= 'a' COLLATE case_insensitive; + QUERY PLAN +----------------------------------------------------------- HashAggregate Group Key: x Filter: (x = 'abc'::text COLLATE case_sensitive) -> Seq Scan on test3ci - Filter: (length(x) > 1) + Filter: (x >= 'a'::text COLLATE case_insensitive) (5 rows) -SELECT x, count(*) FROM test3ci GROUP BY x HAVING x = 'abc' COLLATE case_sensitive AND length(x) > 1; +SELECT x, count(*) FROM test3ci GROUP BY x HAVING x = 'abc' COLLATE case_sensitive AND x >= 'a' COLLATE case_insensitive; x | count -----+------- abc | 2 @@ -2196,15 +2214,15 @@ SELECT x, count(*) FROM test3ci GROUP BY x HAVING x = 'abc' COLLATE case_sensiti -- Positive: AND of two safe clauses, both can be pushed EXPLAIN (COSTS OFF) -SELECT x, count(*) FROM test3ci GROUP BY x HAVING x = 'abc' COLLATE case_insensitive AND length(x) > 1; - QUERY PLAN ----------------------------------------------------------------------------------- +SELECT x, count(*) FROM test3ci GROUP BY x HAVING x = 'abc' COLLATE case_insensitive AND x >= 'a' COLLATE case_insensitive; + QUERY PLAN +------------------------------------------------------------------------------------------------------------ GroupAggregate -> Seq Scan on test3ci - Filter: ((x = 'abc'::text COLLATE case_insensitive) AND (length(x) > 1)) + Filter: ((x >= 'a'::text COLLATE case_insensitive) AND (x = 'abc'::text COLLATE case_insensitive)) (3 rows) -SELECT x, count(*) FROM test3ci GROUP BY x HAVING x = 'abc' COLLATE case_insensitive AND length(x) > 1; +SELECT x, count(*) FROM test3ci GROUP BY x HAVING x = 'abc' COLLATE case_insensitive AND x >= 'a' COLLATE case_insensitive; x | count -----+------- abc | 2 @@ -2348,6 +2366,230 @@ SELECT x, count(*) FROM test3cs GROUP BY x HAVING x = 'abc' COLLATE case_insensi ABC | 1 (2 rows) +-- Test WHERE-pushdown past a grouping layer (DISTINCT, DISTINCT ON, window +-- PARTITION BY) when the qual applies a different collation than the +-- grouping column's nondeterministic collation. The qual would distinguish +-- rows the grouping considers equal, so it must NOT be pushed inside the +-- subquery. +CREATE TABLE pushdown_ci (id int, x text COLLATE case_insensitive); +INSERT INTO pushdown_ci VALUES (1, 'ABC'), (2, 'abc'), (3, 'def'); +-- DISTINCT ON: conflict, qual stays in outer query +EXPLAIN (COSTS OFF) +SELECT * FROM (SELECT DISTINCT ON (x) id, x FROM pushdown_ci ORDER BY x, id) s +WHERE x = 'abc' COLLATE case_sensitive; + QUERY PLAN +-------------------------------------------------------------------------------- + Subquery Scan on s + Filter: (s.x = 'abc'::text COLLATE case_sensitive) + -> Unique + -> Sort + Sort Key: pushdown_ci.x COLLATE case_insensitive, pushdown_ci.id + -> Seq Scan on pushdown_ci +(6 rows) + +SELECT * FROM (SELECT DISTINCT ON (x) id, x FROM pushdown_ci ORDER BY x, id) s +WHERE x = 'abc' COLLATE case_sensitive; + id | x +----+--- +(0 rows) + +-- Window function PARTITION BY: conflict, qual stays outside the WindowAgg +EXPLAIN (COSTS OFF) +SELECT * FROM ( + SELECT id, x, count(*) OVER (PARTITION BY x) AS cnt FROM pushdown_ci +) s +WHERE x = 'abc' COLLATE case_sensitive; + QUERY PLAN +---------------------------------------------------------------- + Subquery Scan on s + Filter: (s.x = 'abc'::text COLLATE case_sensitive) + -> WindowAgg + Window: w1 AS (PARTITION BY pushdown_ci.x) + -> Sort + Sort Key: pushdown_ci.x COLLATE case_insensitive + -> Seq Scan on pushdown_ci +(7 rows) + +SELECT * FROM ( + SELECT id, x, count(*) OVER (PARTITION BY x) AS cnt FROM pushdown_ci +) s +WHERE x = 'abc' COLLATE case_sensitive; + id | x | cnt +----+-----+----- + 2 | abc | 2 +(1 row) + +-- Plain DISTINCT: conflict, qual stays in outer query +EXPLAIN (COSTS OFF) +SELECT * FROM (SELECT DISTINCT x FROM pushdown_ci) s +WHERE x = 'abc' COLLATE case_sensitive; + QUERY PLAN +------------------------------------------------------ + Subquery Scan on s + Filter: (s.x = 'abc'::text COLLATE case_sensitive) + -> HashAggregate + Group Key: pushdown_ci.x + -> Seq Scan on pushdown_ci +(5 rows) + +SELECT * FROM (SELECT DISTINCT x FROM pushdown_ci) s +WHERE x = 'abc' COLLATE case_sensitive; + x +--- +(0 rows) + +-- Positive: matching collation, safe to push past the grouping +EXPLAIN (COSTS OFF) +SELECT * FROM (SELECT DISTINCT ON (x) id, x FROM pushdown_ci ORDER BY x, id) s +WHERE x = 'abc' COLLATE case_insensitive; + QUERY PLAN +------------------------------------------------------------------ + Limit + -> Sort + Sort Key: pushdown_ci.id + -> Seq Scan on pushdown_ci + Filter: (x = 'abc'::text COLLATE case_insensitive) +(5 rows) + +SELECT * FROM (SELECT DISTINCT ON (x) id, x FROM pushdown_ci ORDER BY x, id) s +WHERE x = 'abc' COLLATE case_insensitive; + id | x +----+----- + 1 | ABC +(1 row) + +-- Set operations: any operation other than UNION ALL groups rows by equality, +-- so the same collation-mismatch rules apply. +CREATE TABLE pushdown_ci2 (x text COLLATE case_insensitive); +INSERT INTO pushdown_ci2 VALUES ('abc'); +-- UNION: conflict, qual stays in outer query +EXPLAIN (COSTS OFF) +SELECT * FROM (SELECT x FROM pushdown_ci UNION SELECT x FROM pushdown_ci2) s +WHERE x = 'abc' COLLATE case_sensitive; + QUERY PLAN +------------------------------------------------------ + Subquery Scan on s + Filter: (s.x = 'abc'::text COLLATE case_sensitive) + -> HashAggregate + Group Key: pushdown_ci.x + -> Append + -> Seq Scan on pushdown_ci + -> Seq Scan on pushdown_ci2 +(7 rows) + +SELECT * FROM (SELECT x FROM pushdown_ci UNION SELECT x FROM pushdown_ci2) s +WHERE x = 'abc' COLLATE case_sensitive; + x +--- +(0 rows) + +-- INTERSECT: same +EXPLAIN (COSTS OFF) +SELECT * FROM (SELECT x FROM pushdown_ci INTERSECT SELECT x FROM pushdown_ci2) s +WHERE x = 'abc' COLLATE case_sensitive; + QUERY PLAN +------------------------------------------------------ + Subquery Scan on s + Filter: (s.x = 'abc'::text COLLATE case_sensitive) + -> HashSetOp Intersect + -> Seq Scan on pushdown_ci + -> Seq Scan on pushdown_ci2 +(5 rows) + +SELECT * FROM (SELECT x FROM pushdown_ci INTERSECT SELECT x FROM pushdown_ci2) s +WHERE x = 'abc' COLLATE case_sensitive; + x +--- +(0 rows) + +-- INTERSECT ALL: still groups +EXPLAIN (COSTS OFF) +SELECT * FROM (SELECT x FROM pushdown_ci INTERSECT ALL SELECT x FROM pushdown_ci2) s +WHERE x = 'abc' COLLATE case_sensitive; + QUERY PLAN +------------------------------------------------------ + Subquery Scan on s + Filter: (s.x = 'abc'::text COLLATE case_sensitive) + -> HashSetOp Intersect All + -> Seq Scan on pushdown_ci + -> Seq Scan on pushdown_ci2 +(5 rows) + +SELECT * FROM (SELECT x FROM pushdown_ci INTERSECT ALL SELECT x FROM pushdown_ci2) s +WHERE x = 'abc' COLLATE case_sensitive; + x +--- +(0 rows) + +-- Negative: a function over a grouping column with a nondeterministic +-- collation, whose result is compared under no collation (an integer +-- comparison), can distinguish values the grouping considers equal. +-- PARTITION BY +EXPLAIN (COSTS OFF) +SELECT * FROM ( + SELECT id, x, count(*) OVER (PARTITION BY x) AS cnt FROM pushdown_ci +) s +WHERE ascii(x) = 97; + QUERY PLAN +---------------------------------------------------------------- + Subquery Scan on s + Filter: (ascii(s.x) = 97) + -> WindowAgg + Window: w1 AS (PARTITION BY pushdown_ci.x) + -> Sort + Sort Key: pushdown_ci.x COLLATE case_insensitive + -> Seq Scan on pushdown_ci +(7 rows) + +SELECT * FROM ( + SELECT id, x, count(*) OVER (PARTITION BY x) AS cnt FROM pushdown_ci +) s +WHERE ascii(x) = 97; + id | x | cnt +----+-----+----- + 2 | abc | 2 +(1 row) + +-- Same with DISTINCT +EXPLAIN (COSTS OFF) +SELECT * FROM (SELECT DISTINCT x FROM pushdown_ci) s WHERE ascii(x) = 97; + QUERY PLAN +------------------------------------- + Subquery Scan on s + Filter: (ascii(s.x) = 97) + -> HashAggregate + Group Key: pushdown_ci.x + -> Seq Scan on pushdown_ci +(5 rows) + +SELECT * FROM (SELECT DISTINCT x FROM pushdown_ci) s WHERE ascii(x) = 97; + x +--- +(0 rows) + +-- Same with Set operations +EXPLAIN (COSTS OFF) +SELECT * FROM (SELECT x FROM pushdown_ci UNION SELECT x FROM pushdown_ci2) s +WHERE ascii(x) = 97; + QUERY PLAN +-------------------------------------------- + Subquery Scan on s + Filter: (ascii(s.x) = 97) + -> HashAggregate + Group Key: pushdown_ci.x + -> Append + -> Seq Scan on pushdown_ci + -> Seq Scan on pushdown_ci2 +(7 rows) + +SELECT * FROM (SELECT x FROM pushdown_ci UNION SELECT x FROM pushdown_ci2) s +WHERE ascii(x) = 97; + x +--- +(0 rows) + +DROP TABLE pushdown_ci2; +DROP TABLE pushdown_ci; -- bpchar CREATE TABLE test1bpci (x char(3) COLLATE case_insensitive); CREATE TABLE test2bpci (x char(3) COLLATE case_insensitive); diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out index a3778c23c34..20140f171af 100644 --- a/src/test/regress/expected/subselect.out +++ b/src/test/regress/expected/subselect.out @@ -2031,6 +2031,220 @@ NOTICE: x = 3, y = 0 drop function tattle(x int, y int); -- +-- check that an upper-level qual is not pushed down if its operator is from a +-- different btree opfamily than the subquery's grouping eqop +-- +BEGIN; +CREATE TYPE t_rec AS (x numeric); +CREATE TEMP TABLE pdt (id int, a t_rec); +INSERT INTO pdt VALUES + (1, ROW(1.00)::t_rec), + (2, ROW(1.0)::t_rec), + (3, ROW(2)::t_rec); +-- DISTINCT ON: conflict, qual stays in outer query +EXPLAIN (COSTS OFF) +SELECT * FROM (SELECT DISTINCT ON (a) id, a FROM pdt ORDER BY a, id) s +WHERE a *= ROW(1.0)::t_rec; + QUERY PLAN +--------------------------------------- + Subquery Scan on s + Filter: (s.a *= '(1.0)'::t_rec) + -> Unique + -> Sort + Sort Key: pdt.a, pdt.id + -> Seq Scan on pdt +(6 rows) + +SELECT * FROM (SELECT DISTINCT ON (a) id, a FROM pdt ORDER BY a, id) s +WHERE a *= ROW(1.0)::t_rec; + id | a +----+--- +(0 rows) + +-- Window function PARTITION BY: conflict, qual stays outside the WindowAgg +EXPLAIN (COSTS OFF) +SELECT * FROM ( + SELECT id, a, count(*) OVER (PARTITION BY a) AS cnt FROM pdt +) s +WHERE a *= ROW(1.0)::t_rec; + QUERY PLAN +-------------------------------------------- + Subquery Scan on s + Filter: (s.a *= '(1.0)'::t_rec) + -> WindowAgg + Window: w1 AS (PARTITION BY pdt.a) + -> Sort + Sort Key: pdt.a + -> Seq Scan on pdt +(7 rows) + +SELECT * FROM ( + SELECT id, a, count(*) OVER (PARTITION BY a) AS cnt FROM pdt +) s +WHERE a *= ROW(1.0)::t_rec; + id | a | cnt +----+-------+----- + 2 | (1.0) | 2 +(1 row) + +-- Plain DISTINCT: conflict, qual stays in outer query +EXPLAIN (COSTS OFF) +SELECT * FROM (SELECT DISTINCT a FROM pdt) s WHERE a *= ROW(1.0)::t_rec; + QUERY PLAN +----------------------------------- + Subquery Scan on s + Filter: (s.a *= '(1.0)'::t_rec) + -> HashAggregate + Group Key: pdt.a + -> Seq Scan on pdt +(5 rows) + +SELECT * FROM (SELECT DISTINCT a FROM pdt) s WHERE a *= ROW(1.0)::t_rec; + a +--- +(0 rows) + +-- Positive: compatible opfamily, safe to push past the grouping +EXPLAIN (COSTS OFF) +SELECT * FROM (SELECT DISTINCT ON (a) id, a FROM pdt ORDER BY a, id) s +WHERE a = ROW(1.0)::t_rec; + QUERY PLAN +-------------------------------------------- + Limit + -> Sort + Sort Key: pdt.id + -> Seq Scan on pdt + Filter: (a = '(1.0)'::t_rec) +(5 rows) + +SELECT * FROM (SELECT DISTINCT ON (a) id, a FROM pdt ORDER BY a, id) s +WHERE a = ROW(1.0)::t_rec; + id | a +----+-------- + 1 | (1.00) +(1 row) + +-- Set operations: any operation other than UNION ALL groups rows by equality, +-- so the same opfamily-mismatch rules apply. +CREATE TEMP TABLE u1 (a t_rec); +CREATE TEMP TABLE u2 (a t_rec); +INSERT INTO u1 VALUES (ROW(1.00)::t_rec), (ROW(1.0)::t_rec); +INSERT INTO u2 VALUES (ROW(1.0)::t_rec); +-- UNION: conflict, qual stays in outer query +EXPLAIN (COSTS OFF) +SELECT * FROM (SELECT a FROM u1 UNION SELECT a FROM u2) s +WHERE a *= ROW(1.0)::t_rec; + QUERY PLAN +----------------------------------- + Subquery Scan on s + Filter: (s.a *= '(1.0)'::t_rec) + -> HashAggregate + Group Key: u1.a + -> Append + -> Seq Scan on u1 + -> Seq Scan on u2 +(7 rows) + +SELECT * FROM (SELECT a FROM u1 UNION SELECT a FROM u2) s +WHERE a *= ROW(1.0)::t_rec; + a +--- +(0 rows) + +-- INTERSECT: same +EXPLAIN (COSTS OFF) +SELECT * FROM (SELECT a FROM u1 INTERSECT SELECT a FROM u2) s +WHERE a *= ROW(1.0)::t_rec; + QUERY PLAN +----------------------------------- + Subquery Scan on s + Filter: (s.a *= '(1.0)'::t_rec) + -> HashSetOp Intersect + -> Seq Scan on u1 + -> Seq Scan on u2 +(5 rows) + +SELECT * FROM (SELECT a FROM u1 INTERSECT SELECT a FROM u2) s +WHERE a *= ROW(1.0)::t_rec; + a +--- +(0 rows) + +-- INTERSECT ALL: still groups +EXPLAIN (COSTS OFF) +SELECT * FROM (SELECT a FROM u1 INTERSECT ALL SELECT a FROM u2) s +WHERE a *= ROW(1.0)::t_rec; + QUERY PLAN +----------------------------------- + Subquery Scan on s + Filter: (s.a *= '(1.0)'::t_rec) + -> HashSetOp Intersect All + -> Seq Scan on u1 + -> Seq Scan on u2 +(5 rows) + +SELECT * FROM (SELECT a FROM u1 INTERSECT ALL SELECT a FROM u2) s +WHERE a *= ROW(1.0)::t_rec; + a +--- +(0 rows) + +-- UNION ALL of (UNION ...): an inner grouping node still exposes the +-- conflict to a qual pushed down through the outer UNION ALL. +EXPLAIN (COSTS OFF) +SELECT * FROM ( + (SELECT a FROM u1 UNION SELECT a FROM u2) + UNION ALL + SELECT a FROM u2 +) s +WHERE a *= ROW(1.0)::t_rec; + QUERY PLAN +---------------------------------------- + Subquery Scan on s + Filter: (s.a *= '(1.0)'::t_rec) + -> Append + -> HashAggregate + Group Key: u1.a + -> Append + -> Seq Scan on u1 + -> Seq Scan on u2 + -> Seq Scan on u2 u2_1 +(9 rows) + +SELECT * FROM ( + (SELECT a FROM u1 UNION SELECT a FROM u2) + UNION ALL + SELECT a FROM u2 +) s +WHERE a *= ROW(1.0)::t_rec; + a +------- + (1.0) +(1 row) + +-- UNION ALL only: no grouping anywhere, pushdown remains allowed. +EXPLAIN (COSTS OFF) +SELECT * FROM (SELECT a FROM u1 UNION ALL SELECT a FROM u2) s +WHERE a *= ROW(1.0)::t_rec; + QUERY PLAN +--------------------------------------- + Append + -> Seq Scan on u1 + Filter: (a *= '(1.0)'::t_rec) + -> Seq Scan on u2 + Filter: (a *= '(1.0)'::t_rec) +(5 rows) + +SELECT * FROM (SELECT a FROM u1 UNION ALL SELECT a FROM u2) s +WHERE a *= ROW(1.0)::t_rec; + a +------- + (1.0) + (1.0) +(2 rows) + +ROLLBACK; +-- -- Test that LIMIT can be pushed to SORT through a subquery that just projects -- columns. We check for that having happened by looking to see if EXPLAIN -- ANALYZE shows that a top-N sort was used. We must suppress or filter away diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql index 916383db927..d082876b5c1 100644 --- a/src/test/regress/sql/aggregates.sql +++ b/src/test/regress/sql/aggregates.sql @@ -588,11 +588,21 @@ alter table t2 alter column z drop not null; create unique index t2_z_uidx on t2(z) nulls not distinct; explain (costs off) select y,z from t2 group by y,z; +drop table t1 cascade; +drop table t2; +drop table t3; +drop table p_t1; + +-- A composite type used by the tests below to exercise the asymmetry +-- between record_ops (per-field equality, the default) and record_image_ops +-- (bytewise equality): values like row(1.0) and row(1.00) are field-equal +-- but byte-distinct. +create type t_rec as (x numeric); + -- A unique index proves uniqueness only under its own opfamily. When the -- GROUP BY's eqop comes from a different opfamily with looser equality, -- rows the index regards as distinct can collapse into one GROUP BY group, -- so the index is not usable for removing redundant columns. -create type t_rec as (x numeric); create temp table t_opf (a t_rec not null, b text); create unique index on t_opf (a record_image_ops); -- (1.0) and (1.00) are bytewise distinct but logically equal as records; @@ -603,12 +613,32 @@ explain (costs off) select a, b from t_opf group by a, b order by b; select a, b from t_opf group by a, b order by b; drop table t_opf; -drop type t_rec; -drop table t1 cascade; -drop table t2; -drop table t3; -drop table p_t1; +-- A HAVING clause that uses an equality operator from a different opfamily +-- than the GROUP BY's eqop must NOT be pushed down to WHERE. +create temp table t_having (id int, a t_rec); +insert into t_having values + (1, row(1.0)::t_rec), + (2, row(1.00)::t_rec), + (3, row(2)::t_rec); + +-- the clause must stay in HAVING +explain (costs off) +select a, count(*) from t_having group by a having a *= row(1.0)::t_rec; +select a, count(*) from t_having group by a having a *= row(1.0)::t_rec; + +-- the clause must stay in HAVING +explain (costs off) +select a, count(*) from t_having group by a having a *= any (array[row(1.0)::t_rec]); +select a, count(*) from t_having group by a having a *= any (array[row(1.0)::t_rec]); + +-- the clause can be pushed down to WHERE +explain (costs off) +select a, count(*) from t_having group by a having a = row(1.0)::t_rec; +select a, count(*) from t_having group by a having a = row(1.0)::t_rec; + +drop table t_having; +drop type t_rec; -- -- Test GROUP BY ALL diff --git a/src/test/regress/sql/collate.icu.utf8.sql b/src/test/regress/sql/collate.icu.utf8.sql index 18c47e6e05a..a1f10708c96 100644 --- a/src/test/regress/sql/collate.icu.utf8.sql +++ b/src/test/regress/sql/collate.icu.utf8.sql @@ -761,31 +761,39 @@ EXPLAIN (COSTS OFF) SELECT x, count(*) FROM test3ci GROUP BY x HAVING x = 'abc' COLLATE case_insensitive; SELECT x, count(*) FROM test3ci GROUP BY x HAVING x = 'abc' COLLATE case_insensitive; --- Negative: function applied to grouped column with conflicting collation +-- Negative: function over the grouping column, conflicting collation EXPLAIN (COSTS OFF) SELECT x, count(*) FROM test3ci GROUP BY x HAVING upper(x) = 'ABC' COLLATE case_sensitive; SELECT x, count(*) FROM test3ci GROUP BY x HAVING upper(x) = 'ABC' COLLATE case_sensitive; --- Positive: function with same collation as GROUP BY +-- Negative: function over the grouping column whose result is compared as an +-- integer, under no collation +EXPLAIN (COSTS OFF) +SELECT x, count(*) FROM test3ci GROUP BY x HAVING ascii(x) = 97; +SELECT x, count(*) FROM test3ci GROUP BY x HAVING ascii(x) = 97; + +-- Negative: a function wrapping the grouping column is not provably safe even +-- when compared under the matching collation, since the function need not +-- preserve the collation's equality EXPLAIN (COSTS OFF) SELECT x, count(*) FROM test3ci GROUP BY x HAVING upper(x) = 'ABC' COLLATE case_insensitive; SELECT x, count(*) FROM test3ci GROUP BY x HAVING upper(x) = 'ABC' COLLATE case_insensitive; --- Negative: inner function has conflicting collation, even though outer --- operator's collation matches GROUP BY due to a COLLATE override +-- Negative: same, with the grouping column wrapped in a function whose input +-- collation is overridden; still not a direct operand, so it stays in HAVING EXPLAIN (COSTS OFF) SELECT x, count(*) FROM test3ci GROUP BY x HAVING upper(x COLLATE case_sensitive) COLLATE case_insensitive = 'ABC'; SELECT x, count(*) FROM test3ci GROUP BY x HAVING upper(x COLLATE case_sensitive) COLLATE case_insensitive = 'ABC'; -- Mixed AND: conflicting clause stays in HAVING, safe clause pushed to WHERE EXPLAIN (COSTS OFF) -SELECT x, count(*) FROM test3ci GROUP BY x HAVING x = 'abc' COLLATE case_sensitive AND length(x) > 1; -SELECT x, count(*) FROM test3ci GROUP BY x HAVING x = 'abc' COLLATE case_sensitive AND length(x) > 1; +SELECT x, count(*) FROM test3ci GROUP BY x HAVING x = 'abc' COLLATE case_sensitive AND x >= 'a' COLLATE case_insensitive; +SELECT x, count(*) FROM test3ci GROUP BY x HAVING x = 'abc' COLLATE case_sensitive AND x >= 'a' COLLATE case_insensitive; -- Positive: AND of two safe clauses, both can be pushed EXPLAIN (COSTS OFF) -SELECT x, count(*) FROM test3ci GROUP BY x HAVING x = 'abc' COLLATE case_insensitive AND length(x) > 1; -SELECT x, count(*) FROM test3ci GROUP BY x HAVING x = 'abc' COLLATE case_insensitive AND length(x) > 1; +SELECT x, count(*) FROM test3ci GROUP BY x HAVING x = 'abc' COLLATE case_insensitive AND x >= 'a' COLLATE case_insensitive; +SELECT x, count(*) FROM test3ci GROUP BY x HAVING x = 'abc' COLLATE case_insensitive AND x >= 'a' COLLATE case_insensitive; -- Negative: OR with a conflicting clause: must stay in HAVING EXPLAIN (COSTS OFF) @@ -826,6 +834,111 @@ EXPLAIN (COSTS OFF) SELECT x, count(*) FROM test3cs GROUP BY x HAVING x = 'abc' COLLATE case_insensitive ORDER BY 1; SELECT x, count(*) FROM test3cs GROUP BY x HAVING x = 'abc' COLLATE case_insensitive ORDER BY 1; +-- Test WHERE-pushdown past a grouping layer (DISTINCT, DISTINCT ON, window +-- PARTITION BY) when the qual applies a different collation than the +-- grouping column's nondeterministic collation. The qual would distinguish +-- rows the grouping considers equal, so it must NOT be pushed inside the +-- subquery. +CREATE TABLE pushdown_ci (id int, x text COLLATE case_insensitive); +INSERT INTO pushdown_ci VALUES (1, 'ABC'), (2, 'abc'), (3, 'def'); + +-- DISTINCT ON: conflict, qual stays in outer query +EXPLAIN (COSTS OFF) +SELECT * FROM (SELECT DISTINCT ON (x) id, x FROM pushdown_ci ORDER BY x, id) s +WHERE x = 'abc' COLLATE case_sensitive; + +SELECT * FROM (SELECT DISTINCT ON (x) id, x FROM pushdown_ci ORDER BY x, id) s +WHERE x = 'abc' COLLATE case_sensitive; + +-- Window function PARTITION BY: conflict, qual stays outside the WindowAgg +EXPLAIN (COSTS OFF) +SELECT * FROM ( + SELECT id, x, count(*) OVER (PARTITION BY x) AS cnt FROM pushdown_ci +) s +WHERE x = 'abc' COLLATE case_sensitive; + +SELECT * FROM ( + SELECT id, x, count(*) OVER (PARTITION BY x) AS cnt FROM pushdown_ci +) s +WHERE x = 'abc' COLLATE case_sensitive; + +-- Plain DISTINCT: conflict, qual stays in outer query +EXPLAIN (COSTS OFF) +SELECT * FROM (SELECT DISTINCT x FROM pushdown_ci) s +WHERE x = 'abc' COLLATE case_sensitive; + +SELECT * FROM (SELECT DISTINCT x FROM pushdown_ci) s +WHERE x = 'abc' COLLATE case_sensitive; + +-- Positive: matching collation, safe to push past the grouping +EXPLAIN (COSTS OFF) +SELECT * FROM (SELECT DISTINCT ON (x) id, x FROM pushdown_ci ORDER BY x, id) s +WHERE x = 'abc' COLLATE case_insensitive; + +SELECT * FROM (SELECT DISTINCT ON (x) id, x FROM pushdown_ci ORDER BY x, id) s +WHERE x = 'abc' COLLATE case_insensitive; + +-- Set operations: any operation other than UNION ALL groups rows by equality, +-- so the same collation-mismatch rules apply. +CREATE TABLE pushdown_ci2 (x text COLLATE case_insensitive); +INSERT INTO pushdown_ci2 VALUES ('abc'); + +-- UNION: conflict, qual stays in outer query +EXPLAIN (COSTS OFF) +SELECT * FROM (SELECT x FROM pushdown_ci UNION SELECT x FROM pushdown_ci2) s +WHERE x = 'abc' COLLATE case_sensitive; + +SELECT * FROM (SELECT x FROM pushdown_ci UNION SELECT x FROM pushdown_ci2) s +WHERE x = 'abc' COLLATE case_sensitive; + +-- INTERSECT: same +EXPLAIN (COSTS OFF) +SELECT * FROM (SELECT x FROM pushdown_ci INTERSECT SELECT x FROM pushdown_ci2) s +WHERE x = 'abc' COLLATE case_sensitive; + +SELECT * FROM (SELECT x FROM pushdown_ci INTERSECT SELECT x FROM pushdown_ci2) s +WHERE x = 'abc' COLLATE case_sensitive; + +-- INTERSECT ALL: still groups +EXPLAIN (COSTS OFF) +SELECT * FROM (SELECT x FROM pushdown_ci INTERSECT ALL SELECT x FROM pushdown_ci2) s +WHERE x = 'abc' COLLATE case_sensitive; + +SELECT * FROM (SELECT x FROM pushdown_ci INTERSECT ALL SELECT x FROM pushdown_ci2) s +WHERE x = 'abc' COLLATE case_sensitive; + +-- Negative: a function over a grouping column with a nondeterministic +-- collation, whose result is compared under no collation (an integer +-- comparison), can distinguish values the grouping considers equal. +-- PARTITION BY +EXPLAIN (COSTS OFF) +SELECT * FROM ( + SELECT id, x, count(*) OVER (PARTITION BY x) AS cnt FROM pushdown_ci +) s +WHERE ascii(x) = 97; + +SELECT * FROM ( + SELECT id, x, count(*) OVER (PARTITION BY x) AS cnt FROM pushdown_ci +) s +WHERE ascii(x) = 97; + +-- Same with DISTINCT +EXPLAIN (COSTS OFF) +SELECT * FROM (SELECT DISTINCT x FROM pushdown_ci) s WHERE ascii(x) = 97; + +SELECT * FROM (SELECT DISTINCT x FROM pushdown_ci) s WHERE ascii(x) = 97; + +-- Same with Set operations +EXPLAIN (COSTS OFF) +SELECT * FROM (SELECT x FROM pushdown_ci UNION SELECT x FROM pushdown_ci2) s +WHERE ascii(x) = 97; + +SELECT * FROM (SELECT x FROM pushdown_ci UNION SELECT x FROM pushdown_ci2) s +WHERE ascii(x) = 97; + +DROP TABLE pushdown_ci2; +DROP TABLE pushdown_ci; + -- bpchar CREATE TABLE test1bpci (x char(3) COLLATE case_insensitive); CREATE TABLE test2bpci (x char(3) COLLATE case_insensitive); diff --git a/src/test/regress/sql/subselect.sql b/src/test/regress/sql/subselect.sql index 1a02c3f86c0..3defbc29177 100644 --- a/src/test/regress/sql/subselect.sql +++ b/src/test/regress/sql/subselect.sql @@ -994,6 +994,111 @@ select * from drop function tattle(x int, y int); +-- +-- check that an upper-level qual is not pushed down if its operator is from a +-- different btree opfamily than the subquery's grouping eqop +-- +BEGIN; + +CREATE TYPE t_rec AS (x numeric); +CREATE TEMP TABLE pdt (id int, a t_rec); +INSERT INTO pdt VALUES + (1, ROW(1.00)::t_rec), + (2, ROW(1.0)::t_rec), + (3, ROW(2)::t_rec); + +-- DISTINCT ON: conflict, qual stays in outer query +EXPLAIN (COSTS OFF) +SELECT * FROM (SELECT DISTINCT ON (a) id, a FROM pdt ORDER BY a, id) s +WHERE a *= ROW(1.0)::t_rec; + +SELECT * FROM (SELECT DISTINCT ON (a) id, a FROM pdt ORDER BY a, id) s +WHERE a *= ROW(1.0)::t_rec; + +-- Window function PARTITION BY: conflict, qual stays outside the WindowAgg +EXPLAIN (COSTS OFF) +SELECT * FROM ( + SELECT id, a, count(*) OVER (PARTITION BY a) AS cnt FROM pdt +) s +WHERE a *= ROW(1.0)::t_rec; + +SELECT * FROM ( + SELECT id, a, count(*) OVER (PARTITION BY a) AS cnt FROM pdt +) s +WHERE a *= ROW(1.0)::t_rec; + +-- Plain DISTINCT: conflict, qual stays in outer query +EXPLAIN (COSTS OFF) +SELECT * FROM (SELECT DISTINCT a FROM pdt) s WHERE a *= ROW(1.0)::t_rec; + +SELECT * FROM (SELECT DISTINCT a FROM pdt) s WHERE a *= ROW(1.0)::t_rec; + +-- Positive: compatible opfamily, safe to push past the grouping +EXPLAIN (COSTS OFF) +SELECT * FROM (SELECT DISTINCT ON (a) id, a FROM pdt ORDER BY a, id) s +WHERE a = ROW(1.0)::t_rec; + +SELECT * FROM (SELECT DISTINCT ON (a) id, a FROM pdt ORDER BY a, id) s +WHERE a = ROW(1.0)::t_rec; + +-- Set operations: any operation other than UNION ALL groups rows by equality, +-- so the same opfamily-mismatch rules apply. +CREATE TEMP TABLE u1 (a t_rec); +CREATE TEMP TABLE u2 (a t_rec); +INSERT INTO u1 VALUES (ROW(1.00)::t_rec), (ROW(1.0)::t_rec); +INSERT INTO u2 VALUES (ROW(1.0)::t_rec); + +-- UNION: conflict, qual stays in outer query +EXPLAIN (COSTS OFF) +SELECT * FROM (SELECT a FROM u1 UNION SELECT a FROM u2) s +WHERE a *= ROW(1.0)::t_rec; + +SELECT * FROM (SELECT a FROM u1 UNION SELECT a FROM u2) s +WHERE a *= ROW(1.0)::t_rec; + +-- INTERSECT: same +EXPLAIN (COSTS OFF) +SELECT * FROM (SELECT a FROM u1 INTERSECT SELECT a FROM u2) s +WHERE a *= ROW(1.0)::t_rec; + +SELECT * FROM (SELECT a FROM u1 INTERSECT SELECT a FROM u2) s +WHERE a *= ROW(1.0)::t_rec; + +-- INTERSECT ALL: still groups +EXPLAIN (COSTS OFF) +SELECT * FROM (SELECT a FROM u1 INTERSECT ALL SELECT a FROM u2) s +WHERE a *= ROW(1.0)::t_rec; + +SELECT * FROM (SELECT a FROM u1 INTERSECT ALL SELECT a FROM u2) s +WHERE a *= ROW(1.0)::t_rec; + +-- UNION ALL of (UNION ...): an inner grouping node still exposes the +-- conflict to a qual pushed down through the outer UNION ALL. +EXPLAIN (COSTS OFF) +SELECT * FROM ( + (SELECT a FROM u1 UNION SELECT a FROM u2) + UNION ALL + SELECT a FROM u2 +) s +WHERE a *= ROW(1.0)::t_rec; + +SELECT * FROM ( + (SELECT a FROM u1 UNION SELECT a FROM u2) + UNION ALL + SELECT a FROM u2 +) s +WHERE a *= ROW(1.0)::t_rec; + +-- UNION ALL only: no grouping anywhere, pushdown remains allowed. +EXPLAIN (COSTS OFF) +SELECT * FROM (SELECT a FROM u1 UNION ALL SELECT a FROM u2) s +WHERE a *= ROW(1.0)::t_rec; + +SELECT * FROM (SELECT a FROM u1 UNION ALL SELECT a FROM u2) s +WHERE a *= ROW(1.0)::t_rec; + +ROLLBACK; + -- -- Test that LIMIT can be pushed to SORT through a subquery that just projects -- columns. We check for that having happened by looking to see if EXPLAIN diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 3a2720fb5f9..3442c4b9ec9 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -3841,7 +3841,9 @@ gistxlogPageDelete gistxlogPageReuse gistxlogPageSplit gistxlogPageUpdate +grouping_eqop_callback grouping_sets_data +grouping_walker_ctx growable_trgm_array gseg_picksplit_item gss_OID_set @@ -3854,7 +3856,7 @@ gss_key_value_set_desc gss_name_t gtrgm_consistent_cache gzFile -having_collation_ctx +having_grouping_ctx heap_page_items_state help_handler hlCheck -- 2.39.5 (Apple Git-154)