From a130b09b7acf5e98785e4560b98f4aca2fce4080 Mon Sep 17 00:00:00 2001 From: David Rowley Date: Sat, 8 May 2021 23:43:25 +1200 Subject: [PATCH v2] Teach planner and executor about monotonic window funcs Window functions such as row_number() always return a value higher than the one previously in any given partition. If a query were to only request the first few row numbers, then traditionally we would continue evaluating the WindowAgg node until all tuples are exhausted. However, it is possible if someone, say only wanted all row numbers <= 10, then we could just stop once we get number 11. Here we implement means to do just that. This is done by way of adding a prosupport function to various of the built-in window functions and adding supporting code to allow the support function to inform the planner if the function is monotonically increasing, monotonically decreasing, both or neither. The planner is then able to make use of that information and possibly allow the executor to short-circuit execution by way of adding a "run condition" to the WindowAgg to instruct it to run while this condition is true and stop when it becomes false. This optimization is only possible when the subquery contains only a single window clause. The problem with having multiple clauses is that at the time when we're looking for run conditions, we don't yet know the order that the window clauses will be evaluated. We cannot add a run condition to a window clause that is evaluated first as we may stop execution and not output rows that are required for a window clause which will be evaluated later. Here we add prosupport functions to allow this to work for; row_number(), rank(), dense_rank(), count(*) and count(expr). Author: David Rowley Reviewed-by: Andy Fan Discussion: https://postgr.es/m/CAApHDvqvp3At8++yF8ij06sdcoo1S_b2YoaT9D4Nf+MObzsrLQ@mail.gmail.com --- src/backend/commands/explain.c | 4 + src/backend/executor/nodeWindowAgg.c | 28 +- src/backend/nodes/copyfuncs.c | 4 + src/backend/nodes/equalfuncs.c | 2 + src/backend/nodes/outfuncs.c | 4 + src/backend/nodes/readfuncs.c | 4 + src/backend/optimizer/path/allpaths.c | 405 +++++++++++++++++++++++- src/backend/optimizer/plan/createplan.c | 7 +- src/backend/optimizer/plan/setrefs.c | 8 + src/backend/utils/adt/int8.c | 45 +++ src/backend/utils/adt/windowfuncs.c | 63 ++++ src/include/catalog/pg_proc.dat | 35 +- src/include/nodes/execnodes.h | 4 + src/include/nodes/nodes.h | 3 +- src/include/nodes/parsenodes.h | 2 + src/include/nodes/plannodes.h | 20 ++ src/include/nodes/supportnodes.h | 61 +++- src/test/regress/expected/window.out | 315 ++++++++++++++++++ src/test/regress/sql/window.sql | 165 ++++++++++ 19 files changed, 1161 insertions(+), 18 deletions(-) diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index 9f632285b6..92ca4acf50 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -1985,6 +1985,10 @@ ExplainNode(PlanState *planstate, List *ancestors, show_instrumentation_count("Rows Removed by Filter", 1, planstate, es); break; + case T_WindowAgg: + show_upper_qual(((WindowAgg *) plan)->runconditionorig, + "Run Condition", planstate, ancestors, es); + break; case T_Group: show_group_keys(castNode(GroupState, planstate), ancestors, es); show_upper_qual(plan->qual, "Filter", planstate, ancestors, es); diff --git a/src/backend/executor/nodeWindowAgg.c b/src/backend/executor/nodeWindowAgg.c index 08ce05ca5a..3e4f00e162 100644 --- a/src/backend/executor/nodeWindowAgg.c +++ b/src/backend/executor/nodeWindowAgg.c @@ -2023,6 +2023,7 @@ static TupleTableSlot * ExecWindowAgg(PlanState *pstate) { WindowAggState *winstate = castNode(WindowAggState, pstate); + TupleTableSlot *slot; ExprContext *econtext; int i; int numfuncs; @@ -2235,7 +2236,20 @@ ExecWindowAgg(PlanState *pstate) */ econtext->ecxt_outertuple = winstate->ss.ss_ScanTupleSlot; - return ExecProject(winstate->ss.ps.ps_ProjInfo); + slot = ExecProject(winstate->ss.ps.ps_ProjInfo); + + /* + * Now evaluate the run condition to see if we need to continue further + * with execution. + */ + econtext->ecxt_scantuple = slot; + if (!ExecQual(winstate->runcondition, econtext)) + { + winstate->all_done = true; + return NULL; + } + + return slot; } /* ----------------- @@ -2307,6 +2321,18 @@ ExecInitWindowAgg(WindowAgg *node, EState *estate, int eflags) Assert(node->plan.qual == NIL); winstate->ss.ps.qual = NULL; + /* + * Setup the run condition, if we received one from the query planner. + * When set, this can allow us to finish execution early because we know + * some higher-level filter exists that would just filter out any further + * results that we produce. + */ + if (node->runcondition != NIL) + winstate->runcondition = ExecInitQual(node->runcondition, + (PlanState *) winstate); + else + winstate->runcondition = NULL; + /* * initialize child nodes */ diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 55c36b46a8..1ce3f50ce1 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -1102,6 +1102,8 @@ _copyWindowAgg(const WindowAgg *from) COPY_SCALAR_FIELD(frameOptions); COPY_NODE_FIELD(startOffset); COPY_NODE_FIELD(endOffset); + COPY_NODE_FIELD(runcondition); + COPY_NODE_FIELD(runconditionorig); COPY_SCALAR_FIELD(startInRangeFunc); COPY_SCALAR_FIELD(endInRangeFunc); COPY_SCALAR_FIELD(inRangeColl); @@ -2625,6 +2627,8 @@ _copyWindowClause(const WindowClause *from) COPY_SCALAR_FIELD(frameOptions); COPY_NODE_FIELD(startOffset); COPY_NODE_FIELD(endOffset); + COPY_NODE_FIELD(runcondition); + COPY_NODE_FIELD(runconditionorig); COPY_SCALAR_FIELD(startInRangeFunc); COPY_SCALAR_FIELD(endInRangeFunc); COPY_SCALAR_FIELD(inRangeColl); diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c index 0ddebd066e..b893cffb64 100644 --- a/src/backend/nodes/equalfuncs.c +++ b/src/backend/nodes/equalfuncs.c @@ -2909,6 +2909,8 @@ _equalWindowClause(const WindowClause *a, const WindowClause *b) COMPARE_SCALAR_FIELD(frameOptions); COMPARE_NODE_FIELD(startOffset); COMPARE_NODE_FIELD(endOffset); + COMPARE_NODE_FIELD(runcondition); + COMPARE_NODE_FIELD(runconditionorig); COMPARE_SCALAR_FIELD(startInRangeFunc); COMPARE_SCALAR_FIELD(endInRangeFunc); COMPARE_SCALAR_FIELD(inRangeColl); diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 449d90c8f4..a22e7983b0 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -827,6 +827,8 @@ _outWindowAgg(StringInfo str, const WindowAgg *node) WRITE_INT_FIELD(frameOptions); WRITE_NODE_FIELD(startOffset); WRITE_NODE_FIELD(endOffset); + WRITE_NODE_FIELD(runcondition); + WRITE_NODE_FIELD(runconditionorig); WRITE_OID_FIELD(startInRangeFunc); WRITE_OID_FIELD(endInRangeFunc); WRITE_OID_FIELD(inRangeColl); @@ -3178,6 +3180,8 @@ _outWindowClause(StringInfo str, const WindowClause *node) WRITE_INT_FIELD(frameOptions); WRITE_NODE_FIELD(startOffset); WRITE_NODE_FIELD(endOffset); + WRITE_NODE_FIELD(runcondition); + WRITE_NODE_FIELD(runconditionorig); WRITE_OID_FIELD(startInRangeFunc); WRITE_OID_FIELD(endInRangeFunc); WRITE_OID_FIELD(inRangeColl); diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c index 6f398cdc15..937d7394bd 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -382,6 +382,8 @@ _readWindowClause(void) READ_INT_FIELD(frameOptions); READ_NODE_FIELD(startOffset); READ_NODE_FIELD(endOffset); + READ_NODE_FIELD(runcondition); + READ_NODE_FIELD(runconditionorig); READ_OID_FIELD(startInRangeFunc); READ_OID_FIELD(endInRangeFunc); READ_OID_FIELD(inRangeColl); @@ -2392,6 +2394,8 @@ _readWindowAgg(void) READ_INT_FIELD(frameOptions); READ_NODE_FIELD(startOffset); READ_NODE_FIELD(endOffset); + READ_NODE_FIELD(runcondition); + READ_NODE_FIELD(runconditionorig); READ_OID_FIELD(startInRangeFunc); READ_OID_FIELD(endInRangeFunc); READ_OID_FIELD(inRangeColl); diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index 169b1d53fc..d823c09e3c 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -27,6 +27,7 @@ #include "miscadmin.h" #include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" +#include "nodes/supportnodes.h" #ifdef OPTIMIZER_DEBUG #include "nodes/print.h" #endif @@ -2157,6 +2158,391 @@ has_multiple_baserels(PlannerInfo *root) return false; } +/* + * find_window_run_conditions + * Call wfunc's prosupport function to ask if the given window function + * is monotonic and then see if 'opexpr' can be used to stop processing + * WindowAgg nodes early. + * + * For example row_number() over (order by ...) always produces a value one + * higher than the previous. If someone has a window function such as that + * in a subquery and just wants, say all rows with a row number less than or + * equal to 10, then we may as well stop processing the windowagg once the row + * number reaches 11. Here we look for opexprs that might help us to stop + * doing needless extra processing in WindowAgg nodes. + * + * To do this we make use of the window function's prosupport function to + * determine if the given window function with the given window clause is a + * monotonically increasing or decreasing function. + * + * '*keep_original' is set to true if the caller should also use 'opexpr' for + * its original purpose. This is set to false if the caller can assume that + * the run condition will handle all of the required filtering. + * + * Returns true if a run condition qual was found and added to the + * wclause->runcondition list and sets *keep_original accordingly. Returns + * false if unable to use 'opexpr' as a run condition and does not set + * *keep_original. + */ +static bool +find_window_run_conditions(Query *subquery, RangeTblEntry *rte, Index rti, + AttrNumber attno, WindowClause *wclause, + WindowFunc *wfunc, OpExpr *opexpr, bool wfunc_left, + bool *keep_original) +{ + Oid prosupport; + Expr *otherexpr; + SupportRequestWFuncMonotonic req; + SupportRequestWFuncMonotonic *res; + List *opinfos; + OpExpr *runopexpr; + ListCell *lc; + + *keep_original = true; + + /* + * Currently the WindowAgg node just stops when the run condition is no + * longer true. If there is a PARTITION BY clause then we cannot just + * stop as other partitions still need to be processed. + */ + if (wclause->partitionClause != NIL) + return false; + + prosupport = get_func_support(wfunc->winfnoid); + + /* Check if there's a support function for 'wfunc' */ + if (!OidIsValid(prosupport)) + return false; + + /* get the Expr from the other side of the OpExpr */ + if (wfunc_left) + otherexpr = lsecond(opexpr->args); + else + otherexpr = linitial(opexpr->args); + + /* + * The value being compared must not change during the evaluation of the + * window partition. + */ + if (!is_pseudo_constant_clause((Node *) otherexpr)) + return false; + + req.type = T_SupportRequestWFuncMonotonic; + req.window_func = wfunc; + req.window_clause = wclause; + + /* Call the support function */ + res = (SupportRequestWFuncMonotonic *) + DatumGetPointer(OidFunctionCall1(prosupport, + PointerGetDatum(&req))); + + /* + * Nothing to do if the function is not monotonically increasing or + * decreasing. + */ + if (res == NULL || res->monotonic == MONOTONICFUNC_NONE) + return false; + + runopexpr = NULL; + opinfos = get_op_btree_interpretation(opexpr->opno); + + foreach(lc, opinfos) + { + OpBtreeInterpretation *opinfo = (OpBtreeInterpretation *) lfirst(lc); + int strategy = opinfo->strategy; + + if (wfunc_left) + { + /* Handle < / <= */ + if (strategy == BTLessStrategyNumber || + strategy == BTLessEqualStrategyNumber) + { + /* + * If the frame is bound to the top of the window then the + * result cannot decrease. + */ + if (res->monotonic & MONOTONICFUNC_INCREASING) + { + *keep_original = false; + runopexpr = opexpr; + } + break; + } + /* Handle > / >= */ + else if (strategy == BTGreaterStrategyNumber || + strategy == BTGreaterEqualStrategyNumber) + { + /* + * If the frame is bound to the bottom of the window then the + * result cannot increase. + */ + if (res->monotonic & MONOTONICFUNC_DECREASING) + { + *keep_original = false; + runopexpr = opexpr; + } + break; + } + /* Handle = */ + else if (strategy == BTEqualStrategyNumber) + { + OpExpr *newopexpr; + Oid op; + int16 newstrategy; + + /* + * When both monotonically increasing and decreasing then the + * return value of the window function will be the same each + * time. We can simply use 'opexpr' as the run condition + * without modifying it. + */ + if ((res->monotonic & MONOTONICFUNC_BOTH) == MONOTONICFUNC_BOTH) + { + *keep_original = false; + runopexpr = opexpr; + break; + } + + /* + * When monotonically increasing we make a qual with + * <= in order to filter out values which are above + * the value in the equality condition. For monotonically + * decreasing we want to filter values below the value in the + * equality condition. + */ + if (res->monotonic & MONOTONICFUNC_INCREASING) + newstrategy = BTLessEqualStrategyNumber; + else + newstrategy = BTGreaterEqualStrategyNumber; + + op = get_opfamily_member(opinfo->opfamily_id, + opinfo->oplefttype, + opinfo->oprighttype, + newstrategy); + + newopexpr = (OpExpr *) make_opclause(op, + opexpr->opresulttype, + opexpr->opretset, + (Expr *) wfunc, + otherexpr, + opexpr->opcollid, + opexpr->inputcollid); + newopexpr->opfuncid = get_opcode(op); + + *keep_original = true; + runopexpr = newopexpr; + break; + } + } + else + { + /* Handle > / >= */ + if (strategy == BTGreaterStrategyNumber || + strategy == BTGreaterEqualStrategyNumber) + { + /* + * If the frame is bound to the top of the window then the + * result cannot decrease. + */ + if (res->monotonic & MONOTONICFUNC_INCREASING) + { + *keep_original = false; + runopexpr = opexpr; + } + break; + } + /* Handle < / <= */ + else if (strategy == BTLessStrategyNumber || + strategy == BTLessEqualStrategyNumber) + { + /* + * If the frame is bound to the bottom of the window then the + * result cannot increase. + */ + if (res->monotonic & MONOTONICFUNC_DECREASING) + { + *keep_original = false; + runopexpr = opexpr; + } + break; + } + /* Handle = */ + else if (strategy == BTEqualStrategyNumber) + { + OpExpr *newopexpr; + Oid op; + int16 newstrategy; + + /* + * When both monotonically increasing and decreasing then the + * return value of the window function will be the same each + * time. We can simply use 'opexpr' as the run condition + * without modifying it. + */ + if ((res->monotonic & MONOTONICFUNC_BOTH) == MONOTONICFUNC_BOTH) + { + *keep_original = false; + runopexpr = opexpr; + break; + } + + /* + * When monotonically increasing we make a qual with + * >= in order to filter out values which are above + * the value in the equality condition. For monotonically + * decreasing we want to filter values below the value in the + * equality condition. + */ + if (res->monotonic & MONOTONICFUNC_INCREASING) + newstrategy = BTGreaterEqualStrategyNumber; + else + newstrategy = BTLessEqualStrategyNumber; + + op = get_opfamily_member(opinfo->opfamily_id, + opinfo->oplefttype, + opinfo->oprighttype, + newstrategy); + + newopexpr = (OpExpr *) make_opclause(op, + opexpr->opresulttype, + opexpr->opretset, + otherexpr, + (Expr *) wfunc, + opexpr->opcollid, + opexpr->inputcollid); + newopexpr->opfuncid = get_opcode(op); + + *keep_original = true; + runopexpr = newopexpr; + break; + } + } + } + + if (runopexpr != NULL) + { + Expr *origexpr; + + wclause->runcondition = lappend(wclause->runcondition, runopexpr); + + /* also create a version of the qual that we can display in EXPLAIN */ + if (wfunc_left) + origexpr = make_opclause(runopexpr->opno, + runopexpr->opresulttype, + runopexpr->opretset, (Expr *) wfunc, + otherexpr, runopexpr->opcollid, + runopexpr->inputcollid); + else + origexpr = make_opclause(runopexpr->opno, + runopexpr->opresulttype, + runopexpr->opretset, + otherexpr, (Expr *) wfunc, + runopexpr->opcollid, + runopexpr->inputcollid); + + wclause->runconditionorig = lappend(wclause->runconditionorig, origexpr); + return true; + } + + /* unsupported OpExpr */ + return false; +} + +/* + * check_and_push_window_quals + * Check if 'rinfo' is a qual that can be pushed into a WindowFunc as a + * 'runcondition' qual. These, when present, cause the window function + * evaluation to stop when the condition becomes false. + * + * Returns true if the caller still must keep the original qual or false if + * the caller can safely ignore the original qual because the window function + * will use the run condition to stop at the right time. + */ +static bool +check_and_push_window_quals(Query *subquery, RangeTblEntry *rte, Index rti, + Node *clause) +{ + OpExpr *opexpr = (OpExpr *) clause; + bool keep_original = true; + Var *var1; + Var *var2; + + /* We're only able to use OpExprs with 2 operands */ + if (!IsA(opexpr, OpExpr)) + return true; + + if (list_length(opexpr->args) != 2) + return true; + + /* + * We don't currently attempt to generate window function run conditions + * when there are multiple window clauses. It may seem possible to relax + * this a bit and ensure we only put run conditions on the final window + * clause to be evaluated, however, currently we've yet to determine the + * order that the window clauses will be evaluated. This's done later in + * select_active_windows(). If we were to put run conditions on anything + * apart from the final window clause to be evaluated then we may filter + * rows that are required for a yet-to-be-evaluated window clause. + */ + if (list_length(subquery->windowClause) > 1) + return true; + + /* + * Check for plain Vars which reference window functions in the subquery. + * If we find any, we'll ask find_window_run_conditions() if 'opexpr' can + * be used as a run condition to allow us to stop windowagg execution + * early. + */ + + /* Check the left side of the OpExpr */ + var1 = linitial(opexpr->args); + if (IsA(var1, Var) && var1->varattno > 0) + { + TargetEntry *tle = list_nth(subquery->targetList, var1->varattno - 1); + WindowFunc *wfunc = (WindowFunc *) tle->expr; + + while (IsA(wfunc, RelabelType)) + wfunc = (WindowFunc *) ((RelabelType *) wfunc)->arg; + + if (IsA(wfunc, WindowFunc)) + { + WindowClause *wclause = (WindowClause *) + list_nth(subquery->windowClause, + wfunc->winref - 1); + + if (find_window_run_conditions(subquery, rte, rti, tle->resno, + wclause, wfunc, opexpr, true, + &keep_original)) + return keep_original; + } + } + + /* and check the right side */ + var2 = lsecond(opexpr->args); + if (IsA(var2, Var) && var2->varattno > 0) + { + TargetEntry *tle = list_nth(subquery->targetList, var2->varattno - 1); + WindowFunc *wfunc = (WindowFunc *) tle->expr; + + while (IsA(wfunc, RelabelType)) + wfunc = (WindowFunc *) ((RelabelType *) wfunc)->arg; + + if (IsA(wfunc, WindowFunc)) + { + WindowClause *wclause = (WindowClause *) + list_nth(subquery->windowClause, + wfunc->winref - 1); + + if (find_window_run_conditions(subquery, rte, rti, tle->resno, + wclause, wfunc, opexpr, false, + &keep_original)) + return keep_original; + } + } + + return true; +} + /* * set_subquery_pathlist * Generate SubqueryScan access paths for a subquery RTE @@ -2245,19 +2631,30 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel, foreach(l, rel->baserestrictinfo) { RestrictInfo *rinfo = (RestrictInfo *) lfirst(l); + Node *clause = (Node *) rinfo->clause; if (!rinfo->pseudoconstant && qual_is_pushdown_safe(subquery, rti, rinfo, &safetyInfo)) { - Node *clause = (Node *) rinfo->clause; - /* Push it down */ subquery_push_qual(subquery, rte, rti, clause); } else { - /* Keep it in the upper query */ - upperrestrictlist = lappend(upperrestrictlist, rinfo); + /* + * Since we can't push the qual down into the subquery, check + * if it happens to reference a window function. If so then + * it might be useful to allow the window evaluation to stop + * early. + */ + if (check_and_push_window_quals(subquery, rte, rti, clause)) + { + /* + * It's not a suitable window run condition qual or it is, + * but the original must also be kept in the upper query. + */ + upperrestrictlist = lappend(upperrestrictlist, rinfo); + } } } rel->baserestrictinfo = upperrestrictlist; diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index fa069a217c..1c750bd7a4 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -288,6 +288,7 @@ static WindowAgg *make_windowagg(List *tlist, Index winref, int frameOptions, Node *startOffset, Node *endOffset, Oid startInRangeFunc, Oid endInRangeFunc, Oid inRangeColl, bool inRangeAsc, bool inRangeNullsFirst, + List *runcondition, List *runconditionorig, Plan *lefttree); static Group *make_group(List *tlist, List *qual, int numGroupCols, AttrNumber *grpColIdx, Oid *grpOperators, Oid *grpCollations, @@ -2641,6 +2642,8 @@ create_windowagg_plan(PlannerInfo *root, WindowAggPath *best_path) wc->inRangeColl, wc->inRangeAsc, wc->inRangeNullsFirst, + wc->runcondition, + wc->runconditionorig, subplan); copy_generic_path_info(&plan->plan, (Path *) best_path); @@ -6525,7 +6528,7 @@ make_windowagg(List *tlist, Index winref, int frameOptions, Node *startOffset, Node *endOffset, Oid startInRangeFunc, Oid endInRangeFunc, Oid inRangeColl, bool inRangeAsc, bool inRangeNullsFirst, - Plan *lefttree) + List *runcondition, List *runconditionorig, Plan *lefttree) { WindowAgg *node = makeNode(WindowAgg); Plan *plan = &node->plan; @@ -6542,6 +6545,8 @@ make_windowagg(List *tlist, Index winref, node->frameOptions = frameOptions; node->startOffset = startOffset; node->endOffset = endOffset; + node->runcondition = runcondition; + node->runconditionorig = runconditionorig; node->startInRangeFunc = startInRangeFunc; node->endInRangeFunc = endInRangeFunc; node->inRangeColl = inRangeColl; diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c index a7b11b7f03..eaeba17088 100644 --- a/src/backend/optimizer/plan/setrefs.c +++ b/src/backend/optimizer/plan/setrefs.c @@ -897,6 +897,14 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset) fix_scan_expr(root, wplan->startOffset, rtoffset, 1); wplan->endOffset = fix_scan_expr(root, wplan->endOffset, rtoffset, 1); + wplan->runcondition = fix_scan_list(root, + wplan->runcondition, + rtoffset, + NUM_EXEC_TLIST(plan)); + wplan->runconditionorig = fix_scan_list(root, + wplan->runconditionorig, + rtoffset, + NUM_EXEC_TLIST(plan)); } break; case T_Result: diff --git a/src/backend/utils/adt/int8.c b/src/backend/utils/adt/int8.c index 4a87114a4f..b4bcc26d6c 100644 --- a/src/backend/utils/adt/int8.c +++ b/src/backend/utils/adt/int8.c @@ -24,6 +24,7 @@ #include "nodes/supportnodes.h" #include "optimizer/optimizer.h" #include "utils/builtins.h" +#include "utils/lsyscache.h" typedef struct @@ -791,6 +792,7 @@ int8dec(PG_FUNCTION_ARGS) } + /* * These functions are exactly like int8inc/int8dec but are used for * aggregates that count only non-null values. Since the functions are @@ -818,6 +820,49 @@ int8dec_any(PG_FUNCTION_ARGS) return int8dec(fcinfo); } +/* + * int8inc_support + * prosupport function for int8inc() and int8inc_any() + */ +Datum +int8inc_support(PG_FUNCTION_ARGS) +{ + Node *rawreq = (Node *) PG_GETARG_POINTER(0); + + if (IsA(rawreq, SupportRequestWFuncMonotonic)) + { + SupportRequestWFuncMonotonic *req = (SupportRequestWFuncMonotonic *) rawreq; + MonotonicFunction monotonic = MONOTONICFUNC_NONE; + int frameOptions = req->window_clause->frameOptions; + + /* No ORDER BY clause then all rows are peers */ + if (req->window_clause->orderClause == NIL) + monotonic = MONOTONICFUNC_BOTH; + else + { + /* + * Otherwise take into account the frame options. When the frame + * bound is the start of the window then the resulting value can + * never decrease, therefore is monotonically increasing + */ + if (frameOptions & FRAMEOPTION_START_UNBOUNDED_PRECEDING) + monotonic |= MONOTONICFUNC_INCREASING; + + /* + * Likewise, if the frame bound is the end of the window then the + * resulting value can never increase. + */ + if (frameOptions & FRAMEOPTION_END_UNBOUNDED_FOLLOWING) + monotonic |= MONOTONICFUNC_DECREASING; + } + + req->monotonic = monotonic; + PG_RETURN_POINTER(req); + } + + PG_RETURN_POINTER(NULL); +} + Datum int8larger(PG_FUNCTION_ARGS) diff --git a/src/backend/utils/adt/windowfuncs.c b/src/backend/utils/adt/windowfuncs.c index 3e0cc9be1a..596564fa15 100644 --- a/src/backend/utils/adt/windowfuncs.c +++ b/src/backend/utils/adt/windowfuncs.c @@ -13,6 +13,7 @@ */ #include "postgres.h" +#include "nodes/supportnodes.h" #include "utils/builtins.h" #include "windowapi.h" @@ -88,6 +89,26 @@ window_row_number(PG_FUNCTION_ARGS) PG_RETURN_INT64(curpos + 1); } +/* + * window_row_number_support + * prosupport function for window_row_number() + */ +Datum +window_row_number_support(PG_FUNCTION_ARGS) +{ + Node *rawreq = (Node *) PG_GETARG_POINTER(0); + + if (IsA(rawreq, SupportRequestWFuncMonotonic)) + { + SupportRequestWFuncMonotonic *req = (SupportRequestWFuncMonotonic *) rawreq; + + /* row_number() is monotonically increasing */ + req->monotonic = MONOTONICFUNC_INCREASING; + PG_RETURN_POINTER(req); + } + + PG_RETURN_POINTER(NULL); +} /* * rank @@ -110,6 +131,27 @@ window_rank(PG_FUNCTION_ARGS) PG_RETURN_INT64(context->rank); } +/* + * window_rank_support + * prosupport function for window_rank() + */ +Datum +window_rank_support(PG_FUNCTION_ARGS) +{ + Node *rawreq = (Node *) PG_GETARG_POINTER(0); + + if (IsA(rawreq, SupportRequestWFuncMonotonic)) + { + SupportRequestWFuncMonotonic *req = (SupportRequestWFuncMonotonic *) rawreq; + + /* rank() is monotonically increasing */ + req->monotonic = MONOTONICFUNC_INCREASING; + PG_RETURN_POINTER(req); + } + + PG_RETURN_POINTER(NULL); +} + /* * dense_rank * Rank increases by 1 when key columns change. @@ -130,6 +172,27 @@ window_dense_rank(PG_FUNCTION_ARGS) PG_RETURN_INT64(context->rank); } +/* + * window_dense_rank_support + * prosupport function for window_dense_rank() + */ +Datum +window_dense_rank_support(PG_FUNCTION_ARGS) +{ + Node *rawreq = (Node *) PG_GETARG_POINTER(0); + + if (IsA(rawreq, SupportRequestWFuncMonotonic)) + { + SupportRequestWFuncMonotonic *req = (SupportRequestWFuncMonotonic *) rawreq; + + /* dense_rank() is monotonically increasing */ + req->monotonic = MONOTONICFUNC_INCREASING; + PG_RETURN_POINTER(req); + } + + PG_RETURN_POINTER(NULL); +} + /* * percent_rank * return fraction between 0 and 1 inclusive, diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index d8e8715ed1..990a5098d5 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -6641,11 +6641,16 @@ # count has two forms: count(any) and count(*) { oid => '2147', descr => 'number of input rows for which the input expression is not null', - proname => 'count', prokind => 'a', proisstrict => 'f', prorettype => 'int8', - proargtypes => 'any', prosrc => 'aggregate_dummy' }, + proname => 'count', prosupport => 'int8inc_support', prokind => 'a', + proisstrict => 'f', prorettype => 'int8', proargtypes => 'any', + prosrc => 'aggregate_dummy' }, { oid => '2803', descr => 'number of input rows', - proname => 'count', prokind => 'a', proisstrict => 'f', prorettype => 'int8', - proargtypes => '', prosrc => 'aggregate_dummy' }, + proname => 'count', prosupport => 'int8inc_support', prokind => 'a', + proisstrict => 'f', prorettype => 'int8', proargtypes => '', + prosrc => 'aggregate_dummy' }, +{ oid => '8802', descr => 'planner support for count run condition', + proname => 'int8inc_support', prorettype => 'internal', + proargtypes => 'internal', prosrc => 'int8inc_support' }, { oid => '2718', descr => 'population variance of bigint input values (square of the population standard deviation)', @@ -10079,14 +10084,26 @@ # SQL-spec window functions { oid => '3100', descr => 'row number within partition', - proname => 'row_number', prokind => 'w', proisstrict => 'f', - prorettype => 'int8', proargtypes => '', prosrc => 'window_row_number' }, + proname => 'row_number', prosupport => 'window_row_number_support', + prokind => 'w', proisstrict => 'f', prorettype => 'int8', + proargtypes => '', prosrc => 'window_row_number' }, +{ oid => '8799', descr => 'planner support for row_number run condition', + proname => 'window_row_number_support', prorettype => 'internal', + proargtypes => 'internal', prosrc => 'window_row_number_support' }, { oid => '3101', descr => 'integer rank with gaps', - proname => 'rank', prokind => 'w', proisstrict => 'f', prorettype => 'int8', + proname => 'rank', prosupport => 'window_rank_support', + prokind => 'w', proisstrict => 'f', prorettype => 'int8', proargtypes => '', prosrc => 'window_rank' }, +{ oid => '8800', descr => 'planner support for rank run condition', + proname => 'window_rank_support', prorettype => 'internal', + proargtypes => 'internal', prosrc => 'window_rank_support' }, { oid => '3102', descr => 'integer rank without gaps', - proname => 'dense_rank', prokind => 'w', proisstrict => 'f', - prorettype => 'int8', proargtypes => '', prosrc => 'window_dense_rank' }, + proname => 'dense_rank', prosupport => 'window_dense_rank_support', + prokind => 'w', proisstrict => 'f', prorettype => 'int8', proargtypes => '', + prosrc => 'window_dense_rank' }, +{ oid => '8801', descr => 'planner support for dense rank run condition', + proname => 'window_dense_rank_support', prorettype => 'internal', + proargtypes => 'internal', prosrc => 'window_dense_rank_support' }, { oid => '3103', descr => 'fractional rank within partition', proname => 'percent_rank', prokind => 'w', proisstrict => 'f', prorettype => 'float8', proargtypes => '', prosrc => 'window_percent_rank' }, diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 44dd73fc80..e41850eb9f 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -2424,6 +2424,10 @@ typedef struct WindowAggState MemoryContext curaggcontext; /* current aggregate's working data */ ExprContext *tmpcontext; /* short-term evaluation context */ + ExprState *runcondition; /* Condition which must remain true otherwise + * execution of the WindowAgg will finish, or + * NULL if there is no such condition. */ + bool all_first; /* true if the scan is starting */ bool all_done; /* true if the scan is finished */ bool partition_spooled; /* true if all tuples in current partition diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index 59737f1034..263aca4419 100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -531,7 +531,8 @@ typedef enum NodeTag T_SupportRequestSelectivity, /* in nodes/supportnodes.h */ T_SupportRequestCost, /* in nodes/supportnodes.h */ T_SupportRequestRows, /* in nodes/supportnodes.h */ - T_SupportRequestIndexCondition /* in nodes/supportnodes.h */ + T_SupportRequestIndexCondition, /* in nodes/supportnodes.h */ + T_SupportRequestWFuncMonotonic /* in nodes/supportnodes.h */ } NodeTag; /* diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index 712e56b5f2..f849977319 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -1396,6 +1396,8 @@ typedef struct WindowClause int frameOptions; /* frame_clause options, see WindowDef */ Node *startOffset; /* expression for starting bound, if any */ Node *endOffset; /* expression for ending bound, if any */ + List *runcondition; /* Exec WindowAgg while this is true */ + List *runconditionorig; /* EXPLAIN compatible version of above */ Oid startInRangeFunc; /* in_range function for startOffset */ Oid endInRangeFunc; /* in_range function for endOffset */ Oid inRangeColl; /* collation for in_range tests */ diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index 0b518ce6b2..2330b90a03 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -911,6 +911,9 @@ typedef struct WindowAgg int frameOptions; /* frame_clause options, see WindowDef */ Node *startOffset; /* expression for starting bound, if any */ Node *endOffset; /* expression for ending bound, if any */ + List *runcondition; /* Conditions that must remain true in order + * for execution to continue */ + List *runconditionorig; /* runcondition for display in EXPLAIN */ /* these fields are used with RANGE offset PRECEDING/FOLLOWING: */ Oid startInRangeFunc; /* in_range function for startOffset */ Oid endInRangeFunc; /* in_range function for endOffset */ @@ -1309,4 +1312,21 @@ typedef struct PlanInvalItem uint32 hashValue; /* hash value of object's cache lookup key */ } PlanInvalItem; +/* + * MonotonicFunction + * + * Allows the planner to track monotonic properties of functions. A function + * is monotonically increasing if a subsequent call cannot yield a lower value + * than the previous call. A monotonically decreasing function cannot yield a + * higher value on subsequent calls, and a function which is both must return + * the same value on each call. + */ +typedef enum MonotonicFunction +{ + MONOTONICFUNC_NONE = 0, + MONOTONICFUNC_INCREASING = (1 << 0), + MONOTONICFUNC_DECREASING = (1 << 1), + MONOTONICFUNC_BOTH = MONOTONICFUNC_INCREASING | MONOTONICFUNC_DECREASING +} MonotonicFunction; + #endif /* PLANNODES_H */ diff --git a/src/include/nodes/supportnodes.h b/src/include/nodes/supportnodes.h index 88b61b3ab3..0538a45094 100644 --- a/src/include/nodes/supportnodes.h +++ b/src/include/nodes/supportnodes.h @@ -33,12 +33,12 @@ #ifndef SUPPORTNODES_H #define SUPPORTNODES_H -#include "nodes/primnodes.h" +#include "nodes/plannodes.h" struct PlannerInfo; /* avoid including pathnodes.h here */ struct IndexOptInfo; struct SpecialJoinInfo; - +struct WindowClause; /* * The Simplify request allows the support function to perform plan-time @@ -239,4 +239,61 @@ typedef struct SupportRequestIndexCondition * equivalent of the function call */ } SupportRequestIndexCondition; +/* ---------- + * To allow more efficient execution of any monotonically increasing and/or + * monotonically decreasing window functions, we support calling the window + * function's prosupport function passing along this struct whenever the + * planner sees an OpExpr qual directly reference a window function in a + * subquery. When the planner encounters this, we populate this struct and + * pass it along to the window function's prosupport function so that it can + * evaluate if the given WindowFunc is; + * + * a) monotonically increasing, or + * b) monotonically decreasing, or + * c) both monotonically increasing and decreasing, or + * d) none of the above. + * + * A function that is monotonically increasing can never return a value that + * is lower than a value returned in a "previous call". A monotonically + * decreasing function can never return a value higher than a value returned + * in a previous call. A function that is both must return the same value + * each time. + * + * We define "previous call" to mean a previous call to the same WindowFunc + * struct in the same window partition. + * + * row_number() is an example of a monotonically increasing function. The + * return value will be reset back to 1 in each new partition. An example of + * a monotonically increasing and decreasing function is COUNT(*) OVER (). + * Since there is no ORDER BY clause in this example, all rows in the + * partition are peers and all rows within the partition will be within the + * frame bound. Likewise for COUNT(*) OVER(ORDER BY a ROWS BETWEEN UNBOUNDED + * PRECEDING AND UNBOUNDED FOLLOWING). + * + * COUNT(*) OVER (ORDER BY a ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + * is an example of a monotonically decreasing function. + * + * Inputs: + * 'window_func' is the pointer to the window function being called. + * + * 'window_clause' pointer to the WindowClause data. Support functions can + * use this to check frame bounds and partition by clauses, etc. + * + * Outputs: + * 'monotonic' the resulting MonotonicFunction value for the given input + * window function and window clause. + * ---------- + */ +typedef struct SupportRequestWFuncMonotonic +{ + NodeTag type; + + /* Input fields: */ + WindowFunc *window_func; /* Pointer to the window function data */ + struct WindowClause *window_clause; /* Pointer to the window clause data */ + + /* Output fields: */ + MonotonicFunction monotonic; +} SupportRequestWFuncMonotonic; + #endif /* SUPPORTNODES_H */ diff --git a/src/test/regress/expected/window.out b/src/test/regress/expected/window.out index bb9ff7f07b..fdf053ac83 100644 --- a/src/test/regress/expected/window.out +++ b/src/test/regress/expected/window.out @@ -3336,6 +3336,321 @@ WHERE depname = 'sales'; -> Seq Scan on empsalary (9 rows) +-- Test window function run conditions are properly pushed down into the +-- WindowAgg +EXPLAIN (COSTS OFF) +SELECT * FROM + (SELECT empno, + row_number() OVER (ORDER BY empno) rn + FROM empsalary) emp +WHERE rn < 3; + QUERY PLAN +---------------------------------------------- + WindowAgg + Run Condition: (row_number() OVER (?) < 3) + -> Sort + Sort Key: empsalary.empno + -> Seq Scan on empsalary +(5 rows) + +-- The following 3 statements should result the same result. +SELECT * FROM + (SELECT empno, + row_number() OVER (ORDER BY empno) rn + FROM empsalary) emp +WHERE rn < 3; + empno | rn +-------+---- + 1 | 1 + 2 | 2 +(2 rows) + +SELECT * FROM + (SELECT empno, + row_number() OVER (ORDER BY empno) rn + FROM empsalary) emp +WHERE 3 > rn; + empno | rn +-------+---- + 1 | 1 + 2 | 2 +(2 rows) + +SELECT * FROM + (SELECT empno, + row_number() OVER (ORDER BY empno) rn + FROM empsalary) emp +WHERE 2 >= rn; + empno | rn +-------+---- + 1 | 1 + 2 | 2 +(2 rows) + +-- Ensure r <= 3 is pushed down into the run condition of the window agg +EXPLAIN (COSTS OFF) +SELECT * FROM + (SELECT empno, + salary, + rank() OVER (ORDER BY salary DESC) r + FROM empsalary) emp +WHERE r <= 3; + QUERY PLAN +----------------------------------------- + WindowAgg + Run Condition: (rank() OVER (?) <= 3) + -> Sort + Sort Key: empsalary.salary DESC + -> Seq Scan on empsalary +(5 rows) + +SELECT * FROM + (SELECT empno, + salary, + rank() OVER (ORDER BY salary DESC) r + FROM empsalary) emp +WHERE r <= 3; + empno | salary | r +-------+--------+--- + 8 | 6000 | 1 + 10 | 5200 | 2 + 11 | 5200 | 2 +(3 rows) + +-- Ensure dr = 1 is converted to dr <= 1 to get all rows leading up to dr = 1 +EXPLAIN (COSTS OFF) +SELECT * FROM + (SELECT empno, + salary, + dense_rank() OVER (ORDER BY salary DESC) dr + FROM empsalary) emp +WHERE dr = 1; + QUERY PLAN +----------------------------------------------------- + Subquery Scan on emp + Filter: (emp.dr = 1) + -> WindowAgg + Run Condition: (dense_rank() OVER (?) <= 1) + -> Sort + Sort Key: empsalary.salary DESC + -> Seq Scan on empsalary +(7 rows) + +SELECT * FROM + (SELECT empno, + salary, + dense_rank() OVER (ORDER BY salary DESC) dr + FROM empsalary) emp +WHERE dr = 1; + empno | salary | dr +-------+--------+---- + 8 | 6000 | 1 +(1 row) + +-- Check COUNT() and COUNT(*) +EXPLAIN (COSTS OFF) +SELECT * FROM + (SELECT empno, + salary, + count(*) OVER (ORDER BY salary DESC) c + FROM empsalary) emp +WHERE c <= 3; + QUERY PLAN +------------------------------------------- + WindowAgg + Run Condition: (count(*) OVER (?) <= 3) + -> Sort + Sort Key: empsalary.salary DESC + -> Seq Scan on empsalary +(5 rows) + +SELECT * FROM + (SELECT empno, + salary, + count(*) OVER (ORDER BY salary DESC) c + FROM empsalary) emp +WHERE c <= 3; + empno | salary | c +-------+--------+--- + 8 | 6000 | 1 + 10 | 5200 | 3 + 11 | 5200 | 3 +(3 rows) + +EXPLAIN (COSTS OFF) +SELECT * FROM + (SELECT empno, + salary, + count(empno) OVER (ORDER BY salary DESC) c + FROM empsalary) emp +WHERE c <= 3; + QUERY PLAN +--------------------------------------------------------- + WindowAgg + Run Condition: (count(empsalary.empno) OVER (?) <= 3) + -> Sort + Sort Key: empsalary.salary DESC + -> Seq Scan on empsalary +(5 rows) + +SELECT * FROM + (SELECT empno, + salary, + count(empno) OVER (ORDER BY salary DESC) c + FROM empsalary) emp +WHERE c <= 3; + empno | salary | c +-------+--------+--- + 8 | 6000 | 1 + 10 | 5200 | 3 + 11 | 5200 | 3 +(3 rows) + +EXPLAIN (COSTS OFF) +SELECT * FROM + (SELECT empno, + salary, + count(*) OVER (ORDER BY salary DESC ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) c + FROM empsalary) emp +WHERE c >= 3; + QUERY PLAN +------------------------------------------- + WindowAgg + Run Condition: (count(*) OVER (?) >= 3) + -> Sort + Sort Key: empsalary.salary DESC + -> Seq Scan on empsalary +(5 rows) + +EXPLAIN (COSTS OFF) +SELECT * FROM + (SELECT empno, + salary, + count(*) OVER () c + FROM empsalary) emp +WHERE 11 <= c; + QUERY PLAN +-------------------------------------------- + WindowAgg + Run Condition: (11 <= count(*) OVER (?)) + -> Seq Scan on empsalary +(3 rows) + +EXPLAIN (COSTS OFF) +SELECT * FROM + (SELECT empno, + salary, + count(*) OVER (ORDER BY salary DESC) c, + dense_rank() OVER (ORDER BY salary DESC) dr + FROM empsalary) emp +WHERE dr = 1; + QUERY PLAN +----------------------------------------------------- + Subquery Scan on emp + Filter: (emp.dr = 1) + -> WindowAgg + Run Condition: (dense_rank() OVER (?) <= 1) + -> Sort + Sort Key: empsalary.salary DESC + -> Seq Scan on empsalary +(7 rows) + +-- Tests to ensure we don't push down the run condition when it's not valid to +-- do so. +-- Ensure we don't push down the clause when there is a PARTITION BY clause +EXPLAIN (COSTS OFF) +SELECT * FROM + (SELECT empno, + row_number() OVER (PARTITION BY depname ORDER BY empno) rn + FROM empsalary) emp +WHERE rn < 3; + QUERY PLAN +------------------------------------------------------------ + Subquery Scan on emp + Filter: (emp.rn < 3) + -> WindowAgg + -> Sort + Sort Key: empsalary.depname, empsalary.empno + -> Seq Scan on empsalary +(6 rows) + +EXPLAIN (COSTS OFF) +SELECT * FROM + (SELECT empno, + salary, + count(empno) OVER (PARTITION BY depname ORDER BY salary DESC) c + FROM empsalary) emp +WHERE c <= 3; + QUERY PLAN +------------------------------------------------------------------ + Subquery Scan on emp + Filter: (emp.c <= 3) + -> WindowAgg + -> Sort + Sort Key: empsalary.depname, empsalary.salary DESC + -> Seq Scan on empsalary +(6 rows) + +-- Ensure we don't push down when the frame options show that the window +-- function is not monotonically increasing +EXPLAIN (COSTS OFF) +SELECT * FROM + (SELECT empno, + salary, + count(*) OVER (ORDER BY salary DESC ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) c + FROM empsalary) emp +WHERE c <= 3; + QUERY PLAN +----------------------------------------------- + Subquery Scan on emp + Filter: (emp.c <= 3) + -> WindowAgg + -> Sort + Sort Key: empsalary.salary DESC + -> Seq Scan on empsalary +(6 rows) + +-- Ensure we don't push down when the window function's monotonic properties +-- don't match that of the clauses. +EXPLAIN (COSTS OFF) +SELECT * FROM + (SELECT empno, + salary, + count(*) OVER (ORDER BY salary) c + FROM empsalary) emp +WHERE 3 <= c; + QUERY PLAN +------------------------------------------ + Subquery Scan on emp + Filter: (3 <= emp.c) + -> WindowAgg + -> Sort + Sort Key: empsalary.salary + -> Seq Scan on empsalary +(6 rows) + +-- Ensure we don't pushdown when there are multiple window clauses to evaluate +EXPLAIN (COSTS OFF) +SELECT * FROM + (SELECT empno, + salary, + count(*) OVER (ORDER BY empno DESC) c, + dense_rank() OVER (ORDER BY salary DESC) dr + FROM empsalary) emp +WHERE dr = 1; + QUERY PLAN +----------------------------------------------------------- + Subquery Scan on emp + Filter: (emp.dr = 1) + -> WindowAgg + -> Sort + Sort Key: empsalary.empno DESC + -> WindowAgg + -> Sort + Sort Key: empsalary.salary DESC + -> Seq Scan on empsalary +(9 rows) + -- Test Sort node collapsing EXPLAIN (COSTS OFF) SELECT * FROM diff --git a/src/test/regress/sql/window.sql b/src/test/regress/sql/window.sql index 41a8e0d152..094045b873 100644 --- a/src/test/regress/sql/window.sql +++ b/src/test/regress/sql/window.sql @@ -988,6 +988,171 @@ SELECT * FROM FROM empsalary) emp WHERE depname = 'sales'; +-- Test window function run conditions are properly pushed down into the +-- WindowAgg +EXPLAIN (COSTS OFF) +SELECT * FROM + (SELECT empno, + row_number() OVER (ORDER BY empno) rn + FROM empsalary) emp +WHERE rn < 3; + +-- The following 3 statements should result the same result. +SELECT * FROM + (SELECT empno, + row_number() OVER (ORDER BY empno) rn + FROM empsalary) emp +WHERE rn < 3; + +SELECT * FROM + (SELECT empno, + row_number() OVER (ORDER BY empno) rn + FROM empsalary) emp +WHERE 3 > rn; + +SELECT * FROM + (SELECT empno, + row_number() OVER (ORDER BY empno) rn + FROM empsalary) emp +WHERE 2 >= rn; + +-- Ensure r <= 3 is pushed down into the run condition of the window agg +EXPLAIN (COSTS OFF) +SELECT * FROM + (SELECT empno, + salary, + rank() OVER (ORDER BY salary DESC) r + FROM empsalary) emp +WHERE r <= 3; + +SELECT * FROM + (SELECT empno, + salary, + rank() OVER (ORDER BY salary DESC) r + FROM empsalary) emp +WHERE r <= 3; + +-- Ensure dr = 1 is converted to dr <= 1 to get all rows leading up to dr = 1 +EXPLAIN (COSTS OFF) +SELECT * FROM + (SELECT empno, + salary, + dense_rank() OVER (ORDER BY salary DESC) dr + FROM empsalary) emp +WHERE dr = 1; + +SELECT * FROM + (SELECT empno, + salary, + dense_rank() OVER (ORDER BY salary DESC) dr + FROM empsalary) emp +WHERE dr = 1; + +-- Check COUNT() and COUNT(*) +EXPLAIN (COSTS OFF) +SELECT * FROM + (SELECT empno, + salary, + count(*) OVER (ORDER BY salary DESC) c + FROM empsalary) emp +WHERE c <= 3; + +SELECT * FROM + (SELECT empno, + salary, + count(*) OVER (ORDER BY salary DESC) c + FROM empsalary) emp +WHERE c <= 3; + +EXPLAIN (COSTS OFF) +SELECT * FROM + (SELECT empno, + salary, + count(empno) OVER (ORDER BY salary DESC) c + FROM empsalary) emp +WHERE c <= 3; + +SELECT * FROM + (SELECT empno, + salary, + count(empno) OVER (ORDER BY salary DESC) c + FROM empsalary) emp +WHERE c <= 3; + +EXPLAIN (COSTS OFF) +SELECT * FROM + (SELECT empno, + salary, + count(*) OVER (ORDER BY salary DESC ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) c + FROM empsalary) emp +WHERE c >= 3; + +EXPLAIN (COSTS OFF) +SELECT * FROM + (SELECT empno, + salary, + count(*) OVER () c + FROM empsalary) emp +WHERE 11 <= c; + +EXPLAIN (COSTS OFF) +SELECT * FROM + (SELECT empno, + salary, + count(*) OVER (ORDER BY salary DESC) c, + dense_rank() OVER (ORDER BY salary DESC) dr + FROM empsalary) emp +WHERE dr = 1; + +-- Tests to ensure we don't push down the run condition when it's not valid to +-- do so. + +-- Ensure we don't push down the clause when there is a PARTITION BY clause +EXPLAIN (COSTS OFF) +SELECT * FROM + (SELECT empno, + row_number() OVER (PARTITION BY depname ORDER BY empno) rn + FROM empsalary) emp +WHERE rn < 3; + +EXPLAIN (COSTS OFF) +SELECT * FROM + (SELECT empno, + salary, + count(empno) OVER (PARTITION BY depname ORDER BY salary DESC) c + FROM empsalary) emp +WHERE c <= 3; + +-- Ensure we don't push down when the frame options show that the window +-- function is not monotonically increasing +EXPLAIN (COSTS OFF) +SELECT * FROM + (SELECT empno, + salary, + count(*) OVER (ORDER BY salary DESC ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) c + FROM empsalary) emp +WHERE c <= 3; + +-- Ensure we don't push down when the window function's monotonic properties +-- don't match that of the clauses. +EXPLAIN (COSTS OFF) +SELECT * FROM + (SELECT empno, + salary, + count(*) OVER (ORDER BY salary) c + FROM empsalary) emp +WHERE 3 <= c; + +-- Ensure we don't pushdown when there are multiple window clauses to evaluate +EXPLAIN (COSTS OFF) +SELECT * FROM + (SELECT empno, + salary, + count(*) OVER (ORDER BY empno DESC) c, + dense_rank() OVER (ORDER BY salary DESC) dr + FROM empsalary) emp +WHERE dr = 1; + -- Test Sort node collapsing EXPLAIN (COSTS OFF) SELECT * FROM -- 2.32.0