From 9cd400f11faad7289ad94672d5ea2f8357415845 Mon Sep 17 00:00:00 2001 From: Jeevan Chalke Date: Fri, 26 Jun 2026 12:54:24 +0530 Subject: [PATCH v1 1/2] Add support for ON EMPTY clause in aggregate and window functions This commit introduces the ON EMPTY clause, allowing users to specify a default value to be returned when an aggregate or window function receives an empty input set (zero processed rows). The syntax follows the pattern: agg_function(args, default_value ON EMPTY) The ON EMPTY clause is distinct from standard NULL handling. It is only invoked when the aggregate processes no rows at all. If the aggregate processes rows whose inputs happen to be NULL, the normal transition logic applies and the ON EMPTY clause is ignored. Conversely, when all rows are removed by a FILTER clause, the input set is empty and the ON EMPTY value is returned. For a window aggregate, the default is returned for any row whose frame contains no rows. The implementation adds an 'inputReceived' flag to each per-group aggregate state. A dedicated EEOP_AGG_INPUT_RECEIVED expression step sets the flag once per row that reaches the aggregate -- after any FILTER but before the strict-input NULL check -- so that a row whose input is NULL (and is thus skipped by a strict transition function) still counts as input. The step is emitted only for aggregates that carry an ON EMPTY default. At finalization, if the flag is false, the default expression is evaluated and returned in place of the normal result (and final function, if any). Because a partial aggregate's leader only sees workers' combined transition states rather than the original rows, this flag cannot distinguish the empty-input case under partial aggregation; such aggregates are therefore marked non-partial so they are not parallelized. This patch implements the new step in the expression interpreter only; the feature is fully functional without LLVM. LLVM/JIT support for the EEOP_AGG_INPUT_RECEIVED step is added by a separate follow-on patch. 
The default expression must be a constant-like expression coercible to the aggregate's result type. It may not reference columns (at any query level) or contain aggregates, window functions, subqueries, or volatile functions. ON EMPTY cannot be combined with DISTINCT, and is rejected for non-aggregate window functions; it may be used with an ordered-set (WITHIN GROUP) aggregate, written before the WITHIN GROUP clause. Patch also includes documentation and regression tests for aggregates and window functions, covering empty tables, filtered results, type coercion, all-NULL inputs to strict aggregates, and view deparsing. This commit adds fields to Aggref and WindowFunc (and FuncCall). These node types are serialized into pg_rewrite (views/rules) and other stored expression trees, which requires a CATALOG_VERSION_NO bump. That is omitted here to avoid conflicts with concurrent commits; the committer should bump it at commit time. Proposed-by: Peter Eisentraut Jeevan Chalke --- doc/src/sgml/syntax.sgml | 46 +++ src/backend/executor/execExpr.c | 64 ++++ src/backend/executor/execExprInterp.c | 22 ++ src/backend/executor/nodeAgg.c | 32 ++ src/backend/executor/nodeWindowAgg.c | 41 ++ src/backend/nodes/makefuncs.c | 1 + src/backend/nodes/nodeFuncs.c | 8 + src/backend/optimizer/prep/prepagg.c | 12 + src/backend/optimizer/util/clauses.c | 6 + src/backend/parser/gram.y | 16 + src/backend/parser/parse_agg.c | 134 ++++++- src/backend/parser/parse_expr.c | 11 +- src/backend/parser/parse_func.c | 37 +- src/backend/utils/adt/ruleutils.c | 22 ++ src/include/executor/execExpr.h | 8 + src/include/executor/nodeAgg.h | 10 + src/include/nodes/parsenodes.h | 1 + src/include/nodes/primnodes.h | 5 + src/include/parser/parse_agg.h | 4 +- src/include/parser/parse_node.h | 1 + src/test/regress/expected/aggregates.out | 463 +++++++++++++++++++++++ src/test/regress/expected/window.out | 108 ++++++ src/test/regress/sql/aggregates.sql | 198 ++++++++++ src/test/regress/sql/window.sql | 55 +++ 24 
files changed, 1297 insertions(+), 8 deletions(-) diff --git a/doc/src/sgml/syntax.sgml b/doc/src/sgml/syntax.sgml index 67482996861..d708a5ee484 100644 --- a/doc/src/sgml/syntax.sgml +++ b/doc/src/sgml/syntax.sgml @@ -1555,6 +1555,10 @@ sqrt(2) FILTER + + ON EMPTY + + An aggregate expression represents the application of an aggregate function across the rows selected by a @@ -1564,6 +1568,7 @@ sqrt(2) aggregate_name (expression [ , ... ] [ order_by_clause ] ) [ FILTER ( WHERE filter_clause ) ] +aggregate_name (expression [ , ... ] , default_expression ON EMPTY [ order_by_clause ] ) [ FILTER ( WHERE filter_clause ) ] aggregate_name (ALL expression [ , ... ] [ order_by_clause ] ) [ FILTER ( WHERE filter_clause ) ] aggregate_name (DISTINCT expression [ , ... ] [ order_by_clause ] ) [ FILTER ( WHERE filter_clause ) ] aggregate_name ( * ) [ FILTER ( WHERE filter_clause ) ] @@ -1744,6 +1749,36 @@ FROM generate_series(1,10) AS s(i); + + If ON EMPTY is specified, then + default_expression supplies the value of the + aggregate when it processes no input rows at all; in that case the + aggregate's normal final result (and its final function, if any) is not + computed. This is distinct from null handling: it is triggered only by + an empty input set, not by null inputs that are merely ignored during + aggregation. Note that when rows exist but are all removed by a + filter_clause, the input set is empty and the + ON EMPTY value is returned. For example: + +SELECT sum(i) AS plain, sum(i, -1 ON EMPTY) AS defaulted +FROM generate_series(1,10) AS s(i) WHERE i > 100; + plain | defaulted +-------+----------- + | -1 +(1 row) + + default_expression must be a constant-like + expression of a type coercible to the aggregate's result type: it may not + refer to columns or contain aggregates, window functions, subqueries, or + volatile functions. ON EMPTY cannot be combined with + DISTINCT. It may be used with an ordered-set + aggregate, written before the WITHIN GROUP clause. 
+ Because a grouped query never produces empty groups, + ON EMPTY only takes effect for an ungrouped aggregate + over zero rows (or a group whose rows are all removed by + filter_clause). + + The predefined aggregate functions are described in . Other aggregate functions can be added @@ -1843,6 +1878,17 @@ EXCLUDE NO OTHERS described in . + + When an aggregate function is used as a window function, an + ON EMPTY clause may be given after its arguments + (using the same syntax as for a plain aggregate call, described in + ). The + default_expression is then returned for any + row whose window frame contains no rows, including a frame that becomes + empty as the frame moves. ON EMPTY is not accepted for + non-aggregate window functions. + + window_name is a reference to a named window specification defined in the query's WINDOW clause. diff --git a/src/backend/executor/execExpr.c b/src/backend/executor/execExpr.c index cfea7e160c2..9a817eecb45 100644 --- a/src/backend/executor/execExpr.c +++ b/src/backend/executor/execExpr.c @@ -3736,6 +3736,70 @@ ExecBuildAggTrans(AggState *aggstate, AggStatePerPhase phase, state->steps_len - 1); } + /* + * If aggonempty present, emit step(s) to record that the group has + * received an input row. This is placed after any FILTER (so that + * filtered-out rows do not count as input) but before the + * strict-input NULL check emitted below (so that a row whose input is + * NULL still counts as input). + * + * The step is emitted once per concurrently-evaluated grouping set, + * mirroring the transition-function calls below. We skip it entirely + * for aggregates without ON EMPTY so the common case pays no + * overhead. 
+ */ + { + bool has_aggonempty = false; + + for (int aggno = 0; aggno < aggstate->numaggs; aggno++) + { + if (aggstate->peragg[aggno].transno == transno && + aggstate->peragg[aggno].aggref->aggonempty != NULL) + { + has_aggonempty = true; + break; + } + } + + if (has_aggonempty) + { + scratch.opcode = EEOP_AGG_INPUT_RECEIVED; + scratch.d.agg_input_received.transno = transno; + + if (doSort) + { + int processGroupingSets = Max(phase->numsets, 1); + int setoff = 0; + + for (int setno = 0; setno < processGroupingSets; setno++) + { + scratch.d.agg_input_received.setoff = setoff; + ExprEvalPushStep(state, &scratch); + setoff++; + } + } + + if (doHash) + { + int numHashes = aggstate->num_hashes; + int setoff; + + /* in MIXED mode, there'll be preceding transition values */ + if (aggstate->aggstrategy != AGG_HASHED) + setoff = aggstate->maxsets; + else + setoff = 0; + + for (int setno = 0; setno < numHashes; setno++) + { + scratch.d.agg_input_received.setoff = setoff; + ExprEvalPushStep(state, &scratch); + setoff++; + } + } + } + } + /* * Evaluate arguments to aggregate/combine function. */ diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c index 0634af964a9..2df5722ddba 100644 --- a/src/backend/executor/execExprInterp.c +++ b/src/backend/executor/execExprInterp.c @@ -592,6 +592,7 @@ ExecInterpExpr(ExprState *state, ExprContext *econtext, bool *isnull) &&CASE_EEOP_AGG_STRICT_INPUT_CHECK_ARGS_1, &&CASE_EEOP_AGG_STRICT_INPUT_CHECK_NULLS, &&CASE_EEOP_AGG_PLAIN_PERGROUP_NULLCHECK, + &&CASE_EEOP_AGG_INPUT_RECEIVED, &&CASE_EEOP_AGG_PLAIN_TRANS_INIT_STRICT_BYVAL, &&CASE_EEOP_AGG_PLAIN_TRANS_STRICT_BYVAL, &&CASE_EEOP_AGG_PLAIN_TRANS_BYVAL, @@ -2106,6 +2107,27 @@ ExecInterpExpr(ExprState *state, ExprContext *econtext, bool *isnull) EEO_NEXT(); } + /* + * Mark that an aggregate's per-group state has received an input row + * (used by the ON EMPTY clause). 
This is emitted once per row that + * reaches the aggregate, after any FILTER but before the strict-input + * NULL check, so that a row whose input is NULL still counts as + * input. Only emitted for aggregates that actually carry an ON EMPTY + * default. + */ + EEO_CASE(EEOP_AGG_INPUT_RECEIVED) + { + AggState *aggstate = castNode(AggState, state->parent); + AggStatePerGroup pergroup_allaggs = + aggstate->all_pergroups[op->d.agg_input_received.setoff]; + + /* pergroup may be NULL in the hashed/spilled case; just skip */ + if (pergroup_allaggs != NULL) + pergroup_allaggs[op->d.agg_input_received.transno].inputReceived = true; + + EEO_NEXT(); + } + /* * Different types of aggregate transition functions are implemented * as different types of steps, to avoid incurring unnecessary diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c index 925caadd2ce..fed8fc33710 100644 --- a/src/backend/executor/nodeAgg.c +++ b/src/backend/executor/nodeAgg.c @@ -649,6 +649,12 @@ initialize_aggregate(AggState *aggstate, AggStatePerTrans pertrans, * still need to do this. */ pergroupstate->noTransValue = pertrans->initValueIsNull; + + /* + * Initialize the flag used for ON EMPTY to track whether any input rows + * were received. + */ + pergroupstate->inputReceived = false; } /* @@ -1057,6 +1063,25 @@ finalize_aggregate(AggState *aggstate, oldContext = MemoryContextSwitchTo(aggstate->ss.ps.ps_ExprContext->ecxt_per_tuple_memory); + /* + * If the ON EMPTY clause is specified with a default value, evaluate and + * return it in cases where no input rows were received. + * + * The inputReceived flag is used to detect the empty set case (zero rows + * processed). 
+ */ + if (peragg->aggonemptystate != NULL && !pergroupstate->inputReceived) + { + *resultVal = ExecEvalExpr(peragg->aggonemptystate, + aggstate->ss.ps.ps_ExprContext, + resultIsNull); + + /* Switch back to the caller's context before returning */ + MemoryContextSwitchTo(oldContext); + + return; + } + /* * Evaluate any direct arguments. We do this even if there's no finalfn * (which is unlikely anyway), so that side-effects happen as expected. @@ -3959,6 +3984,13 @@ ExecInitAgg(Agg *node, EState *estate, int eflags) get_func_name(transfn_oid)); InvokeFunctionExecuteHook(transfn_oid); + /* Build expression state for ON EMPTY default expression */ + if (aggref->aggonempty) + peragg->aggonemptystate = ExecInitExpr(aggref->aggonempty, + (PlanState *) aggstate); + else + peragg->aggonemptystate = NULL; + /* * initval is potentially null, so don't try to access it as a * struct field. Must do it the hard way with SysCacheGetAttr. diff --git a/src/backend/executor/nodeWindowAgg.c b/src/backend/executor/nodeWindowAgg.c index f1c524d00df..2a943d0a93b 100644 --- a/src/backend/executor/nodeWindowAgg.c +++ b/src/backend/executor/nodeWindowAgg.c @@ -172,6 +172,12 @@ typedef struct WindowStatePerAggData /* Data local to eval_windowaggregates() */ bool restart; /* need to restart this agg in this cycle? */ + + /* ON EMPTY support */ + bool inputReceived; /* true if any input row was received in + * current frame */ + /* ExprState for evaluating ON EMPTY default value, or NULL */ + ExprState *aggonemptystate; } WindowStatePerAggData; static void initialize_windowaggregate(WindowAggState *winstate, @@ -279,6 +285,12 @@ initialize_windowaggregate(WindowAggState *winstate, peraggstate->transValueCount = 0; peraggstate->resultValue = (Datum) 0; peraggstate->resultValueIsNull = true; + + /* + * Initialize the flag used for ON EMPTY to track whether any input rows + * were received in this frame. 
+ */ + peraggstate->inputReceived = false; } /* @@ -326,6 +338,9 @@ advance_windowaggregate(WindowAggState *winstate, i++; } + /* Mark that this aggregate received input (used for ON EMPTY) */ + peraggstate->inputReceived = true; + if (peraggstate->transfn.fn_strict) { /* @@ -635,6 +650,25 @@ finalize_windowaggregate(WindowAggState *winstate, oldContext = MemoryContextSwitchTo(winstate->ss.ps.ps_ExprContext->ecxt_per_tuple_memory); + /* + * If the ON EMPTY clause is specified with a default value, evaluate and + * return it in cases where no input rows were received in the current + * frame. + * + * The inputReceived flag is used to detect the empty frame case (zero + * rows processed in this frame). + */ + if (peraggstate->aggonemptystate != NULL && !peraggstate->inputReceived) + { + *result = ExecEvalExpr(peraggstate->aggonemptystate, + winstate->ss.ps.ps_ExprContext, + isnull); + + MemoryContextSwitchTo(oldContext); + + return; + } + /* * Apply the agg's finalfn if one is provided, else return transValue. */ @@ -3135,6 +3169,13 @@ initialize_peragg(WindowAggState *winstate, WindowFunc *wfunc, &peraggstate->transtypeLen, &peraggstate->transtypeByVal); + /* Build expression state for ON EMPTY default expression */ + if (wfunc->aggonempty) + peraggstate->aggonemptystate = ExecInitExpr(wfunc->aggonempty, + (PlanState *) winstate); + else + peraggstate->aggonemptystate = NULL; + /* * initval is potentially null, so don't try to access it as a struct * field. Must do it the hard way with SysCacheGetAttr. 
diff --git a/src/backend/nodes/makefuncs.c b/src/backend/nodes/makefuncs.c index 40b09958ac2..f76a5d3a491 100644 --- a/src/backend/nodes/makefuncs.c +++ b/src/backend/nodes/makefuncs.c @@ -681,6 +681,7 @@ makeFuncCall(List *name, List *args, CoercionForm funcformat, int location) n->args = args; n->agg_order = NIL; n->agg_filter = NULL; + n->agg_on_empty = NULL; n->over = NULL; n->agg_within_group = false; n->agg_star = false; diff --git a/src/backend/nodes/nodeFuncs.c b/src/backend/nodes/nodeFuncs.c index 2a2e00b372e..c6e50979350 100644 --- a/src/backend/nodes/nodeFuncs.c +++ b/src/backend/nodes/nodeFuncs.c @@ -2168,6 +2168,8 @@ expression_tree_walker_impl(Node *node, return true; if (WALK(expr->aggfilter)) return true; + if (WALK(expr->aggonempty)) + return true; } break; case T_GroupingFunc: @@ -2187,6 +2189,8 @@ expression_tree_walker_impl(Node *node, return true; if (WALK(expr->aggfilter)) return true; + if (WALK(expr->aggonempty)) + return true; if (WALK(expr->runCondition)) return true; } @@ -3101,6 +3105,7 @@ expression_tree_mutator_impl(Node *node, MUTATE(newnode->aggorder, aggref->aggorder, List *); MUTATE(newnode->aggdistinct, aggref->aggdistinct, List *); MUTATE(newnode->aggfilter, aggref->aggfilter, Expr *); + MUTATE(newnode->aggonempty, aggref->aggonempty, Expr *); return (Node *) newnode; } break; @@ -3135,6 +3140,7 @@ expression_tree_mutator_impl(Node *node, FLATCOPY(newnode, wfunc, WindowFunc); MUTATE(newnode->args, wfunc->args, List *); MUTATE(newnode->aggfilter, wfunc->aggfilter, Expr *); + MUTATE(newnode->aggonempty, wfunc->aggonempty, Expr *); return (Node *) newnode; } break; @@ -4532,6 +4538,8 @@ raw_expression_tree_walker_impl(Node *node, return true; if (WALK(fcall->agg_filter)) return true; + if (WALK(fcall->agg_on_empty)) + return true; if (WALK(fcall->over)) return true; /* function name is deemed uninteresting */ diff --git a/src/backend/optimizer/prep/prepagg.c b/src/backend/optimizer/prep/prepagg.c index 3737cc15ba1..cc16bc4edeb 
100644 --- a/src/backend/optimizer/prep/prepagg.c +++ b/src/backend/optimizer/prep/prepagg.c @@ -215,6 +215,18 @@ preprocess_aggref(Aggref *aggref, PlannerInfo *root) ReleaseSysCache(aggTuple); + /* + * An ON EMPTY default is returned only when the aggregate processes zero + * input rows, which is detected via a per-group "input received" flag set + * as the aggregate's input rows are processed. Under partial aggregation + * the leader sees only the workers' combined partial states (via the + * combine function), not the original rows, so that flag cannot reliably + * distinguish the empty-input case. Disable partial aggregation for such + * aggregates. + */ + if (aggref->aggonempty != NULL) + root->hasNonPartialAggs = true; + /* * 1. See if this is identical to another aggregate function call that * we've seen already. diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c index 01997e22266..a3df2b451a6 100644 --- a/src/backend/optimizer/util/clauses.c +++ b/src/backend/optimizer/util/clauses.c @@ -2777,6 +2777,7 @@ eval_const_expressions_mutator(Node *node, Oid funcid = expr->winfnoid; List *args; Expr *aggfilter; + Expr *aggonempty; HeapTuple func_tuple; WindowFunc *newexpr; @@ -2806,6 +2807,10 @@ eval_const_expressions_mutator(Node *node, aggfilter = (Expr *) eval_const_expressions_mutator((Node *) expr->aggfilter, context); + /* ... 
and the ON EMPTY expression */ + aggonempty = (Expr *) + eval_const_expressions_mutator((Node *) expr->aggonempty, + context); /* And build the replacement WindowFunc node */ newexpr = makeNode(WindowFunc); @@ -2815,6 +2820,7 @@ eval_const_expressions_mutator(Node *node, newexpr->inputcollid = expr->inputcollid; newexpr->args = args; newexpr->aggfilter = aggfilter; + newexpr->aggonempty = aggonempty; newexpr->runCondition = expr->runCondition; newexpr->winref = expr->winref; newexpr->winstar = expr->winstar; diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index ff4e1388c55..52271b010c0 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -467,6 +467,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); %type group_by_list %type group_by_item empty_grouping_set rollup_clause cube_clause %type grouping_sets_clause +%type on_empty %type opt_fdw_options fdw_options %type fdw_option @@ -16590,6 +16591,16 @@ func_application: func_name '(' ')' n->agg_order = $7; $$ = (Node *) n; } + | func_name '(' func_arg_list ',' on_empty opt_sort_clause ')' + { + FuncCall *n = makeFuncCall($1, $3, + COERCE_EXPLICIT_CALL, + @1); + + n->agg_on_empty = $5; + n->agg_order = $6; + $$ = (Node *) n; + } | func_name '(' ALL func_arg_list opt_sort_clause ')' { FuncCall *n = makeFuncCall($1, $4, @@ -16635,6 +16646,11 @@ func_application: func_name '(' ')' ; +on_empty: a_expr ON EMPTY_P + { $$ = $1; } + ; + + /* * func_expr and its cousin func_expr_windowless are split out from c_expr just * so that we have classifications for "everything that is a function call or diff --git a/src/backend/parser/parse_agg.c b/src/backend/parser/parse_agg.c index acb933392de..9479a1a03b3 100644 --- a/src/backend/parser/parse_agg.c +++ b/src/backend/parser/parse_agg.c @@ -21,6 +21,7 @@ #include "common/int.h" #include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" +#include "optimizer/clauses.h" #include "optimizer/optimizer.h" #include 
"parser/parse_agg.h" #include "parser/parse_clause.h" @@ -82,6 +83,9 @@ static Var *buildGroupedVar(int attnum, Index ressortgroupref, static void check_agglevels_and_constraints(ParseState *pstate, Node *expr); static List *expand_groupingset_node(GroupingSet *gs); static Node *make_agg_arg(Oid argtype, Oid argcollation); +static Node *check_agg_on_empty(ParseState *pstate, Oid aggtype, + Expr *agg_on_empty, int location); +static bool agg_on_empty_contains_var_walker(Node *node, void *context); /* @@ -111,7 +115,8 @@ static Node *make_agg_arg(Oid argtype, Oid argcollation); */ void transformAggregateCall(ParseState *pstate, Aggref *agg, - List *args, List *aggorder, bool agg_distinct) + List *args, List *aggorder, bool agg_distinct, + Expr *agg_on_empty) { List *argtypes = NIL; List *tlist = NIL; @@ -231,6 +236,8 @@ transformAggregateCall(ParseState *pstate, Aggref *agg, agg->args = tlist; agg->aggorder = torder; agg->aggdistinct = tdistinct; + agg->aggonempty = (Expr *) check_agg_on_empty(pstate, agg->aggtype, + agg_on_empty, agg->location); /* * Now build the aggargtypes list with the type OIDs of the direct and @@ -499,6 +506,14 @@ check_agglevels_and_constraints(ParseState *pstate, Node *expr) else err = _("grouping operations are not allowed in DEFAULT expressions"); + break; + case EXPR_KIND_AGG_ON_EMPTY: + + if (isAgg) + err = _("aggregate functions are not allowed in ON EMPTY expressions"); + else + err = _("grouping operations are not allowed in ON EMPTY expressions"); + break; case EXPR_KIND_INDEX_EXPRESSION: if (isAgg) @@ -892,7 +907,7 @@ check_agg_arguments_walker(Node *node, */ void transformWindowFuncCall(ParseState *pstate, WindowFunc *wfunc, - WindowDef *windef) + WindowDef *windef, Expr *agg_on_empty) { const char *err; bool errkind; @@ -1003,6 +1018,9 @@ transformWindowFuncCall(ParseState *pstate, WindowFunc *wfunc, case EXPR_KIND_FUNCTION_DEFAULT: err = _("window functions are not allowed in DEFAULT expressions"); break; + case 
EXPR_KIND_AGG_ON_EMPTY: + err = _("window functions are not allowed in ON EMPTY expressions"); + break; case EXPR_KIND_INDEX_EXPRESSION: err = _("window functions are not allowed in index expressions"); break; @@ -1140,6 +1158,9 @@ transformWindowFuncCall(ParseState *pstate, WindowFunc *wfunc, } pstate->p_hasWindowFuncs = true; + wfunc->aggonempty = (Expr *) check_agg_on_empty(pstate, wfunc->wintype, + agg_on_empty, + wfunc->location); } /* @@ -2408,3 +2429,112 @@ make_agg_arg(Oid argtype, Oid argcollation) argp->location = -1; return (Node *) argp; } + +/* + * check_agg_on_empty - + * Checks the aggregate's ON EMPTY expression for correctness. + */ +static Node * +check_agg_on_empty(ParseState *pstate, Oid aggtype, Expr *agg_on_empty, + int location) +{ + Oid defexprtype; + + if (!agg_on_empty) + return NULL; + + /* + * default expression must be a constant - it cannot contain column + * references (Var nodes), aggregates, or window functions. + * + * Note we cannot use contain_var_clause() here: it only detects Vars of + * the current query level, so a correlated reference to an outer query's + * column (varlevelsup > 0) would slip through and make the "constant" + * default vary per outer row. Reject Vars of any level. 
+ */ + if (agg_on_empty_contains_var_walker((Node *) agg_on_empty, NULL)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("ON EMPTY expression must be a constant value"), + parser_errposition(pstate, location))); + + if (contain_agg_clause((Node *) agg_on_empty)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("ON EMPTY expression must not contain an aggregate function"), + parser_errposition(pstate, location))); + + if (contain_windowfuncs((Node *) agg_on_empty)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("ON EMPTY expression must not contain a window function"), + parser_errposition(pstate, location))); + + /* + * Sub-selects are not constant and, more importantly, the ON EMPTY + * expression is only run through ExecInitExpr() at execution time, never + * the sub-plan setup that a normal target expression receives, so a + * sub-select here would fail (or crash) at execution. Reject it. + */ + if (checkExprHasSubLink((Node *) agg_on_empty)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("ON EMPTY expression must not contain a subquery"), + parser_errposition(pstate, location))); + + /* + * The default is evaluated only when a group or frame turns out to be + * empty, so a volatile expression would not behave like a per-row value. + * Restrict it to a stable/immutable, constant-like expression. + */ + if (contain_volatile_functions((Node *) agg_on_empty)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("ON EMPTY expression must not contain a volatile function"), + parser_errposition(pstate, location))); + + defexprtype = exprType((Node *) agg_on_empty); + + /* default expression must be coercible to the aggregate's result type. 
*/ + agg_on_empty = (Expr *) coerce_to_target_type(pstate, + (Node *) agg_on_empty, + defexprtype, + aggtype, + -1, + COERCION_ASSIGNMENT, + COERCE_IMPLICIT_CAST, + -1); + + if (agg_on_empty == NULL) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("ON EMPTY expression type %s cannot be coerced to aggregate result type %s", + format_type_be(defexprtype), + format_type_be(aggtype)), + parser_errposition(pstate, location))); + + return (Node *) agg_on_empty; +} + +/* + * agg_on_empty_contains_var_walker - + * Returns true if the node tree contains any Var (a column reference) or + * CurrentOfExpr, regardless of query level. + * + * Unlike contain_var_clause(), this also reports Vars belonging to outer + * query levels (varlevelsup > 0), which is what we need to ensure an ON EMPTY + * default really is constant. Sub-selects are rejected separately, so there + * is no need to descend into them here. This runs during parse analysis, so + * PlaceHolderVars (which the planner introduces) cannot appear; a Var inside + * one would be caught by the recursion anyway. 
+ */ +static bool +agg_on_empty_contains_var_walker(Node *node, void *context) +{ + if (node == NULL) + return false; + if (IsA(node, Var) || IsA(node, CurrentOfExpr)) + return true; + return expression_tree_walker(node, agg_on_empty_contains_var_walker, + context); +} diff --git a/src/backend/parser/parse_expr.c b/src/backend/parser/parse_expr.c index 9adc9d4c0f6..add73b52580 100644 --- a/src/backend/parser/parse_expr.c +++ b/src/backend/parser/parse_expr.c @@ -567,6 +567,7 @@ transformColumnRef(ParseState *pstate, ColumnRef *cref) case EXPR_KIND_CHECK_CONSTRAINT: case EXPR_KIND_DOMAIN_CHECK: case EXPR_KIND_FUNCTION_DEFAULT: + case EXPR_KIND_AGG_ON_EMPTY: case EXPR_KIND_INDEX_EXPRESSION: case EXPR_KIND_INDEX_PREDICATE: case EXPR_KIND_STATS_EXPRESSION: @@ -1853,6 +1854,9 @@ transformSubLink(ParseState *pstate, SubLink *sublink) case EXPR_KIND_FUNCTION_DEFAULT: err = _("cannot use subquery in DEFAULT expression"); break; + case EXPR_KIND_AGG_ON_EMPTY: + err = _("cannot use subquery in ON EMPTY expression"); + break; case EXPR_KIND_INDEX_EXPRESSION: err = _("cannot use subquery in index expression"); break; @@ -3227,6 +3231,8 @@ ParseExprKindName(ParseExprKind exprKind) case EXPR_KIND_COLUMN_DEFAULT: case EXPR_KIND_FUNCTION_DEFAULT: return "DEFAULT"; + case EXPR_KIND_AGG_ON_EMPTY: + return "ON EMPTY"; case EXPR_KIND_INDEX_EXPRESSION: return "index expression"; case EXPR_KIND_INDEX_PREDICATE: @@ -3991,7 +3997,7 @@ transformJsonAggConstructor(ParseState *pstate, JsonAggConstructor *agg_ctor, parser_errposition(pstate, agg_ctor->location)); /* parse_agg.c does additional window-func-specific processing */ - transformWindowFuncCall(pstate, wfunc, agg_ctor->over); + transformWindowFuncCall(pstate, wfunc, agg_ctor->over, NULL); node = (Node *) wfunc; } @@ -4018,7 +4024,8 @@ transformJsonAggConstructor(ParseState *pstate, JsonAggConstructor *agg_ctor, aggref->aggtransno = -1; aggref->location = agg_ctor->location; - transformAggregateCall(pstate, aggref, args, 
agg_ctor->agg_order, false); + transformAggregateCall(pstate, aggref, args, agg_ctor->agg_order, + false, NULL); node = (Node *) aggref; } diff --git a/src/backend/parser/parse_func.c b/src/backend/parser/parse_func.c index fb306c05112..c9aa8baa2ff 100644 --- a/src/backend/parser/parse_func.c +++ b/src/backend/parser/parse_func.c @@ -95,6 +95,7 @@ ParseFuncOrColumn(ParseState *pstate, List *funcname, List *fargs, bool is_column = (fn == NULL); List *agg_order = (fn ? fn->agg_order : NIL); Expr *agg_filter = NULL; + Expr *agg_on_empty = NULL; WindowDef *over = (fn ? fn->over : NULL); bool agg_within_group = (fn ? fn->agg_within_group : false); bool agg_star = (fn ? fn->agg_star : false); @@ -130,6 +131,15 @@ ParseFuncOrColumn(ParseState *pstate, List *funcname, List *fargs, EXPR_KIND_FILTER, "FILTER"); + /* + * If there's an aggregate ON EMPTY default value given, transform it + * here. It is parsed in its own expression kind so that disallowed + * constructs are reported as occurring "in ON EMPTY expressions". + */ + if (fn && fn->agg_on_empty != NULL) + agg_on_empty = (Expr *) transformExpr(pstate, fn->agg_on_empty, + EXPR_KIND_AGG_ON_EMPTY); + /* * Most of the rest of the parser just assumes that functions do not have * more than FUNC_MAX_ARGS parameters. 
We have to test here to protect @@ -226,6 +236,7 @@ ParseFuncOrColumn(ParseState *pstate, List *funcname, List *fargs, */ could_be_projection = (nargs == 1 && !proc_call && agg_order == NIL && agg_filter == NULL && + agg_on_empty == NULL && !agg_star && !agg_distinct && over == NULL && !func_variadic && argnames == NIL && list_length(funcname) == 1 && @@ -339,6 +350,12 @@ ParseFuncOrColumn(ParseState *pstate, List *funcname, List *fargs, errmsg("ORDER BY specified, but %s is not an aggregate function", NameListToString(funcname)), parser_errposition(pstate, location))); + if (agg_on_empty != NULL) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("ON EMPTY specified, but %s is not an aggregate function", + NameListToString(funcname)), + parser_errposition(pstate, location))); if (agg_filter) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), @@ -835,7 +852,8 @@ ParseFuncOrColumn(ParseState *pstate, List *funcname, List *fargs, parser_errposition(pstate, location))); /* parse_agg.c does additional aggregate-specific processing */ - transformAggregateCall(pstate, aggref, fargs, agg_order, agg_distinct); + transformAggregateCall(pstate, aggref, fargs, agg_order, agg_distinct, + agg_on_empty); retval = (Node *) aggref; } @@ -897,6 +915,18 @@ ParseFuncOrColumn(ParseState *pstate, List *funcname, List *fargs, errmsg("FILTER is not implemented for non-aggregate window functions"), parser_errposition(pstate, location))); + /* + * ON EMPTY only has meaning for aggregates: it supplies the value to + * return when the aggregate processes no rows. A true window + * function does not aggregate input rows, so it would be silently + * ignored; reject it instead. 
+ */ + if (!wfunc->winagg && agg_on_empty) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("ON EMPTY is not implemented for non-aggregate window functions"), + parser_errposition(pstate, location))); + /* * Window functions can't either take or return sets */ @@ -915,7 +945,7 @@ ParseFuncOrColumn(ParseState *pstate, List *funcname, List *fargs, parser_errposition(pstate, location))); /* parse_agg.c does additional window-func-specific processing */ - transformWindowFuncCall(pstate, wfunc, over); + transformWindowFuncCall(pstate, wfunc, over, agg_on_empty); retval = (Node *) wfunc; } @@ -2755,6 +2785,9 @@ check_srf_call_placement(ParseState *pstate, Node *last_srf, int location) case EXPR_KIND_FUNCTION_DEFAULT: err = _("set-returning functions are not allowed in DEFAULT expressions"); break; + case EXPR_KIND_AGG_ON_EMPTY: + err = _("set-returning functions are not allowed in ON EMPTY expressions"); + break; case EXPR_KIND_INDEX_EXPRESSION: err = _("set-returning functions are not allowed in index expressions"); break; diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c index 88de5c0481c..81da620f461 100644 --- a/src/backend/utils/adt/ruleutils.c +++ b/src/backend/utils/adt/ruleutils.c @@ -11524,6 +11524,14 @@ get_agg_expr_helper(Aggref *aggref, deparse_context *context, */ Assert(!aggref->aggvariadic); get_rule_expr((Node *) aggref->aggdirectargs, context, true); + + if (aggref->aggonempty != NULL) + { + appendStringInfoString(buf, ", "); + get_rule_expr((Node *) aggref->aggonempty, context, false); + appendStringInfoString(buf, " ON EMPTY"); + } + Assert(aggref->aggorder != NIL); appendStringInfoString(buf, ") WITHIN GROUP (ORDER BY "); get_rule_orderby(aggref->aggorder, aggref->args, false, context); @@ -11569,6 +11577,13 @@ get_agg_expr_helper(Aggref *aggref, deparse_context *context, } } + if (aggref->aggonempty != NULL) + { + appendStringInfoString(buf, ", "); + get_rule_expr((Node *) aggref->aggonempty, 
context, false); + appendStringInfoString(buf, " ON EMPTY"); + } + if (aggref->aggorder != NIL) { appendStringInfoString(buf, " ORDER BY "); @@ -11669,6 +11684,13 @@ get_windowfunc_expr_helper(WindowFunc *wfunc, deparse_context *context, get_rule_expr((Node *) wfunc->args, context, true); } + if (wfunc->aggonempty != NULL) + { + appendStringInfoString(buf, ", "); + get_rule_expr((Node *) wfunc->aggonempty, context, false); + appendStringInfoString(buf, " ON EMPTY"); + } + if (options) appendStringInfoString(buf, options); diff --git a/src/include/executor/execExpr.h b/src/include/executor/execExpr.h index c61b3d624d5..ac95dd78316 100644 --- a/src/include/executor/execExpr.h +++ b/src/include/executor/execExpr.h @@ -281,6 +281,7 @@ typedef enum ExprEvalOp EEOP_AGG_STRICT_INPUT_CHECK_ARGS_1, EEOP_AGG_STRICT_INPUT_CHECK_NULLS, EEOP_AGG_PLAIN_PERGROUP_NULLCHECK, + EEOP_AGG_INPUT_RECEIVED, EEOP_AGG_PLAIN_TRANS_INIT_STRICT_BYVAL, EEOP_AGG_PLAIN_TRANS_STRICT_BYVAL, EEOP_AGG_PLAIN_TRANS_BYVAL, @@ -729,6 +730,13 @@ typedef struct ExprEvalStep int jumpnull; } agg_plain_pergroup_nullcheck; + /* for EEOP_AGG_INPUT_RECEIVED */ + struct + { + int setoff; + int transno; + } agg_input_received; + /* for EEOP_AGG_PRESORTED_DISTINCT_{SINGLE,MULTI} */ struct { diff --git a/src/include/executor/nodeAgg.h b/src/include/executor/nodeAgg.h index 1e1be9666ae..377fac40494 100644 --- a/src/include/executor/nodeAgg.h +++ b/src/include/executor/nodeAgg.h @@ -229,6 +229,9 @@ typedef struct AggStatePerAggData * aggregates because the final function is read-write. */ bool shareable; + + /* ExprState for evaluating ON EMPTY default value, or NULL */ + ExprState *aggonemptystate; } AggStatePerAggData; /* @@ -264,6 +267,13 @@ typedef struct AggStatePerGroupData * NULL and not auto-replace it with a later input value. Only the first * non-NULL input will be auto-substituted. */ + + /* + * Whether this group has seen any input row yet. 
Set only if ON EMPTY + * is given; tested at finalization to detect the empty-input case. + */ +#define FIELDNO_AGGSTATEPERGROUPDATA_INPUTRECEIVED 3 + bool inputReceived; } AggStatePerGroupData; /* diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index 4133c404a6b..925f2ed7f64 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -462,6 +462,7 @@ typedef struct FuncCall bool agg_within_group; /* ORDER BY appeared in WITHIN GROUP */ bool agg_star; /* argument was really '*' */ bool agg_distinct; /* arguments were labeled DISTINCT */ + Node *agg_on_empty; /* ON EMPTY default expression, or NULL */ bool func_variadic; /* last argument was labeled VARIADIC */ CoercionForm funcformat; /* how to display this node */ ParseLoc location; /* token location, or -1 if unknown */ diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h index bb05aeebee4..930d9eac6d3 100644 --- a/src/include/nodes/primnodes.h +++ b/src/include/nodes/primnodes.h @@ -493,6 +493,9 @@ typedef struct Aggref /* FILTER expression, if any */ Expr *aggfilter; + /* ON EMPTY expression, if any */ + Expr *aggonempty; + /* true if argument list was really '*' */ bool aggstar pg_node_attr(query_jumble_ignore); @@ -598,6 +601,8 @@ typedef struct WindowFunc List *args; /* FILTER expression, if any */ Expr *aggfilter; + /* ON EMPTY expression, if any */ + Expr *aggonempty; /* List of WindowFuncRunConditions to help short-circuit execution */ List *runCondition pg_node_attr(query_jumble_ignore); /* index of associated WindowClause */ diff --git a/src/include/parser/parse_agg.h b/src/include/parser/parse_agg.h index 8a5653798db..f93e956b83e 100644 --- a/src/include/parser/parse_agg.h +++ b/src/include/parser/parse_agg.h @@ -17,12 +17,12 @@ extern void transformAggregateCall(ParseState *pstate, Aggref *agg, List *args, List *aggorder, - bool agg_distinct); + bool agg_distinct, Expr *agg_on_empty); extern Node *transformGroupingFunc(ParseState
*pstate, GroupingFunc *p); extern void transformWindowFuncCall(ParseState *pstate, WindowFunc *wfunc, - WindowDef *windef); + WindowDef *windef, Expr *agg_on_empty); extern void parseCheckAggregates(ParseState *pstate, Query *qry); diff --git a/src/include/parser/parse_node.h b/src/include/parser/parse_node.h index f7f4ba6c2a8..00a6b6f9f00 100644 --- a/src/include/parser/parse_node.h +++ b/src/include/parser/parse_node.h @@ -70,6 +70,7 @@ typedef enum ParseExprKind EXPR_KIND_DOMAIN_CHECK, /* CHECK constraint for a domain */ EXPR_KIND_COLUMN_DEFAULT, /* default value for a table column */ EXPR_KIND_FUNCTION_DEFAULT, /* default parameter value for function */ + EXPR_KIND_AGG_ON_EMPTY, /* aggregate/window ON EMPTY default value */ EXPR_KIND_INDEX_EXPRESSION, /* index expression */ EXPR_KIND_INDEX_PREDICATE, /* index predicate */ EXPR_KIND_STATS_EXPRESSION, /* extended statistics expression */ diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out index 89e051ee824..a9c5315bc52 100644 --- a/src/test/regress/expected/aggregates.out +++ b/src/test/regress/expected/aggregates.out @@ -3983,3 +3983,466 @@ drop table agg_hash_1; drop table agg_hash_2; drop table agg_hash_3; drop table agg_hash_4; +-- Test ON EMPTY clause for aggregates +-- create a test table for ON EMPTY tests +CREATE TABLE agg_on_empty(id int, val int, grpid int); +-- with empty table +SELECT sum(val, -1 ON EMPTY) FROM agg_on_empty; + sum +----- + -1 +(1 row) + +INSERT INTO agg_on_empty VALUES (1, 10, 10), (2, 20, 10), (3, 30, 20), (4, NULL, 20), (5, 50, 30); +-- basic ON EMPTY tests with numeric default +SELECT sum(val, 0 ON EMPTY) FROM agg_on_empty WHERE grpid = 40; + sum +----- + 0 +(1 row) + +SELECT sum(val, -1 ON EMPTY) FROM agg_on_empty WHERE grpid = 40; + sum +----- + -1 +(1 row) + +SELECT avg(val, 0 ON EMPTY) FROM agg_on_empty WHERE grpid = 40; + avg +----- + 0 +(1 row) + +SELECT max(val, 999 ON EMPTY) FROM agg_on_empty WHERE grpid = 40; + max +----- + 999 
+(1 row) + +SELECT min(val, -999 ON EMPTY) FROM agg_on_empty WHERE grpid = 40; + min +------ + -999 +(1 row) + +SELECT count(val, 0 ON EMPTY) FROM agg_on_empty WHERE grpid = 40; + count +------- + 0 +(1 row) + +-- with non-empty result set (should use actual aggregate result) +SELECT sum(val, 0 ON EMPTY) FROM agg_on_empty WHERE grpid = 10; + sum +----- + 30 +(1 row) + +SELECT avg(val, 0 ON EMPTY) FROM agg_on_empty WHERE grpid = 10; + avg +--------------------- + 15.0000000000000000 +(1 row) + +SELECT max(val, 999 ON EMPTY) FROM agg_on_empty WHERE grpid = 10; + max +----- + 20 +(1 row) + +SELECT min(val, -999 ON EMPTY) FROM agg_on_empty WHERE grpid = 10; + min +----- + 10 +(1 row) + +-- with string default +SELECT string_agg(grpid::text, ',', 'NONE' ON EMPTY) FROM agg_on_empty WHERE grpid = 40; + string_agg +------------ + NONE +(1 row) + +SELECT string_agg(grpid::text, ',', 'NONE' ON EMPTY) FROM agg_on_empty WHERE grpid = 10; + string_agg +------------ + 10,10 +(1 row) + +-- with NULL default +SELECT sum(val, NULL ON EMPTY) FROM agg_on_empty WHERE grpid = 40; + sum +----- + +(1 row) + +SELECT avg(val, NULL ON EMPTY) FROM agg_on_empty WHERE grpid = 40; + avg +----- + +(1 row) + +SELECT string_agg(grpid::text, ',', NULL ON EMPTY) FROM agg_on_empty WHERE grpid = 40; + string_agg +------------ + +(1 row) + +-- with type coercion (integer to numeric) +SELECT sum(val::numeric, 0 ON EMPTY) FROM agg_on_empty WHERE grpid = 40; + sum +----- + 0 +(1 row) + +SELECT sum(val, 0.5 ON EMPTY) FROM agg_on_empty WHERE grpid = 40; + sum +----- + 1 +(1 row) + +-- with GROUP BY +SELECT grpid, sum(val, 0 ON EMPTY) FROM agg_on_empty GROUP BY grpid ORDER BY grpid; + grpid | sum +-------+----- + 10 | 30 + 20 | 30 + 30 | 50 +(3 rows) + +-- with GROUP BY and empty groups +SELECT g, sum(val, -1 ON EMPTY) FROM (VALUES (100), (200), (10), (20)) AS groups(g) + LEFT JOIN agg_on_empty ON groups.g = agg_on_empty.grpid + GROUP BY g ORDER BY g; + g | sum +-----+----- + 10 | 30 + 20 | 30 + 100 | + 200 
| +(4 rows) + +-- with grouping sets: per-group state is reset across grouping-set phases, so +-- ON EMPTY must not fire spuriously for groups that do have rows +SELECT grpid, sum(val, -1 ON EMPTY) FROM agg_on_empty + GROUP BY ROLLUP(grpid) ORDER BY grpid; + grpid | sum +-------+----- + 10 | 30 + 20 | 30 + 30 | 50 + | 110 +(4 rows) + +-- with grouping sets where a FILTER empties one group: ON EMPTY fires only for +-- that group (checked under both sorted and hashed grouping-set execution) +SELECT grpid, sum(val, -1 ON EMPTY) FILTER (WHERE val > 25) FROM agg_on_empty + GROUP BY GROUPING SETS ((grpid), ()) ORDER BY grpid; + grpid | sum +-------+----- + 10 | -1 + 20 | 30 + 30 | 50 + | 80 +(4 rows) + +SET enable_sort = off; +SELECT grpid, sum(val, -1 ON EMPTY) FILTER (WHERE val > 25) FROM agg_on_empty + GROUP BY GROUPING SETS ((grpid), ()) ORDER BY grpid; + grpid | sum +-------+----- + 10 | -1 + 20 | 30 + 30 | 50 + | 80 +(4 rows) + +RESET enable_sort; +-- multi-dimensional CUBE with a FILTER: ON EMPTY fires independently for each +-- empty cell (including sub-aggregates), while populated cells return real sums +SELECT grpid, id % 2 AS parity, sum(val, -1 ON EMPTY) FILTER (WHERE val > 25) AS s + FROM agg_on_empty GROUP BY CUBE(grpid, id % 2) ORDER BY grpid, parity; + grpid | parity | s +-------+--------+---- + 10 | 0 | -1 + 10 | 1 | -1 + 10 | | -1 + 20 | 0 | -1 + 20 | 1 | 30 + 20 | | 30 + 30 | 1 | 50 + 30 | | 50 + | 0 | -1 + | 1 | 80 + | | 80 +(11 rows) + +-- with FILTER clause +SELECT sum(val, 100 ON EMPTY) FILTER (WHERE val > 100) FROM agg_on_empty; + sum +----- + 100 +(1 row) + +SELECT count(val, 0 ON EMPTY) FILTER (WHERE val > 100) FROM agg_on_empty; + count +------- + 0 +(1 row) + +-- with ORDER BY in aggregate +SELECT avg(val, -999 ON EMPTY ORDER BY val) FROM agg_on_empty WHERE grpid = 40; + avg +------ + -999 +(1 row) + +SELECT avg(val, -999 ON EMPTY ORDER BY val) FROM agg_on_empty WHERE grpid = 10; + avg +--------------------- + 15.0000000000000000 +(1 row) + 
+-- under a forced parallel plan: ON EMPTY must work whether or not the +-- aggregate is parallelized (such aggregates are not partially aggregated). +SET max_parallel_workers_per_gather = 4; +SET parallel_setup_cost = 0; +SET parallel_tuple_cost = 0; +SET min_parallel_table_scan_size = 0; +SELECT sum(val, -1 ON EMPTY) FROM agg_on_empty WHERE grpid = 40; -- default + sum +----- + -1 +(1 row) + +SELECT sum(val, -1 ON EMPTY) FROM agg_on_empty WHERE grpid = 10; -- real result + sum +----- + 30 +(1 row) + +RESET max_parallel_workers_per_gather; +RESET parallel_setup_cost; +RESET parallel_tuple_cost; +RESET min_parallel_table_scan_size; +-- Error cases for ON EMPTY +-- with complex expressions (should fail - must be constant) +SELECT sum(val, val ON EMPTY) FROM agg_on_empty WHERE grpid = 40; +ERROR: ON EMPTY expression must be a constant value +LINE 1: SELECT sum(val, val ON EMPTY) FROM agg_on_empty WHERE grpid ... + ^ +-- with type mismatch (should fail) +SELECT sum(val, 'invalid' ON EMPTY) FROM agg_on_empty WHERE grpid = 40; +ERROR: invalid input syntax for type bigint: "invalid" +LINE 1: SELECT sum(val, 'invalid' ON EMPTY) FROM agg_on_empty WHERE ... + ^ +-- with non-aggregate function (should fail) +SELECT length('test', 'default' ON EMPTY); +ERROR: ON EMPTY specified, but length is not an aggregate function +LINE 1: SELECT length('test', 'default' ON EMPTY); + ^ +-- with a subquery (should fail) +SELECT sum(val, (SELECT 1) ON EMPTY) FROM agg_on_empty WHERE grpid = 40; +ERROR: cannot use subquery in ON EMPTY expression +LINE 1: SELECT sum(val, (SELECT 1) ON EMPTY) FROM agg_on_empty WHERE... + ^ +-- with a volatile function (should fail) +SELECT sum(val, random()::int ON EMPTY) FROM agg_on_empty WHERE grpid = 40; +ERROR: ON EMPTY expression must not contain a volatile function +LINE 1: SELECT sum(val, random()::int ON EMPTY) FROM agg_on_empty WH... 
+ ^ +-- with a set-returning function (should fail) +SELECT sum(val, generate_series(1, 2) ON EMPTY) FROM agg_on_empty WHERE grpid = 40; +ERROR: set-returning functions are not allowed in ON EMPTY expressions +LINE 1: SELECT sum(val, generate_series(1, 2) ON EMPTY) FROM agg_on_... + ^ +-- with a nested aggregate (should fail) +SELECT sum(val, sum(val) ON EMPTY) FROM agg_on_empty WHERE grpid = 40; +ERROR: aggregate functions are not allowed in ON EMPTY expressions +LINE 1: SELECT sum(val, sum(val) ON EMPTY) FROM agg_on_empty WHERE g... + ^ +-- with a window function (should fail) +SELECT sum(val, row_number() OVER () ON EMPTY) FROM agg_on_empty WHERE grpid = 40; +ERROR: window functions are not allowed in ON EMPTY expressions +LINE 1: SELECT sum(val, row_number() OVER () ON EMPTY) FROM agg_on_e... + ^ +-- combined with DISTINCT (should fail - not supported by the grammar) +SELECT sum(DISTINCT val, 0 ON EMPTY) FROM agg_on_empty WHERE grpid = 40; +ERROR: syntax error at or near "ON" +LINE 1: SELECT sum(DISTINCT val, 0 ON EMPTY) FROM agg_on_empty WHERE... 
+ ^ +-- combined with an ordered-set aggregate (WITHIN GROUP): the ON EMPTY value +-- is written before WITHIN GROUP and is returned for an empty input set +SELECT percentile_cont(0.5, -7 ON EMPTY) WITHIN GROUP (ORDER BY val) + FROM agg_on_empty WHERE grpid = 40; -- empty -> default + percentile_cont +----------------- + -7 +(1 row) + +SELECT percentile_cont(0.5, -7 ON EMPTY) WITHIN GROUP (ORDER BY val) + FROM agg_on_empty WHERE grpid = 10; -- real result + percentile_cont +----------------- + 15 +(1 row) + +-- and it round-trips through view deparsing +CREATE VIEW agg_on_empty_os_vw AS + SELECT percentile_cont(0.5, -7 ON EMPTY) WITHIN GROUP (ORDER BY val) AS p + FROM agg_on_empty; +SELECT pg_get_viewdef('agg_on_empty_os_vw'); + pg_get_viewdef +--------------------------------------------------------------------------------------------------------------------------------- + SELECT percentile_cont((0.5)::double precision, '-7'::integer ON EMPTY) WITHIN GROUP (ORDER BY ((val)::double precision)) AS p+ + FROM agg_on_empty; +(1 row) + +DROP VIEW agg_on_empty_os_vw; +-- deparsing of ON EMPTY in views +CREATE VIEW agg_on_empty_vw AS + SELECT grpid, sum(val, 0 ON EMPTY ORDER BY val) AS total, + avg(val, 0 ON EMPTY) AS average, count(val, 0 ON EMPTY) AS cnt + FROM agg_on_empty GROUP BY grpid; +\d+ agg_on_empty_vw + View "public.agg_on_empty_vw" + Column | Type | Collation | Nullable | Default | Storage | Description +---------+---------+-----------+----------+---------+---------+------------- + grpid | integer | | | | plain | + total | bigint | | | | plain | + average | numeric | | | | main | + cnt | bigint | | | | plain | +View definition: + SELECT grpid, + sum(val, 0 ON EMPTY ORDER BY val) AS total, + avg(val, 0 ON EMPTY) AS average, + count(val, 0 ON EMPTY) AS cnt + FROM agg_on_empty + GROUP BY grpid; + +SELECT * FROM agg_on_empty_vw ORDER BY grpid; + grpid | total | average | cnt +-------+-------+---------------------+----- + 10 | 30 | 15.0000000000000000 | 2 + 20 | 30 
| 30.0000000000000000 | 1 + 30 | 50 | 50.0000000000000000 | 1 +(3 rows) + +-- with prepared statements (must use literal constants) +PREPARE on_empty_test(int) AS + SELECT sum(val, 42 ON EMPTY) FROM agg_on_empty WHERE grpid = $1; +EXECUTE on_empty_test(40); + sum +----- + 42 +(1 row) + +EXECUTE on_empty_test(10); + sum +----- + 30 +(1 row) + +DEALLOCATE on_empty_test; +-- with HAVING clause +SELECT grpid, sum(val, 0 ON EMPTY) AS total FROM agg_on_empty + GROUP BY grpid HAVING sum(val, 0 ON EMPTY) > 20 ORDER BY grpid; + grpid | total +-------+------- + 10 | 30 + 20 | 30 + 30 | 50 +(3 rows) + +-- ON EMPTY must NOT fire for a non-empty group whose rows are all NULL: such +-- a group has processed rows, so the normal (NULL) result applies. This is +-- exercised against a strict transition function (max/min/sum of int are +-- handled differently, so test a strict one explicitly) and across the +-- sorted, hashed and grouping-set execution paths. +INSERT INTO agg_on_empty VALUES (6, NULL, 50), (7, NULL, 50); +-- strict transition function (max/min) over an all-NULL, non-empty group +SELECT max(val, -1 ON EMPTY) FROM agg_on_empty WHERE grpid = 50; -- NULL, not -1 + max +----- + +(1 row) + +SELECT min(val, -1 ON EMPTY) FROM agg_on_empty WHERE grpid = 50; -- NULL, not -1 + min +----- + +(1 row) + +-- and per group, under both sorted and hashed aggregation +SELECT grpid, max(val, -1 ON EMPTY) FROM agg_on_empty + GROUP BY grpid ORDER BY grpid; + grpid | max +-------+----- + 10 | 20 + 20 | 30 + 30 | 50 + 50 | +(4 rows) + +SET enable_sort = off; SET enable_hashagg = on; +SELECT grpid, max(val, -1 ON EMPTY) FROM agg_on_empty + GROUP BY grpid ORDER BY grpid; + grpid | max +-------+----- + 10 | 20 + 20 | 30 + 30 | 50 + 50 | +(4 rows) + +RESET enable_sort; RESET enable_hashagg; +-- and under grouping sets +SELECT grpid, max(val, -1 ON EMPTY) FROM agg_on_empty + GROUP BY ROLLUP(grpid) ORDER BY grpid; + grpid | max +-------+----- + 10 | 20 + 20 | 30 + 30 | 50 + 50 | + | 50 +(5 rows) 
+ +-- a strict ordered-set aggregate over an all-NULL group also processes rows, +-- so ON EMPTY is not used (NULL); a truly empty input still uses it +SELECT percentile_cont(0.5, -7 ON EMPTY) WITHIN GROUP (ORDER BY val) + FROM agg_on_empty WHERE grpid = 50; -- NULL + percentile_cont +----------------- + +(1 row) + +SELECT percentile_cont(0.5, -7 ON EMPTY) WITHIN GROUP (ORDER BY val) + FROM agg_on_empty WHERE grpid = 60; -- empty -> -7 + percentile_cont +----------------- + -7 +(1 row) + +-- the all-NULL behavior is the same for a window aggregate, which uses ON +-- EMPTY only for a row whose frame contains no rows +SELECT val, max(val, -1 ON EMPTY) OVER () AS m + FROM agg_on_empty WHERE grpid = 50 ORDER BY id; + val | m +-----+--- + | + | +(2 rows) + +-- ON EMPTY must be a constant: a correlated reference to an outer query's +-- column is not constant and must be rejected (it is not caught by a +-- current-level-only Var check) +SELECT (SELECT max(inner_t.val, agg_on_empty.val ON EMPTY) + FROM agg_on_empty inner_t WHERE false) + FROM agg_on_empty WHERE grpid = 10; +ERROR: ON EMPTY expression must be a constant value +LINE 1: SELECT (SELECT max(inner_t.val, agg_on_empty.val ON EMPTY) + ^ +-- clean up +DROP VIEW agg_on_empty_vw; +DROP TABLE agg_on_empty; diff --git a/src/test/regress/expected/window.out b/src/test/regress/expected/window.out index 90d9f953b81..c685757e410 100644 --- a/src/test/regress/expected/window.out +++ b/src/test/regress/expected/window.out @@ -6046,3 +6046,111 @@ SELECT last_value FROM null_treatment_seq; --cleanup DROP TABLE planets CASCADE; NOTICE: drop cascades to view planets_view +-- Test ON EMPTY clause with window functions +-- create a test table for ON EMPTY tests +CREATE TABLE win_on_empty(id int, val int, grpid int); +INSERT INTO win_on_empty VALUES (1, 10, 10), (2, 20, 10), (3, 30, 20), (4, NULL, 20), (5, 50, 30); +-- Empty frames using ROWS (ON EMPTY value WILL be returned) +-- On first row, "1 PRECEDING" frame is empty - should 
return ON EMPTY value +SELECT id, val, + sum(val, -111 ON EMPTY) OVER (ORDER BY id ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS sum_prev_row +FROM win_on_empty ORDER BY id; + id | val | sum_prev_row +----+-----+-------------- + 1 | 10 | -111 + 2 | 20 | 10 + 3 | 30 | 20 + 4 | | 30 + 5 | 50 | +(5 rows) + +-- Empty frames using ROWS FOLLOWING (ON EMPTY value WILL be returned) +-- On last rows, FOLLOWING frame is empty - should return ON EMPTY value +SELECT id, val, + sum(val, -222 ON EMPTY) OVER (ORDER BY id ASC ROWS BETWEEN 1 FOLLOWING AND 2 FOLLOWING) AS sum_following +FROM win_on_empty ORDER BY id; + id | val | sum_following +----+-----+--------------- + 1 | 10 | 50 + 2 | 20 | 30 + 3 | 30 | 50 + 4 | | 50 + 5 | 50 | -222 +(5 rows) + +SELECT id, val, + sum(val, -222 ON EMPTY) OVER (ORDER BY id DESC ROWS BETWEEN 1 FOLLOWING AND 2 FOLLOWING) AS sum_following +FROM win_on_empty ORDER BY id; + id | val | sum_following +----+-----+--------------- + 1 | 10 | -222 + 2 | 20 | 10 + 3 | 30 | 30 + 4 | | 50 + 5 | 50 | 30 +(5 rows) + +-- Frames with only NULL values (ON EMPTY value WILL NOT be returned) +-- Row 4 has NULL, when frame contains only that row, should NOT return ON EMPTY value +SELECT id, val, + sum(val, -333 ON EMPTY) OVER (ORDER BY id ROWS BETWEEN CURRENT ROW AND CURRENT ROW) AS sum_current, + avg(val, -3.0 ON EMPTY) OVER (ORDER BY id ROWS BETWEEN CURRENT ROW AND CURRENT ROW) AS avg_current +FROM win_on_empty WHERE id = 4 ORDER BY id; + id | val | sum_current | avg_current +----+-----+-------------+------------- + 4 | | | +(1 row) + +-- Partition with only NULL values (ON EMPTY value WILL NOT be returned) +-- Add more rows with NULLs to demonstrate +INSERT INTO win_on_empty VALUES (6, NULL, 40), (7, NULL, 40); +SELECT grpid, id, val, + sum(val, -444 ON EMPTY) OVER (PARTITION BY grpid ORDER BY id) as sum_nulls, + avg(val, -4.0 ON EMPTY) OVER (PARTITION BY grpid ORDER BY id) as avg_nulls, + max(val, -999 ON EMPTY) OVER (PARTITION BY grpid ORDER BY id) as max_nulls 
+FROM win_on_empty WHERE grpid = 40 ORDER BY id; + grpid | id | val | sum_nulls | avg_nulls | max_nulls +-------+----+-----+-----------+-----------+----------- + 40 | 6 | | | | + 40 | 7 | | | | +(2 rows) + +CREATE VIEW win_on_empty_vw AS + SELECT id, val, + sum(val, -111 ON EMPTY) OVER (ORDER BY id ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS sum_prev_row + FROM win_on_empty ORDER BY id; +\d+ win_on_empty_vw + View "public.win_on_empty_vw" + Column | Type | Collation | Nullable | Default | Storage | Description +--------------+---------+-----------+----------+---------+---------+------------- + id | integer | | | | plain | + val | integer | | | | plain | + sum_prev_row | bigint | | | | plain | +View definition: + SELECT id, + val, + sum(val, '-111'::integer ON EMPTY) OVER (ORDER BY id ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS sum_prev_row + FROM win_on_empty + ORDER BY id; + +SELECT * FROM win_on_empty_vw ORDER BY id; + id | val | sum_prev_row +----+-----+-------------- + 1 | 10 | -111 + 2 | 20 | 10 + 3 | 30 | 20 + 4 | | 30 + 5 | 50 | + 6 | | 50 + 7 | | +(7 rows) + +-- ON EMPTY is only meaningful for aggregates; reject it for non-aggregate +-- window functions (should fail) +SELECT lag(val, 1, 0 ON EMPTY) OVER (ORDER BY id) FROM win_on_empty; +ERROR: ON EMPTY is not implemented for non-aggregate window functions +LINE 1: SELECT lag(val, 1, 0 ON EMPTY) OVER (ORDER BY id) FROM win_o... 
+ ^ +-- clean up +DROP VIEW win_on_empty_vw; +DROP TABLE win_on_empty; diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql index 916383db927..78a595e1354 100644 --- a/src/test/regress/sql/aggregates.sql +++ b/src/test/regress/sql/aggregates.sql @@ -1798,3 +1798,201 @@ drop table agg_hash_1; drop table agg_hash_2; drop table agg_hash_3; drop table agg_hash_4; + + +-- Test ON EMPTY clause for aggregates + +-- create a test table for ON EMPTY tests +CREATE TABLE agg_on_empty(id int, val int, grpid int); + +-- with empty table +SELECT sum(val, -1 ON EMPTY) FROM agg_on_empty; + +INSERT INTO agg_on_empty VALUES (1, 10, 10), (2, 20, 10), (3, 30, 20), (4, NULL, 20), (5, 50, 30); + +-- basic ON EMPTY tests with numeric default +SELECT sum(val, 0 ON EMPTY) FROM agg_on_empty WHERE grpid = 40; +SELECT sum(val, -1 ON EMPTY) FROM agg_on_empty WHERE grpid = 40; +SELECT avg(val, 0 ON EMPTY) FROM agg_on_empty WHERE grpid = 40; +SELECT max(val, 999 ON EMPTY) FROM agg_on_empty WHERE grpid = 40; +SELECT min(val, -999 ON EMPTY) FROM agg_on_empty WHERE grpid = 40; +SELECT count(val, 0 ON EMPTY) FROM agg_on_empty WHERE grpid = 40; + +-- with non-empty result set (should use actual aggregate result) +SELECT sum(val, 0 ON EMPTY) FROM agg_on_empty WHERE grpid = 10; +SELECT avg(val, 0 ON EMPTY) FROM agg_on_empty WHERE grpid = 10; +SELECT max(val, 999 ON EMPTY) FROM agg_on_empty WHERE grpid = 10; +SELECT min(val, -999 ON EMPTY) FROM agg_on_empty WHERE grpid = 10; + +-- with string default +SELECT string_agg(grpid::text, ',', 'NONE' ON EMPTY) FROM agg_on_empty WHERE grpid = 40; +SELECT string_agg(grpid::text, ',', 'NONE' ON EMPTY) FROM agg_on_empty WHERE grpid = 10; + +-- with NULL default +SELECT sum(val, NULL ON EMPTY) FROM agg_on_empty WHERE grpid = 40; +SELECT avg(val, NULL ON EMPTY) FROM agg_on_empty WHERE grpid = 40; +SELECT string_agg(grpid::text, ',', NULL ON EMPTY) FROM agg_on_empty WHERE grpid = 40; + +-- with type coercion (integer to numeric) 
+SELECT sum(val::numeric, 0 ON EMPTY) FROM agg_on_empty WHERE grpid = 40; +SELECT sum(val, 0.5 ON EMPTY) FROM agg_on_empty WHERE grpid = 40; + +-- with GROUP BY +SELECT grpid, sum(val, 0 ON EMPTY) FROM agg_on_empty GROUP BY grpid ORDER BY grpid; + +-- with GROUP BY and empty groups +SELECT g, sum(val, -1 ON EMPTY) FROM (VALUES (100), (200), (10), (20)) AS groups(g) + LEFT JOIN agg_on_empty ON groups.g = agg_on_empty.grpid + GROUP BY g ORDER BY g; + +-- with grouping sets: per-group state is reset across grouping-set phases, so +-- ON EMPTY must not fire spuriously for groups that do have rows +SELECT grpid, sum(val, -1 ON EMPTY) FROM agg_on_empty + GROUP BY ROLLUP(grpid) ORDER BY grpid; + +-- with grouping sets where a FILTER empties one group: ON EMPTY fires only for +-- that group (checked under both sorted and hashed grouping-set execution) +SELECT grpid, sum(val, -1 ON EMPTY) FILTER (WHERE val > 25) FROM agg_on_empty + GROUP BY GROUPING SETS ((grpid), ()) ORDER BY grpid; +SET enable_sort = off; +SELECT grpid, sum(val, -1 ON EMPTY) FILTER (WHERE val > 25) FROM agg_on_empty + GROUP BY GROUPING SETS ((grpid), ()) ORDER BY grpid; +RESET enable_sort; + +-- multi-dimensional CUBE with a FILTER: ON EMPTY fires independently for each +-- empty cell (including sub-aggregates), while populated cells return real sums +SELECT grpid, id % 2 AS parity, sum(val, -1 ON EMPTY) FILTER (WHERE val > 25) AS s + FROM agg_on_empty GROUP BY CUBE(grpid, id % 2) ORDER BY grpid, parity; + +-- with FILTER clause +SELECT sum(val, 100 ON EMPTY) FILTER (WHERE val > 100) FROM agg_on_empty; +SELECT count(val, 0 ON EMPTY) FILTER (WHERE val > 100) FROM agg_on_empty; + +-- with ORDER BY in aggregate +SELECT avg(val, -999 ON EMPTY ORDER BY val) FROM agg_on_empty WHERE grpid = 40; +SELECT avg(val, -999 ON EMPTY ORDER BY val) FROM agg_on_empty WHERE grpid = 10; + +-- under a forced parallel plan: ON EMPTY must work whether or not the +-- aggregate is parallelized (such aggregates are not partially 
aggregated). +SET max_parallel_workers_per_gather = 4; +SET parallel_setup_cost = 0; +SET parallel_tuple_cost = 0; +SET min_parallel_table_scan_size = 0; +SELECT sum(val, -1 ON EMPTY) FROM agg_on_empty WHERE grpid = 40; -- default +SELECT sum(val, -1 ON EMPTY) FROM agg_on_empty WHERE grpid = 10; -- real result +RESET max_parallel_workers_per_gather; +RESET parallel_setup_cost; +RESET parallel_tuple_cost; +RESET min_parallel_table_scan_size; + +-- Error cases for ON EMPTY + +-- with complex expressions (should fail - must be constant) +SELECT sum(val, val ON EMPTY) FROM agg_on_empty WHERE grpid = 40; + +-- with type mismatch (should fail) +SELECT sum(val, 'invalid' ON EMPTY) FROM agg_on_empty WHERE grpid = 40; + +-- with non-aggregate function (should fail) +SELECT length('test', 'default' ON EMPTY); + +-- with a subquery (should fail) +SELECT sum(val, (SELECT 1) ON EMPTY) FROM agg_on_empty WHERE grpid = 40; + +-- with a volatile function (should fail) +SELECT sum(val, random()::int ON EMPTY) FROM agg_on_empty WHERE grpid = 40; + +-- with a set-returning function (should fail) +SELECT sum(val, generate_series(1, 2) ON EMPTY) FROM agg_on_empty WHERE grpid = 40; + +-- with a nested aggregate (should fail) +SELECT sum(val, sum(val) ON EMPTY) FROM agg_on_empty WHERE grpid = 40; + +-- with a window function (should fail) +SELECT sum(val, row_number() OVER () ON EMPTY) FROM agg_on_empty WHERE grpid = 40; + +-- combined with DISTINCT (should fail - not supported by the grammar) +SELECT sum(DISTINCT val, 0 ON EMPTY) FROM agg_on_empty WHERE grpid = 40; + +-- combined with an ordered-set aggregate (WITHIN GROUP): the ON EMPTY value +-- is written before WITHIN GROUP and is returned for an empty input set +SELECT percentile_cont(0.5, -7 ON EMPTY) WITHIN GROUP (ORDER BY val) + FROM agg_on_empty WHERE grpid = 40; -- empty -> default +SELECT percentile_cont(0.5, -7 ON EMPTY) WITHIN GROUP (ORDER BY val) + FROM agg_on_empty WHERE grpid = 10; -- real result + +-- and it round-trips 
through view deparsing +CREATE VIEW agg_on_empty_os_vw AS + SELECT percentile_cont(0.5, -7 ON EMPTY) WITHIN GROUP (ORDER BY val) AS p + FROM agg_on_empty; +SELECT pg_get_viewdef('agg_on_empty_os_vw'); +DROP VIEW agg_on_empty_os_vw; + +-- deparsing of ON EMPTY in views +CREATE VIEW agg_on_empty_vw AS + SELECT grpid, sum(val, 0 ON EMPTY ORDER BY val) AS total, + avg(val, 0 ON EMPTY) AS average, count(val, 0 ON EMPTY) AS cnt + FROM agg_on_empty GROUP BY grpid; + +\d+ agg_on_empty_vw + +SELECT * FROM agg_on_empty_vw ORDER BY grpid; + +-- with prepared statements (must use literal constants) +PREPARE on_empty_test(int) AS + SELECT sum(val, 42 ON EMPTY) FROM agg_on_empty WHERE grpid = $1; + +EXECUTE on_empty_test(40); +EXECUTE on_empty_test(10); + +DEALLOCATE on_empty_test; + +-- with HAVING clause +SELECT grpid, sum(val, 0 ON EMPTY) AS total FROM agg_on_empty + GROUP BY grpid HAVING sum(val, 0 ON EMPTY) > 20 ORDER BY grpid; + +-- ON EMPTY must NOT fire for a non-empty group whose rows are all NULL: such +-- a group has processed rows, so the normal (NULL) result applies. This is +-- exercised against a strict transition function (max/min/sum of int are +-- handled differently, so test a strict one explicitly) and across the +-- sorted, hashed and grouping-set execution paths. 
+INSERT INTO agg_on_empty VALUES (6, NULL, 50), (7, NULL, 50); + +-- strict transition function (max/min) over an all-NULL, non-empty group +SELECT max(val, -1 ON EMPTY) FROM agg_on_empty WHERE grpid = 50; -- NULL, not -1 +SELECT min(val, -1 ON EMPTY) FROM agg_on_empty WHERE grpid = 50; -- NULL, not -1 + +-- and per group, under both sorted and hashed aggregation +SELECT grpid, max(val, -1 ON EMPTY) FROM agg_on_empty + GROUP BY grpid ORDER BY grpid; +SET enable_sort = off; SET enable_hashagg = on; +SELECT grpid, max(val, -1 ON EMPTY) FROM agg_on_empty + GROUP BY grpid ORDER BY grpid; +RESET enable_sort; RESET enable_hashagg; + +-- and under grouping sets +SELECT grpid, max(val, -1 ON EMPTY) FROM agg_on_empty + GROUP BY ROLLUP(grpid) ORDER BY grpid; + +-- a strict ordered-set aggregate over an all-NULL group also processes rows, +-- so ON EMPTY is not used (NULL); a truly empty input still uses it +SELECT percentile_cont(0.5, -7 ON EMPTY) WITHIN GROUP (ORDER BY val) + FROM agg_on_empty WHERE grpid = 50; -- NULL +SELECT percentile_cont(0.5, -7 ON EMPTY) WITHIN GROUP (ORDER BY val) + FROM agg_on_empty WHERE grpid = 60; -- empty -> -7 + +-- the all-NULL behavior is the same for a window aggregate, which uses ON +-- EMPTY only for a row whose frame contains no rows +SELECT val, max(val, -1 ON EMPTY) OVER () AS m + FROM agg_on_empty WHERE grpid = 50 ORDER BY id; + +-- ON EMPTY must be a constant: a correlated reference to an outer query's +-- column is not constant and must be rejected (it is not caught by a +-- current-level-only Var check) +SELECT (SELECT max(inner_t.val, agg_on_empty.val ON EMPTY) + FROM agg_on_empty inner_t WHERE false) + FROM agg_on_empty WHERE grpid = 10; + +-- clean up +DROP VIEW agg_on_empty_vw; +DROP TABLE agg_on_empty; diff --git a/src/test/regress/sql/window.sql b/src/test/regress/sql/window.sql index 5ac3a486e16..4e66a83fb78 100644 --- a/src/test/regress/sql/window.sql +++ b/src/test/regress/sql/window.sql @@ -2190,3 +2190,58 @@ SELECT 
last_value FROM null_treatment_seq; --cleanup DROP TABLE planets CASCADE; + + +-- Test ON EMPTY clause with window functions + +-- create a test table for ON EMPTY tests +CREATE TABLE win_on_empty(id int, val int, grpid int); +INSERT INTO win_on_empty VALUES (1, 10, 10), (2, 20, 10), (3, 30, 20), (4, NULL, 20), (5, 50, 30); + +-- Empty frames using ROWS (ON EMPTY value WILL be returned) +-- On first row, "1 PRECEDING" frame is empty - should return ON EMPTY value +SELECT id, val, + sum(val, -111 ON EMPTY) OVER (ORDER BY id ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS sum_prev_row +FROM win_on_empty ORDER BY id; + +-- Empty frames using ROWS FOLLOWING (ON EMPTY value WILL be returned) +-- On last rows, FOLLOWING frame is empty - should return ON EMPTY value +SELECT id, val, + sum(val, -222 ON EMPTY) OVER (ORDER BY id ASC ROWS BETWEEN 1 FOLLOWING AND 2 FOLLOWING) AS sum_following +FROM win_on_empty ORDER BY id; +SELECT id, val, + sum(val, -222 ON EMPTY) OVER (ORDER BY id DESC ROWS BETWEEN 1 FOLLOWING AND 2 FOLLOWING) AS sum_following +FROM win_on_empty ORDER BY id; + +-- Frames with only NULL values (ON EMPTY value WILL NOT be returned) +-- Row 4 has NULL, when frame contains only that row, should NOT return ON EMPTY value +SELECT id, val, + sum(val, -333 ON EMPTY) OVER (ORDER BY id ROWS BETWEEN CURRENT ROW AND CURRENT ROW) AS sum_current, + avg(val, -3.0 ON EMPTY) OVER (ORDER BY id ROWS BETWEEN CURRENT ROW AND CURRENT ROW) AS avg_current +FROM win_on_empty WHERE id = 4 ORDER BY id; + +-- Partition with only NULL values (ON EMPTY value WILL NOT be returned) +-- Add more rows with NULLs to demonstrate +INSERT INTO win_on_empty VALUES (6, NULL, 40), (7, NULL, 40); +SELECT grpid, id, val, + sum(val, -444 ON EMPTY) OVER (PARTITION BY grpid ORDER BY id) as sum_nulls, + avg(val, -4.0 ON EMPTY) OVER (PARTITION BY grpid ORDER BY id) as avg_nulls, + max(val, -999 ON EMPTY) OVER (PARTITION BY grpid ORDER BY id) as max_nulls +FROM win_on_empty WHERE grpid = 40 ORDER BY id; + 
+CREATE VIEW win_on_empty_vw AS + SELECT id, val, + sum(val, -111 ON EMPTY) OVER (ORDER BY id ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS sum_prev_row + FROM win_on_empty ORDER BY id; + +\d+ win_on_empty_vw + +SELECT * FROM win_on_empty_vw ORDER BY id; + +-- ON EMPTY is only meaningful for aggregates; reject it for non-aggregate +-- window functions (should fail) +SELECT lag(val, 1, 0 ON EMPTY) OVER (ORDER BY id) FROM win_on_empty; + +-- clean up +DROP VIEW win_on_empty_vw; +DROP TABLE win_on_empty; -- 2.43.0