From 1ebfac68080b10181086b4c9c9dcb3e3e1582cdb Mon Sep 17 00:00:00 2001 From: David Rowley Date: Mon, 3 May 2021 16:31:29 +1200 Subject: [PATCH v8] Add planner support for ORDER BY aggregates ORDER BY aggreagtes have, since implemented in Postgres, been executed by always performing a sort in nodeAgg.c to sort the tuples in the current group into the correct order before calling the transition function on the sorted results. This was not great as often there might be an index that could have provided pre-sorted input and allowed the transition functions to be called as the rows come in, rather than having to store them in a tuplestore in order to sort them later. Here we get the planner on-board with picking a plan that provides pre-sorted inputs to ORDER BY aggregates. Since there can be many ORDER BY aggregates in any given query level, it's very possible that we can't find an order that suits all aggregates. The sort order the the planner chooses is simply the one that suits the most aggregate functions. We take the most strictly sorted variation of each order and see how many aggregate functions can use that, then we try again with the order of the remaining aggregates to see if another order would suit more aggregate functions. For example: SELECT agg(a ORDER BY a),agg2(a ORDER BY a,b) ... would request the sort order to be {a, b} because {a} is a subset of the sort order of {a,b}, but; SELECT agg(a ORDER BY a),agg2(a ORDER BY c) ... would just pick a plan ordered by {a}. SELECT agg(a ORDER BY a),agg2(a ORDER BY b),agg3(a ORDER BY b) ... would choose to order by {b} since two aggregates suit that vs just one that requires order by {a}. Discussion: https://postgr.es/m/CAApHDvpHzfo92%3DR4W0%2BxVua3BUYCKMckWAmo-2t_KiXN-wYH%3Dw%40mail.gmail.com --- .../postgres_fdw/expected/postgres_fdw.out | 46 +-- src/backend/executor/execExpr.c | 52 ++- src/backend/executor/execExprInterp.c | 102 ++++++ src/backend/executor/nodeAgg.c | 34 +- src/backend/jit/llvm/llvmjit_expr.c | 48 +++ src/backend/jit/llvm/llvmjit_types.c | 2 + src/backend/optimizer/path/pathkeys.c | 44 ++- src/backend/optimizer/plan/planagg.c | 2 +- src/backend/optimizer/plan/planner.c | 306 ++++++++++++++++-- src/backend/optimizer/prep/prepagg.c | 7 +- src/include/executor/execExpr.h | 17 + src/include/executor/nodeAgg.h | 9 + src/include/nodes/pathnodes.h | 16 +- src/include/nodes/primnodes.h | 7 + src/include/optimizer/paths.h | 4 +- src/test/regress/expected/aggregates.out | 80 ++++- .../regress/expected/partition_aggregate.out | 118 +++---- src/test/regress/expected/sqljson.out | 12 +- src/test/regress/expected/tuplesort.out | 40 +-- src/test/regress/sql/aggregates.sql | 43 +++ 20 files changed, 845 insertions(+), 144 deletions(-) diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out index ade797159d..a42e16e8d2 100644 --- a/contrib/postgres_fdw/expected/postgres_fdw.out +++ b/contrib/postgres_fdw/expected/postgres_fdw.out @@ -3295,15 +3295,18 @@ create operator class my_op_class for type int using btree family my_op_family a -- extension yet. explain (verbose, costs off) select array_agg(c1 order by c1 using operator(public.<^)) from ft2 where c2 = 6 and c1 < 100 group by c2; - QUERY PLAN --------------------------------------------------------------------------------------------- + QUERY PLAN +-------------------------------------------------------------------------------------------------- GroupAggregate Output: array_agg(c1 ORDER BY c1 USING <^ NULLS LAST), c2 Group Key: ft2.c2 - -> Foreign Scan on public.ft2 - Output: c1, c2 - Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE (("C 1" < 100)) AND ((c2 = 6)) -(6 rows) + -> Sort + Output: c2, c1 + Sort Key: ft2.c1 USING <^ + -> Foreign Scan on public.ft2 + Output: c2, c1 + Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE (("C 1" < 100)) AND ((c2 = 6)) +(9 rows) -- This should not be pushed either. explain (verbose, costs off) @@ -3331,13 +3334,15 @@ alter server loopback options (set extensions 'postgres_fdw'); -- Now this will be pushed as sort operator is part of the extension. explain (verbose, costs off) select array_agg(c1 order by c1 using operator(public.<^)) from ft2 where c2 = 6 and c1 < 100 group by c2; - QUERY PLAN ----------------------------------------------------------------------------------------------------------------------------------------------------------------- - Foreign Scan - Output: (array_agg(c1 ORDER BY c1 USING <^ NULLS LAST)), c2 - Relations: Aggregate on (public.ft2) - Remote SQL: SELECT array_agg("C 1" ORDER BY "C 1" USING OPERATOR(public.<^) NULLS LAST), c2 FROM "S 1"."T 1" WHERE (("C 1" < 100)) AND ((c2 = 6)) GROUP BY 2 -(4 rows) + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------ + GroupAggregate + Output: array_agg(c1 ORDER BY c1 USING <^ NULLS LAST), c2 + Group Key: ft2.c2 + -> Foreign Scan on public.ft2 + Output: c1, c2 + Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE (("C 1" < 100)) AND ((c2 = 6)) ORDER BY "C 1" USING OPERATOR(public.<^) NULLS LAST +(6 rows) select array_agg(c1 order by c1 using operator(public.<^)) from ft2 where c2 = 6 and c1 < 100 group by c2; array_agg @@ -3366,15 +3371,18 @@ alter server loopback options (set extensions 'postgres_fdw'); -- This will not be pushed as sort operator is now removed from the extension. explain (verbose, costs off) select array_agg(c1 order by c1 using operator(public.<^)) from ft2 where c2 = 6 and c1 < 100 group by c2; - QUERY PLAN --------------------------------------------------------------------------------------------- + QUERY PLAN +-------------------------------------------------------------------------------------------------- GroupAggregate Output: array_agg(c1 ORDER BY c1 USING <^ NULLS LAST), c2 Group Key: ft2.c2 - -> Foreign Scan on public.ft2 - Output: c1, c2 - Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE (("C 1" < 100)) AND ((c2 = 6)) -(6 rows) + -> Sort + Output: c2, c1 + Sort Key: ft2.c1 USING <^ + -> Foreign Scan on public.ft2 + Output: c2, c1 + Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE (("C 1" < 100)) AND ((c2 = 6)) +(9 rows) -- Cleanup drop operator class my_op_class using btree; diff --git a/src/backend/executor/execExpr.c b/src/backend/executor/execExpr.c index c8d7145fe3..d0a57c7aae 100644 --- a/src/backend/executor/execExpr.c +++ b/src/backend/executor/execExpr.c @@ -3666,13 +3666,17 @@ ExecBuildAggTrans(AggState *aggstate, AggStatePerPhase phase, scratch.resnull = &state->resnull; } argno++; + + Assert(pertrans->numInputs == argno); } - else if (pertrans->numSortCols == 0) + else if (!pertrans->aggsortrequired) { ListCell *arg; /* - * Normal transition function without ORDER BY / DISTINCT. + * Normal transition function without ORDER BY / DISTINCT or with + * ORDER BY / DISTINCT but the planner has given us pre-sorted + * input. */ strictargs = trans_fcinfo->args + 1; @@ -3680,6 +3684,13 @@ ExecBuildAggTrans(AggState *aggstate, AggStatePerPhase phase, { TargetEntry *source_tle = (TargetEntry *) lfirst(arg); + /* + * Don't initialize args for any ORDER BY clause that might + * exist in a presorted aggregate. + */ + if (argno == pertrans->numTransInputs) + break; + /* * Start from 1, since the 0th arg will be the transition * value @@ -3689,11 +3700,13 @@ ExecBuildAggTrans(AggState *aggstate, AggStatePerPhase phase, &trans_fcinfo->args[argno + 1].isnull); argno++; } + Assert(pertrans->numTransInputs == argno); } else if (pertrans->numInputs == 1) { /* - * DISTINCT and/or ORDER BY case, with a single column sorted on. + * Non-presorted DISTINCT and/or ORDER BY case, with a single + * column sorted on. */ TargetEntry *source_tle = (TargetEntry *) linitial(pertrans->aggref->args); @@ -3705,11 +3718,14 @@ ExecBuildAggTrans(AggState *aggstate, AggStatePerPhase phase, &state->resnull); strictnulls = &state->resnull; argno++; + + Assert(pertrans->numInputs == argno); } else { /* - * DISTINCT and/or ORDER BY case, with multiple columns sorted on. + * Non-presorted DISTINCT and/or ORDER BY case, with multiple + * columns sorted on. */ Datum *values = pertrans->sortslot->tts_values; bool *nulls = pertrans->sortslot->tts_isnull; @@ -3725,8 +3741,8 @@ ExecBuildAggTrans(AggState *aggstate, AggStatePerPhase phase, &values[argno], &nulls[argno]); argno++; } + Assert(pertrans->numInputs == argno); } - Assert(pertrans->numInputs == argno); /* * For a strict transfn, nothing happens when there's a NULL input; we @@ -3748,6 +3764,21 @@ ExecBuildAggTrans(AggState *aggstate, AggStatePerPhase phase, state->steps_len - 1); } + /* Handle DISTINCT aggregates which have pre-sorted input */ + if (pertrans->numDistinctCols > 0 && !pertrans->aggsortrequired) + { + if (pertrans->numDistinctCols > 1) + scratch.opcode = EEOP_AGG_PRESORTED_DISTINCT_MULTI; + else + scratch.opcode = EEOP_AGG_PRESORTED_DISTINCT_SINGLE; + + scratch.d.agg_presorted_distinctcheck.pertrans = pertrans; + scratch.d.agg_presorted_distinctcheck.jumpdistinct = -1; /* adjust later */ + ExprEvalPushStep(state, &scratch); + adjust_bailout = lappend_int(adjust_bailout, + state->steps_len - 1); + } + /* * Call transition function (once for each concurrently evaluated * grouping set). Do so for both sort and hash based computations, as @@ -3808,6 +3839,12 @@ ExecBuildAggTrans(AggState *aggstate, AggStatePerPhase phase, Assert(as->d.agg_deserialize.jumpnull == -1); as->d.agg_deserialize.jumpnull = state->steps_len; } + else if (as->opcode == EEOP_AGG_PRESORTED_DISTINCT_SINGLE || + as->opcode == EEOP_AGG_PRESORTED_DISTINCT_MULTI) + { + Assert(as->d.agg_presorted_distinctcheck.jumpdistinct == -1); + as->d.agg_presorted_distinctcheck.jumpdistinct = state->steps_len; + } else Assert(false); } @@ -3857,7 +3894,8 @@ ExecBuildAggTransCall(ExprState *state, AggState *aggstate, /* * Determine appropriate transition implementation. * - * For non-ordered aggregates: + * For non-ordered aggregates and ORDER BY / DISTINCT aggregates with + * presorted input: * * If the initial value for the transition state doesn't exist in the * pg_aggregate table then we will let the first non-NULL value returned @@ -3887,7 +3925,7 @@ ExecBuildAggTransCall(ExprState *state, AggState *aggstate, * process_ordered_aggregate_{single, multi} and * advance_transition_function. */ - if (pertrans->numSortCols == 0) + if (!pertrans->aggsortrequired) { if (pertrans->transtypeByVal) { diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c index 723770fda0..e8d77929a9 100644 --- a/src/backend/executor/execExprInterp.c +++ b/src/backend/executor/execExprInterp.c @@ -502,6 +502,8 @@ ExecInterpExpr(ExprState *state, ExprContext *econtext, bool *isnull) &&CASE_EEOP_AGG_PLAIN_TRANS_INIT_STRICT_BYREF, &&CASE_EEOP_AGG_PLAIN_TRANS_STRICT_BYREF, &&CASE_EEOP_AGG_PLAIN_TRANS_BYREF, + &&CASE_EEOP_AGG_PRESORTED_DISTINCT_SINGLE, + &&CASE_EEOP_AGG_PRESORTED_DISTINCT_MULTI, &&CASE_EEOP_AGG_ORDERED_TRANS_DATUM, &&CASE_EEOP_AGG_ORDERED_TRANS_TUPLE, &&CASE_EEOP_LAST @@ -1786,6 +1788,28 @@ ExecInterpExpr(ExprState *state, ExprContext *econtext, bool *isnull) EEO_NEXT(); } + EEO_CASE(EEOP_AGG_PRESORTED_DISTINCT_SINGLE) + { + AggStatePerTrans pertrans = op->d.agg_presorted_distinctcheck.pertrans; + AggState *aggstate = castNode(AggState, state->parent); + + if (ExecEvalPreOrderedDistinctSingle(aggstate, pertrans)) + EEO_NEXT(); + else + EEO_JUMP(op->d.agg_presorted_distinctcheck.jumpdistinct); + } + + EEO_CASE(EEOP_AGG_PRESORTED_DISTINCT_MULTI) + { + AggState *aggstate = castNode(AggState, state->parent); + AggStatePerTrans pertrans = op->d.agg_presorted_distinctcheck.pertrans; + + if (ExecEvalPreOrderedDistinctMulti(aggstate, pertrans)) + EEO_NEXT(); + else + EEO_JUMP(op->d.agg_presorted_distinctcheck.jumpdistinct); + } + /* process single-column ordered aggregate datum */ EEO_CASE(EEOP_AGG_ORDERED_TRANS_DATUM) { @@ -4402,6 +4426,84 @@ ExecAggTransReparent(AggState *aggstate, AggStatePerTrans pertrans, return newValue; } +/* + * ExecEvalPreOrderedDistinctSingle + * Returns true when the aggregate transition value Datum is distinct + * from the previous input Datum and returns false when the input Datum + * matches the previous input Datum. + */ +bool +ExecEvalPreOrderedDistinctSingle(AggState *aggstate, AggStatePerTrans pertrans) +{ + Datum value = pertrans->transfn_fcinfo->args[1].value; + bool isnull = pertrans->transfn_fcinfo->args[1].isnull; + + if (!pertrans->haslast || + pertrans->lastisnull != isnull || + !DatumGetBool(FunctionCall2Coll(&pertrans->equalfnOne, + pertrans->aggCollation, + pertrans->lastdatum, value))) + { + if (pertrans->haslast && !pertrans->inputtypeByVal) + pfree(DatumGetPointer(pertrans->lastdatum)); + + pertrans->haslast = true; + if (!isnull) + { + MemoryContext oldContext; + + oldContext = MemoryContextSwitchTo(aggstate->curaggcontext->ecxt_per_tuple_memory); + + pertrans->lastdatum = datumCopy(value, pertrans->inputtypeByVal, + pertrans->inputtypeLen); + + MemoryContextSwitchTo(oldContext); + } + else + pertrans->lastdatum = (Datum) 0; + pertrans->lastisnull = isnull; + return true; + } + + return false; +} + +/* + * ExecEvalPreOrderedDistinctMulti + * Returns true when the aggregate input is distinct from the previous + * input and returns false when the input matches the previous input. + */ +bool +ExecEvalPreOrderedDistinctMulti(AggState *aggstate, AggStatePerTrans pertrans) +{ + ExprContext *tmpcontext = aggstate->tmpcontext; + + for (int i = 0; i < pertrans->numTransInputs; i++) + { + pertrans->sortslot->tts_values[i] = pertrans->transfn_fcinfo->args[i + 1].value; + pertrans->sortslot->tts_isnull[i] = pertrans->transfn_fcinfo->args[i + 1].isnull; + } + + ExecClearTuple(pertrans->sortslot); + pertrans->sortslot->tts_nvalid = pertrans->numInputs; + ExecStoreVirtualTuple(pertrans->sortslot); + + tmpcontext->ecxt_outertuple = pertrans->sortslot; + tmpcontext->ecxt_innertuple = pertrans->uniqslot; + + if (!pertrans->haslast || + !ExecQual(pertrans->equalfnMulti, tmpcontext)) + { + if (pertrans->haslast) + ExecClearTuple(pertrans->uniqslot); + + pertrans->haslast = true; + ExecCopySlot(pertrans->uniqslot, pertrans->sortslot); + return true; + } + return false; +} + /* * Invoke ordered transition function, with a datum argument. */ diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c index 2fc606cf29..96d200e446 100644 --- a/src/backend/executor/nodeAgg.c +++ b/src/backend/executor/nodeAgg.c @@ -583,7 +583,7 @@ initialize_aggregate(AggState *aggstate, AggStatePerTrans pertrans, /* * Start a fresh sort operation for each DISTINCT/ORDER BY aggregate. */ - if (pertrans->numSortCols > 0) + if (pertrans->aggsortrequired) { /* * In case of rescan, maybe there could be an uncompleted sort @@ -1309,7 +1309,7 @@ finalize_aggregates(AggState *aggstate, pergroupstate = &pergroup[transno]; - if (pertrans->numSortCols > 0) + if (pertrans->aggsortrequired) { Assert(aggstate->aggstrategy != AGG_HASHED && aggstate->aggstrategy != AGG_MIXED); @@ -1323,6 +1323,21 @@ finalize_aggregates(AggState *aggstate, pertrans, pergroupstate); } + else if (pertrans->numDistinctCols > 0 && pertrans->haslast) + { + pertrans->haslast = false; + + if (pertrans->numDistinctCols == 1) + { + if (!pertrans->inputtypeByVal && !pertrans->lastisnull) + pfree(DatumGetPointer(pertrans->lastdatum)); + + pertrans->lastisnull = false; + pertrans->lastdatum = (Datum) 0; + } + else + ExecClearTuple(pertrans->uniqslot); + } } /* @@ -4127,6 +4142,12 @@ build_pertrans_for_aggref(AggStatePerTrans pertrans, * stick them into arrays. We ignore ORDER BY for an ordered-set agg, * however; the agg's transfn and finalfn are responsible for that. * + * When the planner has set the aggpresorted flag, the input to the + * aggregate is already correctly sorted. For ORDER BY aggregates we can + * simply treat these as normal aggregates. For presorted DISTINCT + * aggregates an extra step must be added to remove duplicate consecutive + * inputs. + * * Note that by construction, if there is a DISTINCT clause then the ORDER * BY clause is a prefix of it (see transformDistinctClause). */ @@ -4134,18 +4155,27 @@ build_pertrans_for_aggref(AggStatePerTrans pertrans, { sortlist = NIL; numSortCols = numDistinctCols = 0; + pertrans->aggsortrequired = false; + } + else if (aggref->aggpresorted && aggref->aggdistinct == NIL) + { + sortlist = NIL; + numSortCols = numDistinctCols = 0; + pertrans->aggsortrequired = false; } else if (aggref->aggdistinct) { sortlist = aggref->aggdistinct; numSortCols = numDistinctCols = list_length(sortlist); Assert(numSortCols >= list_length(aggref->aggorder)); + pertrans->aggsortrequired = !aggref->aggpresorted; } else { sortlist = aggref->aggorder; numSortCols = list_length(sortlist); numDistinctCols = 0; + pertrans->aggsortrequired = (numSortCols > 0); } pertrans->numSortCols = numSortCols; diff --git a/src/backend/jit/llvm/llvmjit_expr.c b/src/backend/jit/llvm/llvmjit_expr.c index b6b6512ef1..c6eb2e9207 100644 --- a/src/backend/jit/llvm/llvmjit_expr.c +++ b/src/backend/jit/llvm/llvmjit_expr.c @@ -2335,6 +2335,54 @@ llvm_compile_expr(ExprState *state) break; } + case EEOP_AGG_PRESORTED_DISTINCT_SINGLE: + { + AggState *aggstate = castNode(AggState, state->parent); + AggStatePerTrans pertrans = op->d.agg_presorted_distinctcheck.pertrans; + int jumpdistinct = op->d.agg_presorted_distinctcheck.jumpdistinct; + + LLVMValueRef v_fn = llvm_pg_func(mod, "ExecEvalPreOrderedDistinctSingle"); + LLVMValueRef v_args[2]; + LLVMValueRef v_ret; + + v_args[0] = l_ptr_const(aggstate, l_ptr(StructAggState)); + v_args[1] = l_ptr_const(pertrans, l_ptr(StructAggStatePerTransData)); + + v_ret = LLVMBuildCall(b, v_fn, v_args, 2, ""); + v_ret = LLVMBuildZExt(b, v_ret, TypeStorageBool, ""); + + LLVMBuildCondBr(b, + LLVMBuildICmp(b, LLVMIntEQ, v_ret, + l_sbool_const(1), ""), + opblocks[opno + 1], + opblocks[jumpdistinct]); + break; + } + + case EEOP_AGG_PRESORTED_DISTINCT_MULTI: + { + AggState *aggstate = castNode(AggState, state->parent); + AggStatePerTrans pertrans = op->d.agg_presorted_distinctcheck.pertrans; + int jumpdistinct = op->d.agg_presorted_distinctcheck.jumpdistinct; + + LLVMValueRef v_fn = llvm_pg_func(mod, "ExecEvalPreOrderedDistinctMulti"); + LLVMValueRef v_args[2]; + LLVMValueRef v_ret; + + v_args[0] = l_ptr_const(aggstate, l_ptr(StructAggState)); + v_args[1] = l_ptr_const(pertrans, l_ptr(StructAggStatePerTransData)); + + v_ret = LLVMBuildCall(b, v_fn, v_args, 2, ""); + v_ret = LLVMBuildZExt(b, v_ret, TypeStorageBool, ""); + + LLVMBuildCondBr(b, + LLVMBuildICmp(b, LLVMIntEQ, v_ret, + l_sbool_const(1), ""), + opblocks[opno + 1], + opblocks[jumpdistinct]); + break; + } + case EEOP_AGG_ORDERED_TRANS_DATUM: build_EvalXFunc(b, mod, "ExecEvalAggOrderedTransDatum", v_state, op, v_econtext); diff --git a/src/backend/jit/llvm/llvmjit_types.c b/src/backend/jit/llvm/llvmjit_types.c index b2bda86889..373471ad27 100644 --- a/src/backend/jit/llvm/llvmjit_types.c +++ b/src/backend/jit/llvm/llvmjit_types.c @@ -103,6 +103,8 @@ void *referenced_functions[] = { ExecAggInitGroup, ExecAggTransReparent, + ExecEvalPreOrderedDistinctSingle, + ExecEvalPreOrderedDistinctMulti, ExecEvalAggOrderedTransDatum, ExecEvalAggOrderedTransTuple, ExecEvalArrayCoerce, diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c index b5d6c977ee..11de286e60 100644 --- a/src/backend/optimizer/path/pathkeys.c +++ b/src/backend/optimizer/path/pathkeys.c @@ -103,6 +103,27 @@ make_canonical_pathkey(PlannerInfo *root, return pk; } +/* + * append_pathkeys + * Append all non-redundant PathKeys in 'source' onto 'target' + */ +List * +append_pathkeys(List *target, List *source) +{ + ListCell *lc; + + Assert(target != NIL); + + foreach(lc, source) + { + PathKey *pk = lfirst_node(PathKey, lc); + + if (!pathkey_is_redundant(pk, target)) + target = lappend(target, pk); + } + return target; +} + /* * pathkey_is_redundant * Is a pathkey redundant with one already in the given list? @@ -746,7 +767,8 @@ get_cheapest_group_keys_order(PlannerInfo *root, double nrows, List * get_useful_group_keys_orderings(PlannerInfo *root, double nrows, List *path_pathkeys, - List *group_pathkeys, List *group_clauses) + List *group_pathkeys, List *group_clauses, + List *aggregate_pathkeys) { Query *parse = root->parse; List *infos = NIL; @@ -758,7 +780,10 @@ get_useful_group_keys_orderings(PlannerInfo *root, double nrows, /* always return at least the original pathkeys/clauses */ info = makeNode(PathKeyInfo); - info->pathkeys = pathkeys; + if (aggregate_pathkeys != NIL) + info->pathkeys = list_concat_copy(pathkeys, aggregate_pathkeys); + else + info->pathkeys = pathkeys; info->clauses = clauses; infos = lappend(infos, info); @@ -783,7 +808,10 @@ get_useful_group_keys_orderings(PlannerInfo *root, double nrows, n_preordered)) { info = makeNode(PathKeyInfo); - info->pathkeys = pathkeys; + if (aggregate_pathkeys != NIL) + info->pathkeys = list_concat_copy(pathkeys, aggregate_pathkeys); + else + info->pathkeys = pathkeys; info->clauses = clauses; infos = lappend(infos, info); @@ -822,7 +850,10 @@ get_useful_group_keys_orderings(PlannerInfo *root, double nrows, * still want to keep the keys reordered to path_pathkeys. */ info = makeNode(PathKeyInfo); - info->pathkeys = pathkeys; + if (aggregate_pathkeys != NIL) + info->pathkeys = list_concat_copy(pathkeys, aggregate_pathkeys); + else + info->pathkeys = pathkeys; info->clauses = clauses; infos = lappend(infos, info); @@ -850,7 +881,10 @@ get_useful_group_keys_orderings(PlannerInfo *root, double nrows, /* keep the group keys reordered to match ordering of input path */ info = makeNode(PathKeyInfo); - info->pathkeys = pathkeys; + if (aggregate_pathkeys != NIL) + info->pathkeys = list_concat_copy(pathkeys, aggregate_pathkeys); + else + info->pathkeys = pathkeys; info->clauses = clauses; infos = lappend(infos, info); diff --git a/src/backend/optimizer/plan/planagg.c b/src/backend/optimizer/plan/planagg.c index e0e357960f..ab3f7abba1 100644 --- a/src/backend/optimizer/plan/planagg.c +++ b/src/backend/optimizer/plan/planagg.c @@ -245,7 +245,7 @@ can_minmax_aggs(PlannerInfo *root, List **context) foreach(lc, root->agginfos) { AggInfo *agginfo = lfirst_node(AggInfo, lc); - Aggref *aggref = agginfo->representative_aggref; + Aggref *aggref = linitial_node(Aggref, agginfo->aggrefs); Oid aggsortop; TargetEntry *curTarget; MinMaxAggInfo *mminfo; diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 06ad856eac..2f7e5e2fd2 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -24,6 +24,7 @@ #include "access/sysattr.h" #include "access/table.h" #include "access/xact.h" +#include "catalog/pg_aggregate.h" #include "catalog/pg_constraint.h" #include "catalog/pg_inherits.h" #include "catalog/pg_proc.h" @@ -3125,6 +3126,217 @@ reorder_grouping_sets(List *groupingsets, List *sortclause) return result; } +/* + * make_pathkeys_for_groupagg + * Determine the pathkeys for the GROUP BY clause and/or any ordered + * aggregates. We expect at least one of these here. + * + * Building the pathkeys for the GROUP BY is simple. Most of the complexity + * involved here comes from calculating the best pathkeys for ordered + * aggregates. We define "best" as the pathkeys that suit the most number of + * aggregate functions. We find these by looking at the first ORDER BY / + * DISTINCT aggregate and taking the pathkeys for that before searching for + * other aggregates that require the same or a more strict variation of the + * same pathkeys. We then repeat that process for any remaining aggregates + * with different pathkeys and if we find another set of pathkeys that suits a + * larger number of aggregates then we return those pathkeys instead. + * + * *number_groupby_pathkeys gets set to the number of elemenents in the + * returned list that belong to the GROUP BY clause. Any elements above this + * number must belong to ORDER BY / DISTINCT aggregates. + * + * When the best pathkeys are found we also mark each Aggref that can use + * those pathkeys as aggpresorted = true. + */ +static List * +make_pathkeys_for_groupagg(PlannerInfo *root, List *groupClause, List *tlist, + int *number_groupby_pathkeys) +{ + List *grouppathkeys = NIL; + List *bestpathkeys; + Bitmapset *bestaggs; + Bitmapset *unprocessed_aggs; + ListCell *lc; + int i; + + Assert(groupClause != NIL || root->numOrderedAggs > 0); + + if (groupClause != NIL) + { + /* no pathkeys possible if there's an unsortable GROUP BY */ + if (!grouping_is_sortable(groupClause)) + { + *number_groupby_pathkeys = 0; + return NIL; + } + + grouppathkeys = make_pathkeys_for_sortclauses(root, groupClause, + tlist); + *number_groupby_pathkeys = list_length(grouppathkeys); + } + else + *number_groupby_pathkeys = 0; + + /* + * We can't add pathkeys for ordered aggregates if there are any grouping + * sets. All handling specific to ordered aggregates must be done by the + * executor in that case. + */ + if (root->numOrderedAggs == 0 || root->parse->groupingSets != NIL) + return grouppathkeys; + + /* + * Make a first pass over all AggInfos to collect a Bitmapset containing + * the indexes of all AggInfos to be processed below. + */ + unprocessed_aggs = NULL; + foreach(lc, root->agginfos) + { + AggInfo *agginfo = (AggInfo *) lfirst(lc); + Aggref *aggref = linitial_node(Aggref, agginfo->aggrefs); + + if (AGGKIND_IS_ORDERED_SET(aggref->aggkind)) + continue; + + /* only add aggregates with a DISTINCT or ORDER BY */ + if (aggref->aggdistinct != NIL || aggref->aggorder != NIL) + unprocessed_aggs = bms_add_member(unprocessed_aggs, + foreach_current_index(lc)); + } + + /* + * Now process all the unprocessed_aggs to find the best set of pathkeys + * for the given set of aggregates. + * + * On the first outer loop here 'bestaggs' will be empty. We'll populate + * this during the first loop using the pathkeys for the very first + * AggInfo then taking any stronger pathkeys from any other AggInfos with + * a more strict set of compatible pathkeys. Once the outer loop is + * complete, we mark off all the aggregates with compatible pathkeys then + * remove those from the unprocessed_aggs and repeat the process to try to + * find another set of pathkeys that are suitable for a larger number of + * aggregates. The outer loop will stop when there are not enough + * unprocessed aggregates for it to be possible to find a set of pathkeys + * to suit a larger number of aggregates. + */ + bestpathkeys = NIL; + bestaggs = NULL; + while (bms_num_members(unprocessed_aggs) > bms_num_members(bestaggs)) + { + Bitmapset *aggindexes = NULL; + List *currpathkeys = NIL; + + i = -1; + while ((i = bms_next_member(unprocessed_aggs, i)) >= 0) + { + AggInfo *agginfo = list_nth(root->agginfos, i); + Aggref *aggref = linitial_node(Aggref, agginfo->aggrefs); + List *sortlist; + + if (aggref->aggdistinct != NIL) + sortlist = aggref->aggdistinct; + else + sortlist = aggref->aggorder; + + /* + * When not set yet, take the pathkeys from the first unprocessed + * aggregate. + */ + if (currpathkeys == NIL) + { + currpathkeys = make_pathkeys_for_sortclauses(root, sortlist, + aggref->args); + + /* include the GROUP BY pathkeys, if they exist */ + if (grouppathkeys != NIL) + currpathkeys = append_pathkeys(list_copy(grouppathkeys), + currpathkeys); + + /* record that we found pathkeys for this aggregate */ + aggindexes = bms_add_member(aggindexes, i); + } + else + { + List *pathkeys; + + /* now look for a stronger set of matching pathkeys */ + pathkeys = make_pathkeys_for_sortclauses(root, sortlist, + aggref->args); + + /* include the GROUP BY pathkeys, if they exist */ + if (grouppathkeys != NIL) + pathkeys = append_pathkeys(list_copy(grouppathkeys), + pathkeys); + + /* are 'pathkeys' compatible or better than 'currpathkeys'? */ + switch (compare_pathkeys(currpathkeys, pathkeys)) + { + case PATHKEYS_BETTER2: + /* 'pathkeys' are stronger, use these ones instead */ + currpathkeys = pathkeys; + /* FALLTHROUGH */ + + case PATHKEYS_BETTER1: + /* 'pathkeys' are less strict */ + /* FALLTHROUGH */ + + case PATHKEYS_EQUAL: + /* mark this aggregate is covered by 'currpathkeys' */ + aggindexes = bms_add_member(aggindexes, i); + break; + + case PATHKEYS_DIFFERENT: + break; + } + } + } + + /* remove the aggregates that we've just processed */ + unprocessed_aggs = bms_del_members(unprocessed_aggs, aggindexes); + + /* + * If this pass included more aggregates than the previous best then + * use these ones as the best set. + */ + if (bms_num_members(aggindexes) > bms_num_members(bestaggs)) + { + bestaggs = aggindexes; + bestpathkeys = currpathkeys; + } + } + + /* + * Now that we've found the best set of aggregates we can set the + * presorted flag to indicate to the executor that it needn't bother + * performing a sort for these Aggrefs. We're able to do this now + * as there's no chance of a Hash Aggregate plan as create_grouping_paths + * will not mark the group by as GROUPING_CAN_USE_HASH due to the + * presence of ordered aggregates. + */ + i = -1; + while ((i = bms_next_member(bestaggs, i)) >= 0) + { + AggInfo *agginfo = list_nth(root->agginfos, i); + + foreach(lc, agginfo->aggrefs) + { + Aggref *aggref = lfirst_node(Aggref, lc); + + aggref->aggpresorted = true; + } + } + + /* + * bestpathkeys includes the GROUP BY pathkeys, so if we found any ordered + * aggregates, then return bestpathkeys, otherwise return the + * grouppathkeys. + */ + if (bestpathkeys != NIL) + return bestpathkeys; + + return grouppathkeys; +} + /* * Compute query_pathkeys and other pathkeys during plan generation */ @@ -3137,18 +3349,19 @@ standard_qp_callback(PlannerInfo *root, void *extra) List *activeWindows = qp_extra->activeWindows; /* - * Calculate pathkeys that represent grouping/ordering requirements. The - * sortClause is certainly sort-able, but GROUP BY and DISTINCT might not - * be, in which case we just leave their pathkeys empty. + * Calculate pathkeys that represent grouping/ordering and/or ordered + * aggregate requirements. */ - if (qp_extra->groupClause && - grouping_is_sortable(qp_extra->groupClause)) - root->group_pathkeys = - make_pathkeys_for_sortclauses(root, - qp_extra->groupClause, - tlist); + if (qp_extra->groupClause != NIL || root->numOrderedAggs > 0) + root->group_pathkeys = make_pathkeys_for_groupagg(root, + qp_extra->groupClause, + tlist, + &root->num_groupby_pathkeys); else + { root->group_pathkeys = NIL; + root->num_groupby_pathkeys = 0; + } /* We consider only the first (bottom) window in pathkeys logic */ if (activeWindows != NIL) @@ -6222,6 +6435,26 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, if (can_sort) { + List *group_pathkeys; + List *orderAggPathkeys; + int numAggPathkeys; + + numAggPathkeys = list_length(root->group_pathkeys) - + root->num_groupby_pathkeys; + + if (numAggPathkeys > 0) + { + group_pathkeys = list_copy_head(root->group_pathkeys, + root->num_groupby_pathkeys); + orderAggPathkeys = list_copy_tail(root->group_pathkeys, + root->num_groupby_pathkeys); + } + else + { + group_pathkeys = root->group_pathkeys; + orderAggPathkeys = NIL; + } + /* * Use any available suitably-sorted path as input, and also consider * sorting the cheapest-total path. @@ -6234,7 +6467,6 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, List *pathkey_orderings = NIL; - List *group_pathkeys = root->group_pathkeys; List *group_clauses = parse->groupClause; /* generate alternative group orderings that might be useful */ @@ -6242,7 +6474,8 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, path->rows, path->pathkeys, group_pathkeys, - group_clauses); + group_clauses, + orderAggPathkeys); Assert(list_length(pathkey_orderings) > 0); @@ -6414,7 +6647,8 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, path->rows, path->pathkeys, group_pathkeys, - group_clauses); + group_clauses, + orderAggPathkeys); Assert(list_length(pathkey_orderings) > 0); @@ -6717,6 +6951,24 @@ create_partial_grouping_paths(PlannerInfo *root, if (can_sort && cheapest_total_path != NULL) { + List *group_pathkeys; + List *orderAggPathkeys; + int numAggPathkeys; + + numAggPathkeys = list_length(root->group_pathkeys) - + root->num_groupby_pathkeys; + + if (numAggPathkeys > 0) + { + group_pathkeys = list_copy_head(root->group_pathkeys, root->num_groupby_pathkeys); + orderAggPathkeys = list_copy_tail(root->group_pathkeys, root->num_groupby_pathkeys); + } + else + { + group_pathkeys = root->group_pathkeys; + orderAggPathkeys = NIL; + } + /* This should have been checked previously */ Assert(parse->hasAggs || parse->groupClause); @@ -6729,10 +6981,7 @@ create_partial_grouping_paths(PlannerInfo *root, ListCell *lc2; Path *path = (Path *) lfirst(lc); Path *path_save = path; - List *pathkey_orderings = NIL; - - List *group_pathkeys = root->group_pathkeys; List *group_clauses = parse->groupClause; /* generate alternative group orderings that might be useful */ @@ -6740,7 +6989,8 @@ create_partial_grouping_paths(PlannerInfo *root, path->rows, path->pathkeys, group_pathkeys, - group_clauses); + group_clauses, + orderAggPathkeys); Assert(list_length(pathkey_orderings) > 0); @@ -6856,16 +7106,31 @@ create_partial_grouping_paths(PlannerInfo *root, if (can_sort && cheapest_partial_path != NULL) { + List *group_pathkeys; + List *orderAggPathkeys; + int numAggPathkeys; + + numAggPathkeys = list_length(root->group_pathkeys) - + root->num_groupby_pathkeys; + + if (numAggPathkeys > 0) + { + group_pathkeys = list_copy_head(root->group_pathkeys, root->num_groupby_pathkeys); + orderAggPathkeys = list_copy_tail(root->group_pathkeys, root->num_groupby_pathkeys); + } + else + { + group_pathkeys = root->group_pathkeys; + orderAggPathkeys = NIL; + } + /* Similar to above logic, but for partial paths. */ foreach(lc, input_rel->partial_pathlist) { ListCell *lc2; Path *path = (Path *) lfirst(lc); Path *path_original = path; - List *pathkey_orderings = NIL; - - List *group_pathkeys = root->group_pathkeys; List *group_clauses = parse->groupClause; /* generate alternative group orderings that might be useful */ @@ -6873,7 +7138,8 @@ create_partial_grouping_paths(PlannerInfo *root, path->rows, path->pathkeys, group_pathkeys, - group_clauses); + group_clauses, + orderAggPathkeys); Assert(list_length(pathkey_orderings) > 0); diff --git a/src/backend/optimizer/prep/prepagg.c b/src/backend/optimizer/prep/prepagg.c index 5b12937ead..da89b55402 100644 --- a/src/backend/optimizer/prep/prepagg.c +++ b/src/backend/optimizer/prep/prepagg.c @@ -225,6 +225,7 @@ preprocess_aggref(Aggref *aggref, PlannerInfo *root) { AggInfo *agginfo = list_nth_node(AggInfo, root->agginfos, aggno); + agginfo->aggrefs = lappend(agginfo->aggrefs, aggref); transno = agginfo->transno; } else @@ -232,7 +233,7 @@ preprocess_aggref(Aggref *aggref, PlannerInfo *root) AggInfo *agginfo = makeNode(AggInfo); agginfo->finalfn_oid = aggfinalfn; - agginfo->representative_aggref = aggref; + agginfo->aggrefs = list_make1(aggref); agginfo->shareable = shareable; aggno = list_length(root->agginfos); @@ -386,7 +387,7 @@ find_compatible_agg(PlannerInfo *root, Aggref *newagg, aggno++; - existingRef = agginfo->representative_aggref; + existingRef = linitial_node(Aggref, agginfo->aggrefs); /* all of the following must be the same or it's no match */ if (newagg->inputcollid != existingRef->inputcollid || @@ -648,7 +649,7 @@ get_agg_clause_costs(PlannerInfo *root, AggSplit aggsplit, AggClauseCosts *costs foreach(lc, root->agginfos) { AggInfo *agginfo = lfirst_node(AggInfo, lc); - Aggref *aggref = agginfo->representative_aggref; + Aggref *aggref = linitial_node(Aggref, agginfo->aggrefs); /* * Add the appropriate component function execution costs to diff --git a/src/include/executor/execExpr.h b/src/include/executor/execExpr.h index 1e3f1bbee8..0739b389f3 100644 --- a/src/include/executor/execExpr.h +++ b/src/include/executor/execExpr.h @@ -258,6 +258,8 @@ typedef enum ExprEvalOp EEOP_AGG_PLAIN_TRANS_INIT_STRICT_BYREF, EEOP_AGG_PLAIN_TRANS_STRICT_BYREF, EEOP_AGG_PLAIN_TRANS_BYREF, + EEOP_AGG_PRESORTED_DISTINCT_SINGLE, + EEOP_AGG_PRESORTED_DISTINCT_MULTI, EEOP_AGG_ORDERED_TRANS_DATUM, EEOP_AGG_ORDERED_TRANS_TUPLE, @@ -659,6 +661,17 @@ typedef struct ExprEvalStep int jumpnull; } agg_plain_pergroup_nullcheck; + /* for EEOP_AGG_PRESORTED_DISTINCT_{SINGLE,MULTI} */ + struct + { + AggStatePerTrans pertrans; + ExprContext *aggcontext; + int setno; + int transno; + int setoff; + int jumpdistinct; + } agg_presorted_distinctcheck; + /* for EEOP_AGG_PLAIN_TRANS_[INIT_][STRICT_]{BYVAL,BYREF} */ /* for EEOP_AGG_ORDERED_TRANS_{DATUM,TUPLE} */ struct @@ -868,6 +881,10 @@ extern void ExecAggInitGroup(AggState *aggstate, AggStatePerTrans pertrans, AggS extern Datum ExecAggTransReparent(AggState *aggstate, AggStatePerTrans pertrans, Datum newValue, bool newValueIsNull, Datum oldValue, bool oldValueIsNull); +extern bool ExecEvalPreOrderedDistinctSingle(AggState *aggstate, + AggStatePerTrans pertrans); +extern bool ExecEvalPreOrderedDistinctMulti(AggState *aggstate, + AggStatePerTrans pertrans); extern void ExecEvalAggOrderedTransDatum(ExprState *state, ExprEvalStep *op, ExprContext *econtext); extern void ExecEvalAggOrderedTransTuple(ExprState *state, ExprEvalStep *op, diff --git a/src/include/executor/nodeAgg.h b/src/include/executor/nodeAgg.h index 4d1bd92999..d46ff98c6d 100644 --- a/src/include/executor/nodeAgg.h +++ b/src/include/executor/nodeAgg.h @@ -48,6 +48,12 @@ typedef struct AggStatePerTransData */ bool aggshared; + /* + * True for ORDER BY and DISTINCT aggregates that are not + * Aggref->aggpresorted. + */ + bool aggsortrequired; + /* * Number of aggregated input columns. This includes ORDER BY expressions * in both the plain-agg and ordered-set cases. Ordered-set direct args @@ -136,6 +142,9 @@ typedef struct AggStatePerTransData TupleTableSlot *sortslot; /* current input tuple */ TupleTableSlot *uniqslot; /* used for multi-column DISTINCT */ TupleDesc sortdesc; /* descriptor of input tuples */ + Datum lastdatum; /* used for single-column DISTINCT */ + bool lastisnull; /* used for single-column DISTINCT */ + bool haslast; /* got a last value for DISTINCT check */ /* * These values are working state that is initialized at the start of an diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h index e2081db4ed..3b065139e6 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -365,6 +365,14 @@ struct PlannerInfo /* groupClause pathkeys, if any */ List *group_pathkeys; + + /* + * The number of elements in the group_pathkeys list which belong to the + * GROUP BY clause. Additional ones belong to ORDER BY / DISTINCT + * aggregates. + */ + int num_groupby_pathkeys; + /* pathkeys of bottom window, if any */ List *window_pathkeys; /* distinctClause pathkeys, if any */ @@ -3134,12 +3142,12 @@ typedef struct AggInfo NodeTag type; /* - * Link to an Aggref expr this state value is for. + * List of Aggref exprs that this state value is for. * - * There can be multiple identical Aggref's sharing the same per-agg. This - * points to the first one of them. + * There will always be at least one, but there can be multiple identical + * Aggref's sharing the same per-agg. */ - Aggref *representative_aggref; + List *aggrefs; /* Transition state number for this aggregate */ int transno; diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h index 1fc2fbffa3..854ae4ba5d 100644 --- a/src/include/nodes/primnodes.h +++ b/src/include/nodes/primnodes.h @@ -357,6 +357,10 @@ typedef struct Param * replaced with a single argument representing the partial-aggregate * transition values. * + * aggpresorted is set by the query planner for ORDER BY and DISTINCT + * aggregates where the plan chosen provides presorted input for this + * aggregate during execution + * * aggsplit indicates the expected partial-aggregation mode for the Aggref's * parent plan node. It's always set to AGGSPLIT_SIMPLE in the parser, but * the planner might change it to something else. We use this mainly as @@ -422,6 +426,9 @@ typedef struct Aggref /* aggregate kind (see pg_aggregate.h) */ char aggkind; + /* aggregate input already sorted */ + bool aggpresorted pg_node_attr(equal_ignore); + /* > 0 if agg belongs to outer query */ Index agglevelsup; diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index 54ab709c67..d11cdac7f8 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -208,7 +208,8 @@ extern int group_keys_reorder_by_pathkeys(List *pathkeys, List **group_clauses); extern List *get_useful_group_keys_orderings(PlannerInfo *root, double nrows, List *path_pathkeys, - List *group_pathkeys, List *group_clauses); + List *group_pathkeys, List *group_clauses, + List *aggregate_pathkeys); extern Path *get_cheapest_path_for_pathkeys(List *paths, List *pathkeys, Relids required_outer, CostSelector cost_criterion, @@ -255,6 +256,7 @@ extern List *truncate_useless_pathkeys(PlannerInfo *root, RelOptInfo *rel, List *pathkeys); extern bool has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel); +extern List *append_pathkeys(List *target, List *source); extern PathKey *make_canonical_pathkey(PlannerInfo *root, EquivalenceClass *eclass, Oid opfamily, int strategy, bool nulls_first); diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out index 601047fa3d..e141919600 100644 --- a/src/test/regress/expected/aggregates.out +++ b/src/test/regress/expected/aggregates.out @@ -1392,6 +1392,84 @@ ERROR: column "t1.f1" must appear in the GROUP BY clause or be used in an aggre LINE 1: select t1.f1 from t1 left join t2 using (f1) group by f1; ^ drop table t1, t2; +-- +-- Test planner's selection of pathkeys for ORDER BY aggregates +-- +-- Ensure we order by four. This suits the most aggregate functions. +explain (costs off) +select sum(two order by two),max(four order by four), min(four order by four) +from tenk1; + QUERY PLAN +------------------------------- + Aggregate + -> Sort + Sort Key: four + -> Seq Scan on tenk1 +(4 rows) + +-- Ensure we order by two. It's a tie between ordering by two and four but +-- we tiebreak on the aggregate's position. +explain (costs off) +select + sum(two order by two), max(four order by four), + min(four order by four), max(two order by two) +from tenk1; + QUERY PLAN +------------------------------- + Aggregate + -> Sort + Sort Key: two + -> Seq Scan on tenk1 +(4 rows) + +-- Similar to above, but tiebreak on ordering by four +explain (costs off) +select + max(four order by four), sum(two order by two), + min(four order by four), max(two order by two) +from tenk1; + QUERY PLAN +------------------------------- + Aggregate + -> Sort + Sort Key: four + -> Seq Scan on tenk1 +(4 rows) + +-- Ensure this one orders by ten since there are 3 aggregates that require ten +-- vs two that suit two and four. +explain (costs off) +select + max(four order by four), sum(two order by two), + min(four order by four), max(two order by two), + sum(ten order by ten), min(ten order by ten), max(ten order by ten) +from tenk1; + QUERY PLAN +------------------------------- + Aggregate + -> Sort + Sort Key: ten + -> Seq Scan on tenk1 +(4 rows) + +-- Try a case involving a GROUP BY clause where the GROUP BY column is also +-- part of an aggregate's ORDER BY clause. We want a sort order that works +-- for the GROUP BY and first and last aggregate. +explain (costs off) +select + sum(unique1 order by ten, two), sum(unique1 order by four), + sum(unique1 order by two, four) +from tenk1 +group by ten; + QUERY PLAN +---------------------------------- + GroupAggregate + Group Key: ten + -> Sort + Sort Key: ten, two, four + -> Seq Scan on tenk1 +(5 rows) + -- -- Test combinations of DISTINCT and/or ORDER BY -- @@ -2263,8 +2341,8 @@ NOTICE: avg_transfn called with 3 -- shouldn't share states due to the distinctness not matching. select my_avg(distinct one),my_sum(one) from (values(1),(3)) t(one); NOTICE: avg_transfn called with 1 -NOTICE: avg_transfn called with 3 NOTICE: avg_transfn called with 1 +NOTICE: avg_transfn called with 3 NOTICE: avg_transfn called with 3 my_avg | my_sum --------+-------- diff --git a/src/test/regress/expected/partition_aggregate.out b/src/test/regress/expected/partition_aggregate.out index a08a3825ff..4b41ccf1aa 100644 --- a/src/test/regress/expected/partition_aggregate.out +++ b/src/test/regress/expected/partition_aggregate.out @@ -367,17 +367,17 @@ SELECT c, sum(b order by a) FROM pagg_tab GROUP BY c ORDER BY 1, 2; -> GroupAggregate Group Key: pagg_tab.c -> Sort - Sort Key: pagg_tab.c + Sort Key: pagg_tab.c, pagg_tab.a -> Seq Scan on pagg_tab_p1 pagg_tab -> GroupAggregate Group Key: pagg_tab_1.c -> Sort - Sort Key: pagg_tab_1.c + Sort Key: pagg_tab_1.c, pagg_tab_1.a -> Seq Scan on pagg_tab_p2 pagg_tab_1 -> GroupAggregate Group Key: pagg_tab_2.c -> Sort - Sort Key: pagg_tab_2.c + Sort Key: pagg_tab_2.c, pagg_tab_2.a -> Seq Scan on pagg_tab_p3 pagg_tab_2 (18 rows) @@ -948,34 +948,36 @@ SET max_parallel_workers_per_gather TO 2; -- is not partial agg safe. EXPLAIN (COSTS OFF) SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER BY 1, 2, 3; - QUERY PLAN --------------------------------------------------------------------------------------- - Sort - Sort Key: pagg_tab_ml.a, (sum(pagg_tab_ml.b)), (array_agg(DISTINCT pagg_tab_ml.c)) - -> Append - -> GroupAggregate - Group Key: pagg_tab_ml.a - Filter: (avg(pagg_tab_ml.b) < '3'::numeric) - -> Sort - Sort Key: pagg_tab_ml.a - -> Seq Scan on pagg_tab_ml_p1 pagg_tab_ml - -> GroupAggregate - Group Key: pagg_tab_ml_2.a - Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric) - -> Sort - Sort Key: pagg_tab_ml_2.a - -> Append - -> Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2 - -> Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3 - -> GroupAggregate - Group Key: pagg_tab_ml_5.a - Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric) - -> Sort - Sort Key: pagg_tab_ml_5.a - -> Append - -> Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5 - -> Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6 -(25 rows) + QUERY PLAN +-------------------------------------------------------------------------------------------- + Gather Merge + Workers Planned: 2 + -> Sort + Sort Key: pagg_tab_ml.a, (sum(pagg_tab_ml.b)), (array_agg(DISTINCT pagg_tab_ml.c)) + -> Parallel Append + -> GroupAggregate + Group Key: pagg_tab_ml.a + Filter: (avg(pagg_tab_ml.b) < '3'::numeric) + -> Sort + Sort Key: pagg_tab_ml.a, pagg_tab_ml.c + -> Seq Scan on pagg_tab_ml_p1 pagg_tab_ml + -> GroupAggregate + Group Key: pagg_tab_ml_5.a + Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric) + -> Sort + Sort Key: pagg_tab_ml_5.a, pagg_tab_ml_5.c + -> Append + -> Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5 + -> Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6 + -> GroupAggregate + Group Key: pagg_tab_ml_2.a + Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric) + -> Sort + Sort Key: pagg_tab_ml_2.a, pagg_tab_ml_2.c + -> Append + -> Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2 + -> Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3 +(27 rows) SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER BY 1, 2, 3; a | sum | array_agg | count @@ -994,32 +996,34 @@ SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HA -- Without ORDER BY clause, to test Gather at top-most path EXPLAIN (COSTS OFF) SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3; - QUERY PLAN ---------------------------------------------------------------------- - Append - -> GroupAggregate - Group Key: pagg_tab_ml.a - Filter: (avg(pagg_tab_ml.b) < '3'::numeric) - -> Sort - Sort Key: pagg_tab_ml.a - -> Seq Scan on pagg_tab_ml_p1 pagg_tab_ml - -> GroupAggregate - Group Key: pagg_tab_ml_2.a - Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric) - -> Sort - Sort Key: pagg_tab_ml_2.a - -> Append - -> Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2 - -> Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3 - -> GroupAggregate - Group Key: pagg_tab_ml_5.a - Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric) - -> Sort - Sort Key: pagg_tab_ml_5.a - -> Append - -> Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5 - -> Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6 -(23 rows) + QUERY PLAN +--------------------------------------------------------------------------- + Gather + Workers Planned: 2 + -> Parallel Append + -> GroupAggregate + Group Key: pagg_tab_ml.a + Filter: (avg(pagg_tab_ml.b) < '3'::numeric) + -> Sort + Sort Key: pagg_tab_ml.a, pagg_tab_ml.c + -> Seq Scan on pagg_tab_ml_p1 pagg_tab_ml + -> GroupAggregate + Group Key: pagg_tab_ml_5.a + Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric) + -> Sort + Sort Key: pagg_tab_ml_5.a, pagg_tab_ml_5.c + -> Append + -> Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5 + -> Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6 + -> GroupAggregate + Group Key: pagg_tab_ml_2.a + Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric) + -> Sort + Sort Key: pagg_tab_ml_2.a, pagg_tab_ml_2.c + -> Append + -> Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2 + -> Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3 +(25 rows) -- Full aggregation at level 1 as GROUP BY clause matches with PARTITION KEY -- for level 1 only. For subpartitions, GROUP BY clause does not match with diff --git a/src/test/regress/expected/sqljson.out b/src/test/regress/expected/sqljson.out index bdd0969a50..748dfdb04d 100644 --- a/src/test/regress/expected/sqljson.out +++ b/src/test/regress/expected/sqljson.out @@ -866,14 +866,14 @@ FROM (VALUES (NULL), (3), (1), (NULL), (NULL), (5), (2), (4), (NULL)) foo(bar); json_arrayagg | json_arrayagg | json_arrayagg | json_arrayagg | json_arrayagg | json_arrayagg | json_arrayagg | json_arrayagg | json_arrayagg | json_arrayagg -----------------+-----------------+-----------------+-----------------+-----------------------------------------+-----------------------------------------+-----------------+--------------------------------------------------------------------------------------------------------------------------+---------------+-------------------------------------- - [3, 1, 5, 2, 4] | [3, 1, 5, 2, 4] | [3, 1, 5, 2, 4] | [3, 1, 5, 2, 4] | [null, 3, 1, null, null, 5, 2, 4, null] | [null, 3, 1, null, null, 5, 2, 4, null] | [{"bar":null}, +| [{"bar": null}, {"bar": 3}, {"bar": 1}, {"bar": null}, {"bar": null}, {"bar": 5}, {"bar": 2}, {"bar": 4}, {"bar": null}] | [{"bar":3}, +| [{"bar": 3}, {"bar": 4}, {"bar": 5}] - | | | | | | {"bar":3}, +| | {"bar":4}, +| - | | | | | | {"bar":1}, +| | {"bar":5}] | + [1, 2, 3, 4, 5] | [1, 2, 3, 4, 5] | [1, 2, 3, 4, 5] | [1, 2, 3, 4, 5] | [1, 2, 3, 4, 5, null, null, null, null] | [1, 2, 3, 4, 5, null, null, null, null] | [{"bar":1}, +| [{"bar": 1}, {"bar": 2}, {"bar": 3}, {"bar": 4}, {"bar": 5}, {"bar": null}, {"bar": null}, {"bar": null}, {"bar": null}] | [{"bar":3}, +| [{"bar": 3}, {"bar": 4}, {"bar": 5}] + | | | | | | {"bar":2}, +| | {"bar":4}, +| + | | | | | | {"bar":3}, +| | {"bar":5}] | + | | | | | | {"bar":4}, +| | | + | | | | | | {"bar":5}, +| | | + | | | | | | {"bar":null}, +| | | | | | | | | {"bar":null}, +| | | | | | | | | {"bar":null}, +| | | - | | | | | | {"bar":5}, +| | | - | | | | | | {"bar":2}, +| | | - | | | | | | {"bar":4}, +| | | | | | | | | {"bar":null}] | | | (1 row) diff --git a/src/test/regress/expected/tuplesort.out b/src/test/regress/expected/tuplesort.out index 418f296a3f..ef79574ecf 100644 --- a/src/test/regress/expected/tuplesort.out +++ b/src/test/regress/expected/tuplesort.out @@ -622,15 +622,17 @@ EXPLAIN (COSTS OFF) :qry; -> GroupAggregate Group Key: a.col12 Filter: (count(*) > 1) - -> Merge Join - Merge Cond: (a.col12 = b.col12) - -> Sort - Sort Key: a.col12 DESC - -> Seq Scan on test_mark_restore a - -> Sort - Sort Key: b.col12 DESC - -> Seq Scan on test_mark_restore b -(14 rows) + -> Sort + Sort Key: a.col12 DESC, a.col1 + -> Merge Join + Merge Cond: (a.col12 = b.col12) + -> Sort + Sort Key: a.col12 + -> Seq Scan on test_mark_restore a + -> Sort + Sort Key: b.col12 + -> Seq Scan on test_mark_restore b +(16 rows) :qry; col12 | count | count | count | count | count @@ -658,15 +660,17 @@ EXPLAIN (COSTS OFF) :qry; -> GroupAggregate Group Key: a.col12 Filter: (count(*) > 1) - -> Merge Join - Merge Cond: (a.col12 = b.col12) - -> Sort - Sort Key: a.col12 DESC - -> Seq Scan on test_mark_restore a - -> Sort - Sort Key: b.col12 DESC - -> Seq Scan on test_mark_restore b -(14 rows) + -> Sort + Sort Key: a.col12 DESC, a.col1 + -> Merge Join + Merge Cond: (a.col12 = b.col12) + -> Sort + Sort Key: a.col12 + -> Seq Scan on test_mark_restore a + -> Sort + Sort Key: b.col12 + -> Seq Scan on test_mark_restore b +(16 rows) :qry; col12 | count | count | count | count | count diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql index c6e0d7ba2b..d719cd1836 100644 --- a/src/test/regress/sql/aggregates.sql +++ b/src/test/regress/sql/aggregates.sql @@ -503,6 +503,49 @@ select t1.f1 from t1 left join t2 using (f1) group by f1; drop table t1, t2; +-- +-- Test planner's selection of pathkeys for ORDER BY aggregates +-- + +-- Ensure we order by four. This suits the most aggregate functions. +explain (costs off) +select sum(two order by two),max(four order by four), min(four order by four) +from tenk1; + +-- Ensure we order by two. It's a tie between ordering by two and four but +-- we tiebreak on the aggregate's position. +explain (costs off) +select + sum(two order by two), max(four order by four), + min(four order by four), max(two order by two) +from tenk1; + +-- Similar to above, but tiebreak on ordering by four +explain (costs off) +select + max(four order by four), sum(two order by two), + min(four order by four), max(two order by two) +from tenk1; + +-- Ensure this one orders by ten since there are 3 aggregates that require ten +-- vs two that suit two and four. +explain (costs off) +select + max(four order by four), sum(two order by two), + min(four order by four), max(two order by two), + sum(ten order by ten), min(ten order by ten), max(ten order by ten) +from tenk1; + +-- Try a case involving a GROUP BY clause where the GROUP BY column is also +-- part of an aggregate's ORDER BY clause. We want a sort order that works +-- for the GROUP BY and first and last aggregate. +explain (costs off) +select + sum(unique1 order by ten, two), sum(unique1 order by four), + sum(unique1 order by two, four) +from tenk1 +group by ten; + -- -- Test combinations of DISTINCT and/or ORDER BY -- -- 2.34.1