From 33953655c9ac3f9ec64b80c9f2a2ff38bd178745 Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Wed, 13 Sep 2023 11:20:03 +0700 Subject: [PATCH] Explore alternative orderings of group-by pathkeys during optimization. When evaluating a query with a multi-column GROUP BY clause using sort, the cost may depend heavily on the order in which the keys are compared when building the groups. Grouping does not imply any ordering, so we can compare the keys in arbitrary order, and a Hash Agg leverages this. But for Group Agg, we simply compared keys in the order specified in the query. This commit explores alternative ordering of the keys, trying to find a cheaper one. In principle, we might generate grouping paths for all permutations of the keys and leave the rest to the optimizer. But that might get very expensive, so we try to pick only a couple interesting orderings based on both local and global information. When planning the grouping path, we explore statistics (number of distinct values, cost of the comparison function) for the keys and reorder them to minimize comparison costs. Intuitively, it may be better to perform more expensive comparisons (for complex data types, etc.) last because maybe the cheaper comparisons will be enough. Similarly, the higher the cardinality of a key, the lower the probability we'll need to compare more keys. The patch generates and costs various orderings, picking the cheapest ones. The ordering of group keys may interact with other parts of the query, some of which may not be known while planning the grouping. For example, there may be an explicit ORDER BY clause or some other ordering-dependent operation higher up in the query, and using the same ordering may allow using either incremental sort or even eliminating the sort entirely. The patch generates orderings and picks those, minimizing the comparison cost (for various path keys), and then adds orderings that might be useful for operations higher up in the plan (ORDER BY, etc.). Finally, it always keeps the ordering specified in the query, assuming the user might have additional insights. This introduces a new GUC enable_group_by_reordering so that the optimization may be disabled if needed. --- .../postgres_fdw/expected/postgres_fdw.out | 36 +- src/backend/optimizer/path/costsize.c | 363 ++++++++++- src/backend/optimizer/path/equivclass.c | 13 +- src/backend/optimizer/path/pathkeys.c | 602 ++++++++++++++++++ src/backend/optimizer/plan/planner.c | 465 ++++++++------ src/backend/optimizer/util/pathnode.c | 4 +- src/backend/utils/adt/selfuncs.c | 38 +- src/backend/utils/misc/guc_tables.c | 10 + src/backend/utils/misc/postgresql.conf.sample | 1 + src/include/nodes/pathnodes.h | 10 + src/include/optimizer/cost.h | 4 +- src/include/optimizer/paths.h | 7 + src/include/utils/selfuncs.h | 5 + src/test/regress/expected/aggregates.out | 244 ++++++- .../regress/expected/incremental_sort.out | 2 +- src/test/regress/expected/join.out | 51 +- src/test/regress/expected/merge.out | 15 +- .../regress/expected/partition_aggregate.out | 44 +- src/test/regress/expected/partition_join.out | 75 +-- src/test/regress/expected/sysviews.out | 3 +- src/test/regress/expected/union.out | 60 +- src/test/regress/sql/aggregates.sql | 99 +++ src/test/regress/sql/incremental_sort.sql | 2 +- 23 files changed, 1771 insertions(+), 382 deletions(-) diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out index 144c114d0f..63af7feabe 100644 --- a/contrib/postgres_fdw/expected/postgres_fdw.out +++ b/contrib/postgres_fdw/expected/postgres_fdw.out @@ -2319,18 +2319,21 @@ SELECT t1."C 1" FROM "S 1"."T 1" t1, LATERAL (SELECT DISTINCT t2.c1, t3.c1 FROM -- join with pseudoconstant quals EXPLAIN (VERBOSE, COSTS OFF) SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1 AND CURRENT_USER = SESSION_USER) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10; - QUERY PLAN ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + QUERY PLAN +----------------------------------------------------------------------------------------------------------------------------------------------- Limit Output: t1.c1, t2.c1, t1.c3 - -> Result + -> Sort Output: t1.c1, t2.c1, t1.c3 - One-Time Filter: (CURRENT_USER = SESSION_USER) - -> Foreign Scan - Output: t1.c1, t1.c3, t2.c1 - Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2) - Remote SQL: SELECT r1."C 1", r2."C 1", r1.c3 FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r2."C 1" = r1."C 1")))) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST -(9 rows) + Sort Key: t1.c3, t1.c1 + -> Result + Output: t1.c1, t2.c1, t1.c3 + One-Time Filter: (CURRENT_USER = SESSION_USER) + -> Foreign Scan + Output: t1.c1, t1.c3, t2.c1 + Relations: (public.ft1 t1) INNER JOIN (public.ft2 t2) + Remote SQL: SELECT r1."C 1", r2."C 1", r1.c3 FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r2."C 1" = r1."C 1")))) +(12 rows) -- non-Var items in targetlist of the nullable rel of a join preventing -- push-down in some cases @@ -9609,13 +9612,16 @@ SELECT t1.a,t2.b,t3.c FROM fprt1 t1 INNER JOIN fprt2 t2 ON (t1.a = t2.b) INNER J -- left outer join + nullable clause EXPLAIN (VERBOSE, COSTS OFF) SELECT t1.a,t2.b,t2.c FROM fprt1 t1 LEFT JOIN (SELECT * FROM fprt2 WHERE a < 10) t2 ON (t1.a = t2.b and t1.b = t2.a) WHERE t1.a < 10 ORDER BY 1,2,3; - QUERY PLAN ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - Foreign Scan + QUERY PLAN +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Sort Output: t1.a, fprt2.b, fprt2.c - Relations: (public.ftprt1_p1 t1) LEFT JOIN (public.ftprt2_p1 fprt2) - Remote SQL: SELECT r5.a, r6.b, r6.c FROM (public.fprt1_p1 r5 LEFT JOIN public.fprt2_p1 r6 ON (((r5.a = r6.b)) AND ((r5.b = r6.a)) AND ((r6.a < 10)))) WHERE ((r5.a < 10)) ORDER BY r5.a ASC NULLS LAST, r6.b ASC NULLS LAST, r6.c ASC NULLS LAST -(4 rows) + Sort Key: t1.a, fprt2.b, fprt2.c + -> Foreign Scan + Output: t1.a, fprt2.b, fprt2.c + Relations: (public.ftprt1_p1 t1) LEFT JOIN (public.ftprt2_p1 fprt2) + Remote SQL: SELECT r5.a, r6.b, r6.c FROM (public.fprt1_p1 r5 LEFT JOIN public.fprt2_p1 r6 ON (((r5.a = r6.b)) AND ((r5.b = r6.a)) AND ((r6.a < 10)))) WHERE ((r5.a < 10)) +(7 rows) SELECT t1.a,t2.b,t2.c FROM fprt1 t1 LEFT JOIN (SELECT * FROM fprt2 WHERE a < 10) t2 ON (t1.a = t2.b and t1.b = t2.a) WHERE t1.a < 10 ORDER BY 1,2,3; a | b | c diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index d6ceafd51c..6f11f679ba 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -1816,6 +1816,327 @@ cost_recursive_union(Path *runion, Path *nrterm, Path *rterm) rterm->pathtarget->width); } +/* + * is_fake_var + * Workaround for generate_append_tlist() which generates fake Vars with + * varno == 0, that will cause a fail of estimate_num_group() call + * + * XXX Ummm, why would estimate_num_group fail with this? + */ +static bool +is_fake_var(Expr *expr) +{ + if (IsA(expr, RelabelType)) + expr = (Expr *) ((RelabelType *) expr)->arg; + + return (IsA(expr, Var) && ((Var *) expr)->varno == 0); +} + +/* + * get_width_cost_multiplier + * Returns relative complexity of comparing two values based on its width. + * The idea behind is that the comparison becomes more expensive the longer the + * value is. Return value is in cpu_operator_cost units. + */ +static double +get_width_cost_multiplier(PlannerInfo *root, Expr *expr) +{ + double width = -1.0; /* fake value */ + + if (IsA(expr, RelabelType)) + expr = (Expr *) ((RelabelType *) expr)->arg; + + /* Try to find actual stat in corresponding relation */ + if (IsA(expr, Var)) + { + Var *var = (Var *) expr; + + if (var->varno > 0 && var->varno < root->simple_rel_array_size) + { + RelOptInfo *rel = root->simple_rel_array[var->varno]; + + if (rel != NULL && + var->varattno >= rel->min_attr && + var->varattno <= rel->max_attr) + { + int ndx = var->varattno - rel->min_attr; + + if (rel->attr_widths[ndx] > 0) + width = rel->attr_widths[ndx]; + } + } + } + + /* Didn't find any actual stats, try using type width instead. */ + if (width < 0.0) + { + Node *node = (Node *) expr; + + width = get_typavgwidth(exprType(node), exprTypmod(node)); + } + + /* + * Values are passed as Datum type, so comparisons can't be cheaper than + * comparing a Datum value. + * + * FIXME I find this reasoning questionable. We may pass int2, and + * comparing it is probably a bit cheaper than comparing a bigint. + */ + if (width <= sizeof(Datum)) + return 1.0; + + /* + * We consider the cost of a comparison not to be directly proportional to + * width of the argument, because widths of the arguments could be + * slightly different (we only know the average width for the whole + * column). So we use log16(width) as an estimate. + */ + return 1.0 + 0.125 * LOG2(width / sizeof(Datum)); +} + +/* + * compute_cpu_sort_cost + * compute CPU cost of sort (i.e. in-memory) + * + * The main thing we need to calculate to estimate sort CPU costs is the number + * of calls to the comparator functions. The difficulty is that for multi-column + * sorts there may be different data types involved (for some of which the calls + * may be much more expensive). Furthermore, columns may have a very different + * number of distinct values - the higher the number, the fewer comparisons will + * be needed for the following columns. + * + * The algorithm is incremental - we add pathkeys one by one, and at each step we + * estimate the number of necessary comparisons (based on the number of distinct + * groups in the current pathkey prefix and the new pathkey), and the comparison + * costs (which is data type specific). + * + * Estimation of the number of comparisons is based on ideas from: + * + * "Quicksort Is Optimal", Robert Sedgewick, Jon Bentley, 2002 + * [https://www.cs.princeton.edu/~rs/talks/QuicksortIsOptimal.pdf] + * + * In term of that paper, let N - number of tuples, Xi - number of identical + * tuples with value Ki, then the estimate of number of comparisons is: + * + * log(N! / (X1! * X2! * ..)) ~ sum(Xi * log(N/Xi)) + * + * We assume all Xi the same because now we don't have any estimation of + * group sizes, we have only know the estimate of number of groups (distinct + * values). In that case, formula becomes: + * + * N * log(NumberOfGroups) + * + * For multi-column sorts we need to estimate the number of comparisons for + * each individual column - for example with columns (c1, c2, ..., ck) we + * can estimate that number of comparisons on ck is roughly + * + * ncomparisons(c1, c2, ..., ck) / ncomparisons(c1, c2, ..., c(k-1)) + * + * Let k be a column number, Gk - number of groups defined by k columns, and Fk + * the cost of the comparison is + * + * N * sum( Fk * log(Gk) ) + * + * Note: We also consider column width, not just the comparator cost. + * + * NOTE: some callers currently pass NIL for pathkeys because they + * can't conveniently supply the sort keys. In this case, it will fallback to + * simple comparison cost estimate. + */ +static Cost +compute_cpu_sort_cost(PlannerInfo *root, List *pathkeys, int nPresortedKeys, + Cost comparison_cost, double tuples, double output_tuples, + bool heapSort) +{ + Cost per_tuple_cost = 0.0; + ListCell *lc; + List *pathkeyExprs = NIL; + double tuplesPerPrevGroup = tuples; + double totalFuncCost = 1.0; + bool has_fake_var = false; + int i = 0; + Oid prev_datatype = InvalidOid; + List *cache_varinfos = NIL; + + /* fallback if pathkeys is unknown */ + if (list_length(pathkeys) == 0) + { + /* + * If we'll use a bounded heap-sort keeping just K tuples in memory, + * for a total number of tuple comparisons of N log2 K; but the + * constant factor is a bit higher than for quicksort. Tweak it so + * that the cost curve is continuous at the crossover point. + */ + output_tuples = (heapSort) ? 2.0 * output_tuples : tuples; + per_tuple_cost += 2.0 * cpu_operator_cost * LOG2(output_tuples); + + /* add cost provided by caller */ + per_tuple_cost += comparison_cost; + + return per_tuple_cost * tuples; + } + + /* + * Computing total cost of sorting takes into account the per-column + * comparison function cost. We try to compute the needed number of + * comparisons per column. + */ + foreach(lc, pathkeys) + { + PathKey *pathkey = (PathKey *) lfirst(lc); + EquivalenceMember *em; + double nGroups, + correctedNGroups; + Cost funcCost = 1.0; + + /* + * We believe that equivalence members aren't very different, so, to + * estimate cost we consider just the first member. + */ + em = (EquivalenceMember *) linitial(pathkey->pk_eclass->ec_members); + + if (em->em_datatype != InvalidOid) + { + /* do not lookup funcCost if the data type is the same */ + if (prev_datatype != em->em_datatype) + { + Oid sortop; + QualCost cost; + + sortop = get_opfamily_member(pathkey->pk_opfamily, + em->em_datatype, em->em_datatype, + pathkey->pk_strategy); + + cost.startup = 0; + cost.per_tuple = 0; + add_function_cost(root, get_opcode(sortop), NULL, &cost); + + /* + * add_function_cost returns the product of cpu_operator_cost + * and procost, but we need just procost, co undo that. + */ + funcCost = cost.per_tuple / cpu_operator_cost; + + prev_datatype = em->em_datatype; + } + } + + /* factor in the width of the values in this column */ + funcCost *= get_width_cost_multiplier(root, em->em_expr); + + /* now we have per-key cost, so add to the running total */ + totalFuncCost += funcCost; + + /* remember if we have found a fake Var in pathkeys */ + has_fake_var |= is_fake_var(em->em_expr); + pathkeyExprs = lappend(pathkeyExprs, em->em_expr); + + /* + * We need to calculate the number of comparisons for this column, + * which requires knowing the group size. So we estimate the number of + * groups by calling estimate_num_groups_incremental(), which + * estimates the group size for "new" pathkeys. + * + * Note: estimate_num_groups_incremental does not handle fake Vars, so + * use a default estimate otherwise. + */ + if (!has_fake_var) + nGroups = estimate_num_groups_incremental(root, pathkeyExprs, + tuplesPerPrevGroup, NULL, NULL, + &cache_varinfos, + list_length(pathkeyExprs) - 1); + else if (tuples > 4.0) + + /* + * Use geometric mean as estimation if there are no stats. + * + * We don't use DEFAULT_NUM_DISTINCT here, because that's used for + * a single column, but here we're dealing with multiple columns. + */ + nGroups = ceil(2.0 + sqrt(tuples) * (i + 1) / list_length(pathkeys)); + else + nGroups = tuples; + + /* + * Presorted keys are not considered in the cost above, but we still + * do have to compare them in the qsort comparator. So make sure to + * factor in the cost in that case. + */ + if (i >= nPresortedKeys) + { + if (heapSort) + { + /* + * have to keep at least one group, and a multiple of group + * size + */ + correctedNGroups = ceil(output_tuples / tuplesPerPrevGroup); + } + else + /* all groups in the input */ + correctedNGroups = nGroups; + + correctedNGroups = Max(1.0, ceil(correctedNGroups)); + + per_tuple_cost += totalFuncCost * LOG2(correctedNGroups); + } + + i++; + + /* + * Once we get single-row group, it means tuples in the group are + * unique and we can skip all remaining columns. + */ + if (tuplesPerPrevGroup <= nGroups) + break; + + /* + * Uniform distributions with all groups being of the same size are + * the best case, with nice smooth behavior. Real-world distributions + * tend not to be uniform, though, and we don't have any reliable + * easy-to-use information. As a basic defense against skewed + * distributions, we use a 1.5 factor to make the expected group a bit + * larger, but we need to be careful not to make the group larger than + * in the preceding step. + */ + tuplesPerPrevGroup = Min(tuplesPerPrevGroup, + ceil(1.5 * tuplesPerPrevGroup / nGroups)); + } + + list_free(pathkeyExprs); + + /* per_tuple_cost is in cpu_operator_cost units */ + per_tuple_cost *= cpu_operator_cost; + + /* + * Accordingly to "Introduction to algorithms", Thomas H. Cormen, Charles + * E. Leiserson, Ronald L. Rivest, ISBN 0-07-013143-0, quicksort + * estimation formula has additional term proportional to number of tuples + * (see Chapter 8.2 and Theorem 4.1). That affects cases with a low number + * of tuples, approximately less than 1e4. We could implement it as an + * additional multiplier under the logarithm, but we use a bit more + * complex formula which takes into account the number of unique tuples + * and it's not clear how to combine the multiplier with the number of + * groups. Estimate it as 10 cpu_operator_cost units. + */ + per_tuple_cost += 10 * cpu_operator_cost; + + per_tuple_cost += comparison_cost; + + return tuples * per_tuple_cost; +} + +/* + * simple wrapper just to estimate best sort path + */ +Cost +cost_sort_estimate(PlannerInfo *root, List *pathkeys, int nPresortedKeys, + double tuples) +{ + return compute_cpu_sort_cost(root, pathkeys, nPresortedKeys, + 0, tuples, tuples, false); +} + /* * cost_tuplesort * Determines and returns the cost of sorting a relation using tuplesort, @@ -1832,7 +2153,7 @@ cost_recursive_union(Path *runion, Path *nrterm, Path *rterm) * number of initial runs formed and M is the merge order used by tuplesort.c. * Since the average initial run should be about sort_mem, we have * disk traffic = 2 * relsize * ceil(logM(p / sort_mem)) - * cpu = comparison_cost * t * log2(t) + * and cpu cost (computed by compute_cpu_sort_cost()). * * If the sort is bounded (i.e., only the first k result tuples are needed) * and k tuples can fit into sort_mem, we use a heap method that keeps only @@ -1851,9 +2172,11 @@ cost_recursive_union(Path *runion, Path *nrterm, Path *rterm) * 'comparison_cost' is the extra cost per comparison, if any * 'sort_mem' is the number of kilobytes of work memory allowed for the sort * 'limit_tuples' is the bound on the number of output tuples; -1 if no bound + * 'startup_cost' is expected to be 0 at input. If there is "input cost" it should + * be added by caller later */ static void -cost_tuplesort(Cost *startup_cost, Cost *run_cost, +cost_tuplesort(PlannerInfo *root, List *pathkeys, Cost *startup_cost, Cost *run_cost, double tuples, int width, Cost comparison_cost, int sort_mem, double limit_tuples) @@ -1870,9 +2193,6 @@ cost_tuplesort(Cost *startup_cost, Cost *run_cost, if (tuples < 2.0) tuples = 2.0; - /* Include the default cost-per-comparison */ - comparison_cost += 2.0 * cpu_operator_cost; - /* Do we have a useful LIMIT? */ if (limit_tuples > 0 && limit_tuples < tuples) { @@ -1896,12 +2216,10 @@ cost_tuplesort(Cost *startup_cost, Cost *run_cost, double log_runs; double npageaccesses; - /* - * CPU costs - * - * Assume about N log2 N comparisons - */ - *startup_cost = comparison_cost * tuples * LOG2(tuples); + /* CPU costs */ + *startup_cost = compute_cpu_sort_cost(root, pathkeys, 0, + comparison_cost, tuples, + tuples, false); /* Disk costs */ @@ -1917,18 +2235,17 @@ cost_tuplesort(Cost *startup_cost, Cost *run_cost, } else if (tuples > 2 * output_tuples || input_bytes > sort_mem_bytes) { - /* - * We'll use a bounded heap-sort keeping just K tuples in memory, for - * a total number of tuple comparisons of N log2 K; but the constant - * factor is a bit higher than for quicksort. Tweak it so that the - * cost curve is continuous at the crossover point. - */ - *startup_cost = comparison_cost * tuples * LOG2(2.0 * output_tuples); + /* We'll use a bounded heap-sort keeping just K tuples in memory. */ + *startup_cost = compute_cpu_sort_cost(root, pathkeys, 0, + comparison_cost, tuples, + output_tuples, true); } else { /* We'll use plain quicksort on all the input tuples */ - *startup_cost = comparison_cost * tuples * LOG2(tuples); + *startup_cost = compute_cpu_sort_cost(root, pathkeys, 0, + comparison_cost, tuples, + tuples, false); } /* @@ -2042,7 +2359,7 @@ cost_incremental_sort(Path *path, * Estimate the average cost of sorting of one group where presorted keys * are equal. */ - cost_tuplesort(&group_startup_cost, &group_run_cost, + cost_tuplesort(root, pathkeys, &group_startup_cost, &group_run_cost, group_tuples, width, comparison_cost, sort_mem, limit_tuples); @@ -2102,7 +2419,7 @@ cost_sort(Path *path, PlannerInfo *root, Cost startup_cost; Cost run_cost; - cost_tuplesort(&startup_cost, &run_cost, + cost_tuplesort(root, pathkeys, &startup_cost, &run_cost, tuples, width, comparison_cost, sort_mem, limit_tuples); @@ -2200,7 +2517,7 @@ append_nonpartial_cost(List *subpaths, int numpaths, int parallel_workers) * Determines and returns the cost of an Append node. */ void -cost_append(AppendPath *apath) +cost_append(AppendPath *apath, PlannerInfo *root) { ListCell *l; @@ -2268,7 +2585,7 @@ cost_append(AppendPath *apath) * any child. */ cost_sort(&sort_path, - NULL, /* doesn't currently need root */ + root, pathkeys, subpath->total_cost, subpath->rows, diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c index 7fa502d6e2..07edd4f38e 100644 --- a/src/backend/optimizer/path/equivclass.c +++ b/src/backend/optimizer/path/equivclass.c @@ -652,7 +652,18 @@ get_eclass_for_sort_expr(PlannerInfo *root, if (opcintype == cur_em->em_datatype && equal(expr, cur_em->em_expr)) - return cur_ec; /* Match! */ + { + /* + * Match! + * + * Copy the sortref if it wasn't set yet. That may happen if + * the ec was constructed from WHERE clause, i.e. it doesn't + * have a target reference at all. + */ + if (cur_ec->ec_sortref == 0 && sortref > 0) + cur_ec->ec_sortref = sortref; + return cur_ec; + } } } diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c index fdb60aaa8d..2dfc983a6d 100644 --- a/src/backend/optimizer/path/pathkeys.c +++ b/src/backend/optimizer/path/pathkeys.c @@ -17,17 +17,24 @@ */ #include "postgres.h" +#include + +#include "miscadmin.h" #include "access/stratnum.h" #include "catalog/pg_opfamily.h" #include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" #include "nodes/plannodes.h" +#include "optimizer/cost.h" #include "optimizer/optimizer.h" #include "optimizer/pathnode.h" #include "optimizer/paths.h" #include "partitioning/partbounds.h" #include "utils/lsyscache.h" +#include "utils/selfuncs.h" +/* Consider reordering of GROUP BY keys? */ +bool enable_group_by_reordering = true; static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys); static bool matches_boolean_partition_clause(RestrictInfo *rinfo, @@ -350,6 +357,548 @@ pathkeys_contained_in(List *keys1, List *keys2) return false; } +/* + * group_keys_reorder_by_pathkeys + * Reorder GROUP BY keys to match pathkeys of input path. + * + * Function returns new lists (pathkeys and clauses), original GROUP BY lists + * stay untouched. + * + * Returns the number of GROUP BY keys with a matching pathkey. + */ +int +group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys, + List **group_clauses, + int num_groupby_pathkeys) +{ + List *new_group_pathkeys = NIL, + *new_group_clauses = NIL; + ListCell *lc; + int n; + + if (pathkeys == NIL || *group_pathkeys == NIL) + return 0; + + /* + * Walk the pathkeys (determining ordering of the input path) and see if + * there's a matching GROUP BY key. If we find one, we append it to the + * list, and do the same for the clauses. + * + * Once we find the first pathkey without a matching GROUP BY key, the + * rest of the pathkeys are useless and can't be used to evaluate the + * grouping, so we abort the loop and ignore the remaining pathkeys. + * + * XXX Pathkeys are built in a way to allow simply comparing pointers. + */ + foreach(lc, pathkeys) + { + PathKey *pathkey = (PathKey *) lfirst(lc); + SortGroupClause *sgc; + + if (foreach_current_index(lc) >= num_groupby_pathkeys || + !list_member_ptr(*group_pathkeys, pathkey)) + /* */ + break; + + /* abort on first mismatch */ + sgc = get_sortgroupref_clause_noerr(pathkey->pk_eclass->ec_sortref, + *group_clauses); + if (!sgc) + /* The grouping clause is not cover this paathkey */ + break; + + new_group_pathkeys = lappend(new_group_pathkeys, pathkey); + new_group_clauses = lappend(new_group_clauses, sgc); + } + + /* remember the number of pathkeys with a matching GROUP BY key */ + n = list_length(new_group_pathkeys); + + /* append the remaining group pathkeys (will be treated as not sorted) */ + *group_pathkeys = list_concat_unique_ptr(new_group_pathkeys, + *group_pathkeys); + *group_clauses = list_concat_unique_ptr(new_group_clauses, + *group_clauses); + + return n; +} + +/* + * Used to generate all permutations of a pathkey list. + */ +typedef struct PathkeyMutatorState +{ + List *elemsList; + ListCell **elemCells; + void **elems; + int *positions; + int mutatorNColumns; + int count; +} PathkeyMutatorState; + + +/* + * PathkeyMutatorInit + * Initialize state of the permutation generator. + * + * We want to generate permutations of elements in the "elems" list. We may want + * to skip some number of elements at the beginning (when treating as presorted) + * or at the end (we only permute a limited number of group keys). + * + * The list is decomposed into elements, and we also keep pointers to individual + * cells. This allows us to build the permuted list quickly and cheaply, without + * creating any copies. + */ +static void +PathkeyMutatorInit(PathkeyMutatorState *state, List *elems, int start, int end) +{ + int i; + int n = end - start; + ListCell *lc; + + memset(state, 0, sizeof(*state)); + + state->mutatorNColumns = n; + + state->elemsList = list_copy(elems); + + state->elems = palloc(sizeof(void *) * n); + state->elemCells = palloc(sizeof(ListCell *) * n); + state->positions = palloc(sizeof(int) * n); + + i = 0; + for_each_cell(lc, state->elemsList, list_nth_cell(state->elemsList, start)) + { + state->elemCells[i] = lc; + state->elems[i] = lfirst(lc); + state->positions[i] = i + 1; + i++; + + if (i >= n) + break; + } +} + +/* Swap two elements of an array. */ +static void +PathkeyMutatorSwap(int *a, int i, int j) +{ + int s = a[i]; + + a[i] = a[j]; + a[j] = s; +} + +/* + * Generate the next permutation of elements. + */ +static bool +PathkeyMutatorNextSet(int *a, int n) +{ + int j, + k, + l, + r; + + j = n - 2; + + while (j >= 0 && a[j] >= a[j + 1]) + j--; + + if (j < 0) + return false; + + k = n - 1; + + while (k > 0 && a[j] >= a[k]) + k--; + + PathkeyMutatorSwap(a, j, k); + + l = j + 1; + r = n - 1; + + while (l < r) + PathkeyMutatorSwap(a, l++, r--); + + return true; +} + +/* + * PathkeyMutatorNext + * Generate the next permutation of list of elements. + * + * Returns the next permutation (as a list of elements) or NIL if there are no + * more permutations. + */ +static List * +PathkeyMutatorNext(PathkeyMutatorState *state) +{ + int i; + + state->count++; + + /* first permutation is original list */ + if (state->count == 1) + return state->elemsList; + + /* when there are no more permutations, return NIL */ + if (!PathkeyMutatorNextSet(state->positions, state->mutatorNColumns)) + { + pfree(state->elems); + pfree(state->elemCells); + pfree(state->positions); + + list_free(state->elemsList); + + return NIL; + } + + /* update the list cells to point to the right elements */ + for (i = 0; i < state->mutatorNColumns; i++) + lfirst(state->elemCells[i]) = + (void *) state->elems[state->positions[i] - 1]; + + return state->elemsList; +} + +/* + * Cost of comparing pathkeys. + */ +typedef struct PathkeySortCost +{ + Cost cost; + PathKey *pathkey; +} PathkeySortCost; + +static int +pathkey_sort_cost_comparator(const void *_a, const void *_b) +{ + const PathkeySortCost *a = (PathkeySortCost *) _a; + const PathkeySortCost *b = (PathkeySortCost *) _b; + + if (a->cost < b->cost) + return -1; + else if (a->cost == b->cost) + return 0; + return 1; +} + +/* + * get_cheapest_group_keys_order + * Reorders the group pathkeys / clauses to minimize the comparison cost. + * + * Given the list of pathkeys in '*group_pathkeys', we try to arrange these + * in an order that minimizes the sort costs that will be incurred by the + * GROUP BY. The costs mainly depend on the cost of the sort comparator + * function(s) and the number of distinct values in each column of the GROUP + * BY clause (*group_clauses). Sorting on subsequent columns is only required + * for tiebreak situations where two values sort equally. + * + * In case the input is partially sorted, only the remaining pathkeys are + * considered. 'n_preordered' denotes how many of the leading *group_pathkeys + * the input is presorted by. + * + * Returns true and sets *group_pathkeys and *group_clauses to the newly + * ordered versions of the lists that were passed in via these parameters. + * If no reordering was deemed necessary then we return false, in which case + * the *group_pathkeys and *group_clauses lists are left untouched. The + * original *group_pathkeys and *group_clauses parameter values are never + * destructively modified in place. + */ +static bool +get_cheapest_group_keys_order(PlannerInfo *root, double nrows, + List **group_pathkeys, List **group_clauses, + int n_preordered) +{ + List *new_group_pathkeys = NIL; + List *new_group_clauses = NIL; + List *var_group_pathkeys; + ListCell *cell; + PathkeyMutatorState mstate; + double cheapest_sort_cost = DBL_MAX; + int nFreeKeys; + int nToPermute; + + /* If there are less than 2 unsorted pathkeys, we're done. */ + if (root->num_groupby_pathkeys - n_preordered < 2) + return false; + + /* + * We could exhaustively cost all possible orderings of the pathkeys, but + * for a large number of pathkeys it might be prohibitively expensive. So + * we try to apply simple cheap heuristics first - we sort the pathkeys by + * sort cost (as if the pathkey was sorted independently) and then check + * only the four cheapest pathkeys. The remaining pathkeys are kept + * ordered by cost. + * + * XXX This is a very simple heuristics, but likely to work fine for most + * cases (because the number of GROUP BY clauses tends to be lower than + * 4). But it ignores how the number of distinct values in each pathkey + * affects the following steps. It might be better to use "more expensive" + * pathkey first if it has many distinct values, because it then limits + * the number of comparisons for the remaining pathkeys. But evaluating + * that is likely quite the expensive. + */ + nFreeKeys = root->num_groupby_pathkeys - n_preordered; + nToPermute = 4; + if (nFreeKeys > nToPermute) + { + PathkeySortCost *costs = palloc(sizeof(PathkeySortCost) * nFreeKeys); + PathkeySortCost *cost = costs; + int i = 0; + + /* + * Estimate cost for sorting individual pathkeys skipping the + * pre-ordered pathkeys. + */ + for_each_from(cell, *group_pathkeys, n_preordered) + { + PathKey *pathkey = (PathKey *) lfirst(cell); + List *to_cost; + + if (foreach_current_index(cell) >= root->num_groupby_pathkeys) + break; + + i++; + to_cost = list_make1(pathkey); + cost->pathkey = pathkey; + cost->cost = cost_sort_estimate(root, to_cost, 0, nrows); + cost++; + + list_free(to_cost); + } + + Assert(i == nFreeKeys); + /* sort the pathkeys by sort cost in ascending order */ + qsort(costs, nFreeKeys, sizeof(*costs), pathkey_sort_cost_comparator); + + /* + * Rebuild the list of pathkeys - first the preordered ones, then the + * rest ordered by cost. + */ + new_group_pathkeys = list_copy_head(*group_pathkeys, n_preordered); + + for (int i = 0; i < nFreeKeys; i++) + new_group_pathkeys = lappend(new_group_pathkeys, costs[i].pathkey); + + new_group_pathkeys = list_copy_tail(*group_pathkeys, + root->num_groupby_pathkeys); + pfree(costs); + } + else + { + /* Copy the list, so that we can free the new list by list_free. */ + new_group_pathkeys = list_copy(*group_pathkeys); + nToPermute = nFreeKeys; + } + + Assert(list_length(new_group_pathkeys) == list_length(*group_pathkeys)); + + /* + * Generate pathkey lists with permutations of the first nToPermute + * pathkeys. + * + * XXX We simply calculate sort cost for each individual pathkey list, but + * there's room for two dynamic programming optimizations here. Firstly, + * we may pass the current "best" cost to cost_sort_estimate so that it + * can "abort" if the estimated pathkeys list exceeds it. Secondly, it + * could pass the return information about the position when it exceeded + * the cost, and we could skip all permutations with the same prefix. + * + * Imagine we've already found ordering with cost C1, and we're evaluating + * another ordering - cost_sort_estimate() calculates cost by adding the + * pathkeys one by one (more or less), and the cost only grows. If at any + * point it exceeds C1, it can't possibly be "better" so we can discard + * it. But we also know that we can discard all ordering with the same + * prefix, because if we're estimating (a,b,c,d) and we exceed C1 at (a,b) + * then the same thing will happen for any ordering with this prefix. + */ + PathkeyMutatorInit(&mstate, new_group_pathkeys, n_preordered, + n_preordered + nToPermute); + + while ((var_group_pathkeys = PathkeyMutatorNext(&mstate)) != NIL) + { + Cost cost; + + cost = cost_sort_estimate(root, var_group_pathkeys, n_preordered, nrows); + + if (cost < cheapest_sort_cost) + { + list_free(new_group_pathkeys); + new_group_pathkeys = list_copy(var_group_pathkeys); + cheapest_sort_cost = cost; + } + } + + Assert(list_length(new_group_pathkeys) == list_length(root->group_pathkeys)); + + /* Reorder the group clauses according to the reordered pathkeys. */ + foreach(cell, new_group_pathkeys) + { + PathKey *pathkey = (PathKey *) lfirst(cell); + + if (foreach_current_index(cell) >= root->num_groupby_pathkeys) + break; + + new_group_clauses = + lappend(new_group_clauses, + get_sortgroupref_clause(pathkey->pk_eclass->ec_sortref, + *group_clauses)); + } + + /* Just append the rest GROUP BY clauses */ + new_group_clauses = list_concat_unique_ptr(new_group_clauses, + *group_clauses); + *group_pathkeys = new_group_pathkeys; + *group_clauses = new_group_clauses; + Assert(list_length(new_group_clauses) == list_length(root->processed_groupClause)); + + return true; +} + +/* + * get_useful_group_keys_orderings + * Determine which orderings of GROUP BY keys are potentially interesting. + * + * Returns list of PathKeyInfo items, each representing an interesting ordering + * of GROUP BY keys. Each item stores pathkeys and clauses in matching order. + * + * The function considers (and keeps) multiple group by orderings: + * + * - the original ordering, as specified by the GROUP BY clause + * + * - GROUP BY keys reordered to minimize the sort cost + * + * - GROUP BY keys reordered to match path ordering (as much as possible), with + * the tail reordered to minimize the sort cost + * + * - GROUP BY keys to match target ORDER BY clause (as much as possible), with + * the tail reordered to minimize the sort cost + * + * There are other potentially interesting orderings (e.g. it might be best to + * match the first ORDER BY key, order the remaining keys differently and then + * rely on the incremental sort to fix this), but we ignore those for now. To + * make this work we'd have to pretty much generate all possible permutations. + */ +List * +get_useful_group_keys_orderings(PlannerInfo *root, double nrows, + List *path_pathkeys) +{ + Query *parse = root->parse; + List *infos = NIL; + PathKeyInfo *info; + int n_preordered = 0; + + List *pathkeys = root->group_pathkeys; + List *clauses = root->processed_groupClause; + + /* always return at least the original pathkeys/clauses */ + info = makeNode(PathKeyInfo); + info->pathkeys = pathkeys; + info->clauses = clauses; + infos = lappend(infos, info); + + /* + * Should we try generating alternative orderings of the group keys? If + * not, we produce only the order specified in the query, i.e. the + * optimization is effectively disabled. + */ + if (!enable_group_by_reordering) + return infos; + + /* for grouping sets we can't do any reordering */ + if (parse->groupingSets) + return infos; + + /* + * Try reordering pathkeys to minimize the sort cost, ignoring both the + * target ordering (ORDER BY) and ordering of the input path. + */ + if (get_cheapest_group_keys_order(root, nrows, &pathkeys, &clauses, + n_preordered)) + { + Assert(list_length(pathkeys) == list_length(clauses)); + + info = makeNode(PathKeyInfo); + info->pathkeys = pathkeys; + info->clauses = clauses; + + infos = lappend(infos, info); + } + + /* + * If the path is sorted in some way, try reordering the group keys to + * match as much of the ordering as possible - we get this sort for free + * (mostly). + * + * We must not do this when there are no grouping sets, because those use + * more complex logic to decide the ordering. + * + * XXX Isn't this somewhat redundant with presorted_keys? Actually, it's + * more a complement, because it allows benefiting from incremental sort + * as much as possible. + * + * XXX This does nothing if (n_preordered == 0). We shouldn't create the + * info in this case. + */ + if (path_pathkeys) + { + n_preordered = group_keys_reorder_by_pathkeys(path_pathkeys, + &pathkeys, + &clauses, + root->num_groupby_pathkeys); + + /* reorder the tail to minimize sort cost */ + get_cheapest_group_keys_order(root, nrows, &pathkeys, &clauses, + n_preordered); + + /* + * reorder the tail to minimize sort cost + * + * XXX Ignore the return value - there may be nothing to reorder, in + * which case get_cheapest_group_keys_order returns false. But we + * still want to keep the keys reordered to path_pathkeys. + */ + info = makeNode(PathKeyInfo); + info->pathkeys = pathkeys; + info->clauses = clauses; + + infos = lappend(infos, info); + } + + /* + * Try reordering pathkeys to minimize the sort cost (this time consider + * the ORDER BY clause, but only if set debug_group_by_match_order_by). + */ + if (root->sort_pathkeys) + { + n_preordered = group_keys_reorder_by_pathkeys(root->sort_pathkeys, + &pathkeys, + &clauses, + root->num_groupby_pathkeys); + + /* + * reorder the tail to minimize sort cost + * + * XXX Ignore the return value - there may be nothing to reorder, in + * which case get_cheapest_group_keys_order returns false. But we + * still want to keep the keys reordered to sort_pathkeys. + */ + get_cheapest_group_keys_order(root, nrows, &pathkeys, &clauses, + n_preordered); + + /* keep the group keys reordered to match ordering of input path */ + info = makeNode(PathKeyInfo); + info->pathkeys = pathkeys; + info->clauses = clauses; + + infos = lappend(infos, info); + } + + return infos; +} + /* * pathkeys_count_contained_in * Same as pathkeys_contained_in, but also sets length of longest @@ -1939,6 +2488,54 @@ pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys) return n_common_pathkeys; } +/* + * pathkeys_useful_for_grouping + * Count the number of pathkeys that are useful for grouping (instead of + * explicit sort) + * + * Group pathkeys could be reordered to benefit from the ordering. The + * ordering may not be "complete" and may require incremental sort, but that's + * fine. So we simply count prefix pathkeys with a matching group key, and + * stop once we find the first pathkey without a match. + * + * So e.g. with pathkeys (a,b,c) and group keys (a,b,e) this determines (a,b) + * pathkeys are useful for grouping, and we might do incremental sort to get + * path ordered by (a,b,e). + * + * This logic is necessary to retain paths with ordering not matching grouping + * keys directly, without the reordering. + * + * Returns the length of pathkey prefix with matching group keys. + */ +static int +pathkeys_useful_for_grouping(PlannerInfo *root, List *pathkeys) +{ + ListCell *key; + int n = 0; + + /* no special ordering requested for grouping */ + if (root->group_pathkeys == NIL) + return 0; + + /* unordered path */ + if (pathkeys == NIL) + return 0; + + /* walk the pathkeys and search for matching group key */ + foreach(key, pathkeys) + { + PathKey *pathkey = (PathKey *) lfirst(key); + + /* no matching group key, we're done */ + if (!list_member_ptr(root->group_pathkeys, pathkey)) + break; + + n++; + } + + return n; +} + /* * truncate_useless_pathkeys * Shorten the given pathkey list to just the useful pathkeys. @@ -1953,6 +2550,9 @@ truncate_useless_pathkeys(PlannerInfo *root, nuseful = pathkeys_useful_for_merging(root, rel, pathkeys); nuseful2 = pathkeys_useful_for_ordering(root, pathkeys); + if (nuseful2 > nuseful) + nuseful = nuseful2; + nuseful2 = pathkeys_useful_for_grouping(root, pathkeys); if (nuseful2 > nuseful) nuseful = nuseful2; @@ -1988,6 +2588,8 @@ has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel) { if (rel->joininfo != NIL || rel->has_eclass_joins) return true; /* might be able to use pathkeys for merging */ + if (root->group_pathkeys != NIL) + return true; /* might be able to use pathkeys for grouping */ if (root->query_pathkeys != NIL) return true; /* might be able to use them for ordering */ return false; /* definitely useless */ diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 44efb1f4eb..4a7c9d9440 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -2810,8 +2810,9 @@ remove_useless_groupby_columns(PlannerInfo *root) * * In principle it might be interesting to consider other orderings of the * GROUP BY elements, which could match the sort ordering of other - * possible plans (eg an indexscan) and thereby reduce cost. We don't - * bother with that, though. Hashed grouping will frequently win anyway. + * possible plans (eg an indexscan) and thereby reduce cost. However, we + * don't yet have sufficient information to do that here, so that's left until + * later in planning. See get_useful_group_keys_orderings(). * * Note: we need no comparable processing of the distinctClause because * the parser already enforced that that matches ORDER BY. @@ -3468,6 +3469,7 @@ standard_qp_callback(PlannerInfo *root, void *extra) tlist, true, &sortable); + if (!sortable) { /* Can't sort; no point in considering aggregate ordering either */ @@ -6842,90 +6844,108 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, */ foreach(lc, input_rel->pathlist) { + ListCell *lc2; Path *path = (Path *) lfirst(lc); - bool is_sorted; - int presorted_keys; + Path *path_save = path; + List *pathkey_orderings = NIL; - is_sorted = pathkeys_count_contained_in(root->group_pathkeys, - path->pathkeys, - &presorted_keys); + /* generate alternative group orderings that might be useful */ + pathkey_orderings = get_useful_group_keys_orderings(root, + path->rows, + path->pathkeys); + + Assert(list_length(pathkey_orderings) > 0); - if (!is_sorted) + foreach(lc2, pathkey_orderings) { - /* - * Try at least sorting the cheapest path and also try - * incrementally sorting any path which is partially sorted - * already (no need to deal with paths which have presorted - * keys when incremental sort is disabled unless it's the - * cheapest input path). - */ - if (path != cheapest_path && - (presorted_keys == 0 || !enable_incremental_sort)) - continue; + bool is_sorted; + int presorted_keys; + PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2); - /* - * We've no need to consider both a sort and incremental sort. - * We'll just do a sort if there are no presorted keys and an - * incremental sort when there are presorted keys. - */ - if (presorted_keys == 0 || !enable_incremental_sort) - path = (Path *) create_sort_path(root, - grouped_rel, - path, - root->group_pathkeys, - -1.0); - else - path = (Path *) create_incremental_sort_path(root, - grouped_rel, - path, - root->group_pathkeys, - presorted_keys, - -1.0); - } + /* restore the path (we replace it in the loop) */ + path = path_save; - /* Now decide what to stick atop it */ - if (parse->groupingSets) - { - consider_groupingsets_paths(root, grouped_rel, - path, true, can_hash, - gd, agg_costs, dNumGroups); - } - else if (parse->hasAggs) - { - /* - * We have aggregation, possibly with plain GROUP BY. Make an - * AggPath. - */ - add_path(grouped_rel, (Path *) + is_sorted = pathkeys_count_contained_in(info->pathkeys, + path->pathkeys, + &presorted_keys); + + if (!is_sorted) + { + /* + * Try at least sorting the cheapest path and also try + * incrementally sorting any path which is partially sorted + * already (no need to deal with paths which have presorted + * keys when incremental sort is disabled unless it's the + * cheapest input path). + */ + if (path != cheapest_path && + (presorted_keys == 0 || !enable_incremental_sort)) + continue; + + /* + * We've no need to consider both a sort and incremental sort. + * We'll just do a sort if there are no presorted keys and an + * incremental sort when there are presorted keys. + */ + if (presorted_keys == 0 || !enable_incremental_sort) + path = (Path *) create_sort_path(root, + grouped_rel, + path, + info->pathkeys, + -1.0); + else + path = (Path *) create_incremental_sort_path(root, + grouped_rel, + path, + info->pathkeys, + presorted_keys, + -1.0); + } + + /* Now decide what to stick atop it */ + if (parse->groupingSets) + { + consider_groupingsets_paths(root, grouped_rel, + path, true, can_hash, + gd, agg_costs, dNumGroups); + } + else if (parse->hasAggs) + { + /* + * We have aggregation, possibly with plain GROUP BY. Make an + * AggPath. + */ + add_path(grouped_rel, (Path *) create_agg_path(root, grouped_rel, path, grouped_rel->reltarget, parse->groupClause ? AGG_SORTED : AGG_PLAIN, AGGSPLIT_SIMPLE, - root->processed_groupClause, + info->clauses, havingQual, agg_costs, dNumGroups)); - } - else if (parse->groupClause) - { - /* - * We have GROUP BY without aggregation or grouping sets. Make - * a GroupPath. - */ - add_path(grouped_rel, (Path *) + } + else if (parse->groupClause) + { + /* + * We have GROUP BY without aggregation or grouping sets. Make + * a GroupPath. + */ + add_path(grouped_rel, (Path *) create_group_path(root, grouped_rel, path, - root->processed_groupClause, + info->clauses, havingQual, dNumGroups)); - } - else - { - /* Other cases should have been handled above */ - Assert(false); + } + else + { + /* Other cases should have been handled above */ + Assert(false); + } } } @@ -6937,69 +6957,84 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, { foreach(lc, partially_grouped_rel->pathlist) { + ListCell *lc2; Path *path = (Path *) lfirst(lc); - bool is_sorted; - int presorted_keys; + List *pathkey_orderings = NIL; - is_sorted = pathkeys_count_contained_in(root->group_pathkeys, - path->pathkeys, - &presorted_keys); + /* generate alternative group orderings that might be useful */ + pathkey_orderings = get_useful_group_keys_orderings(root, + path->rows, + path->pathkeys); - if (!is_sorted) + Assert(list_length(pathkey_orderings) > 0); + + /* process all potentially interesting grouping reorderings */ + foreach(lc2, pathkey_orderings) { - /* - * Try at least sorting the cheapest path and also try - * incrementally sorting any path which is partially - * sorted already (no need to deal with paths which have - * presorted keys when incremental sort is disabled unless - * it's the cheapest input path). - */ - if (path != partially_grouped_rel->cheapest_total_path && - (presorted_keys == 0 || !enable_incremental_sort)) - continue; + bool is_sorted; + int presorted_keys; + PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2); - /* - * We've no need to consider both a sort and incremental - * sort. We'll just do a sort if there are no pre-sorted - * keys and an incremental sort when there are presorted - * keys. - */ - if (presorted_keys == 0 || !enable_incremental_sort) - path = (Path *) create_sort_path(root, - grouped_rel, - path, - root->group_pathkeys, - -1.0); - else - path = (Path *) create_incremental_sort_path(root, - grouped_rel, - path, - root->group_pathkeys, - presorted_keys, - -1.0); - } + is_sorted = pathkeys_count_contained_in(root->group_pathkeys, + path->pathkeys, + &presorted_keys); - if (parse->hasAggs) - add_path(grouped_rel, (Path *) + if (!is_sorted) + { + /* + * Try at least sorting the cheapest path and also try + * incrementally sorting any path which is partially + * sorted already (no need to deal with paths which have + * presorted keys when incremental sort is disabled unless + * it's the cheapest input path). + */ + if (path != partially_grouped_rel->cheapest_total_path && + (presorted_keys == 0 || !enable_incremental_sort)) + continue; + + /* + * We've no need to consider both a sort and incremental + * sort. We'll just do a sort if there are no pre-sorted + * keys and an incremental sort when there are presorted + * keys. + */ + if (presorted_keys == 0 || !enable_incremental_sort) + path = (Path *) create_sort_path(root, + grouped_rel, + path, + info->pathkeys, + -1.0); + else + path = (Path *) create_incremental_sort_path(root, + grouped_rel, + path, + info->pathkeys, + presorted_keys, + -1.0); + } + + if (parse->hasAggs) + add_path(grouped_rel, (Path *) create_agg_path(root, grouped_rel, path, grouped_rel->reltarget, parse->groupClause ? AGG_SORTED : AGG_PLAIN, AGGSPLIT_FINAL_DESERIAL, - root->processed_groupClause, + info->clauses, havingQual, agg_final_costs, dNumGroups)); - else - add_path(grouped_rel, (Path *) + else + add_path(grouped_rel, (Path *) create_group_path(root, grouped_rel, path, - root->processed_groupClause, + info->clauses, havingQual, dNumGroups)); + } } } } @@ -7202,66 +7237,85 @@ create_partial_grouping_paths(PlannerInfo *root, */ foreach(lc, input_rel->pathlist) { + ListCell *lc2; Path *path = (Path *) lfirst(lc); - bool is_sorted; - int presorted_keys; + Path *path_save = path; + List *pathkey_orderings = NIL; - is_sorted = pathkeys_count_contained_in(root->group_pathkeys, - path->pathkeys, - &presorted_keys); - if (!is_sorted) + /* generate alternative group orderings that might be useful */ + pathkey_orderings = get_useful_group_keys_orderings(root, + path->rows, + path->pathkeys); + + Assert(list_length(pathkey_orderings) > 0); + + /* process all potentially interesting grouping reorderings */ + foreach(lc2, pathkey_orderings) { - /* - * Try at least sorting the cheapest path and also try - * incrementally sorting any path which is partially sorted - * already (no need to deal with paths which have presorted - * keys when incremental sort is disabled unless it's the - * cheapest input path). - */ - if (path != cheapest_total_path && - (presorted_keys == 0 || !enable_incremental_sort)) - continue; + bool is_sorted; + int presorted_keys; + PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2); - /* - * We've no need to consider both a sort and incremental sort. - * We'll just do a sort if there are no presorted keys and an - * incremental sort when there are presorted keys. - */ - if (presorted_keys == 0 || !enable_incremental_sort) - path = (Path *) create_sort_path(root, - partially_grouped_rel, - path, - root->group_pathkeys, - -1.0); - else - path = (Path *) create_incremental_sort_path(root, - partially_grouped_rel, - path, - root->group_pathkeys, - presorted_keys, - -1.0); - } + /* restore the path (we replace it in the loop) */ + path = path_save; - if (parse->hasAggs) - add_path(partially_grouped_rel, (Path *) + is_sorted = pathkeys_count_contained_in(info->pathkeys, + path->pathkeys, + &presorted_keys); + if (!is_sorted) + { + /* + * Try at least sorting the cheapest path and also try + * incrementally sorting any path which is partially sorted + * already (no need to deal with paths which have presorted + * keys when incremental sort is disabled unless it's the + * cheapest input path). + */ + if (path != cheapest_total_path && + (presorted_keys == 0 || !enable_incremental_sort)) + continue; + + /* + * We've no need to consider both a sort and incremental sort. + * We'll just do a sort if there are no presorted keys and an + * incremental sort when there are presorted keys. + */ + if (presorted_keys == 0 || !enable_incremental_sort) + path = (Path *) create_sort_path(root, + partially_grouped_rel, + path, + info->pathkeys, + -1.0); + else + path = (Path *) create_incremental_sort_path(root, + partially_grouped_rel, + path, + info->pathkeys, + presorted_keys, + -1.0); + } + + if (parse->hasAggs) + add_path(partially_grouped_rel, (Path *) create_agg_path(root, partially_grouped_rel, path, partially_grouped_rel->reltarget, parse->groupClause ? AGG_SORTED : AGG_PLAIN, AGGSPLIT_INITIAL_SERIAL, - root->processed_groupClause, + info->clauses, NIL, agg_partial_costs, dNumPartialGroups)); - else - add_path(partially_grouped_rel, (Path *) - create_group_path(root, - partially_grouped_rel, - path, - root->processed_groupClause, - NIL, - dNumPartialGroups)); + else + add_path(partially_grouped_rel, (Path *) + create_group_path(root, + partially_grouped_rel, + path, + info->clauses, + NIL, + dNumPartialGroups)); + } } } @@ -7270,67 +7324,86 @@ create_partial_grouping_paths(PlannerInfo *root, /* Similar to above logic, but for partial paths. */ foreach(lc, input_rel->partial_pathlist) { + ListCell *lc2; Path *path = (Path *) lfirst(lc); - bool is_sorted; - int presorted_keys; + Path *path_save = path; + List *pathkey_orderings = NIL; - is_sorted = pathkeys_count_contained_in(root->group_pathkeys, - path->pathkeys, - &presorted_keys); + /* generate alternative group orderings that might be useful */ + pathkey_orderings = get_useful_group_keys_orderings(root, + path->rows, + path->pathkeys); + + Assert(list_length(pathkey_orderings) > 0); - if (!is_sorted) + /* process all potentially interesting grouping reorderings */ + foreach(lc2, pathkey_orderings) { - /* - * Try at least sorting the cheapest path and also try - * incrementally sorting any path which is partially sorted - * already (no need to deal with paths which have presorted - * keys when incremental sort is disabled unless it's the - * cheapest input path). - */ - if (path != cheapest_partial_path && - (presorted_keys == 0 || !enable_incremental_sort)) - continue; + bool is_sorted; + int presorted_keys; + PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2); - /* - * We've no need to consider both a sort and incremental sort. - * We'll just do a sort if there are no presorted keys and an - * incremental sort when there are presorted keys. - */ - if (presorted_keys == 0 || !enable_incremental_sort) - path = (Path *) create_sort_path(root, - partially_grouped_rel, - path, - root->group_pathkeys, - -1.0); - else - path = (Path *) create_incremental_sort_path(root, - partially_grouped_rel, - path, - root->group_pathkeys, - presorted_keys, - -1.0); - } + /* restore the path (we replace it in the loop) */ + path = path_save; - if (parse->hasAggs) - add_partial_path(partially_grouped_rel, (Path *) + is_sorted = pathkeys_count_contained_in(info->pathkeys, + path->pathkeys, + &presorted_keys); + + if (!is_sorted) + { + /* + * Try at least sorting the cheapest path and also try + * incrementally sorting any path which is partially sorted + * already (no need to deal with paths which have presorted + * keys when incremental sort is disabled unless it's the + * cheapest input path). + */ + if (path != cheapest_partial_path && + (presorted_keys == 0 || !enable_incremental_sort)) + continue; + + /* + * We've no need to consider both a sort and incremental sort. + * We'll just do a sort if there are no presorted keys and an + * incremental sort when there are presorted keys. + */ + if (presorted_keys == 0 || !enable_incremental_sort) + path = (Path *) create_sort_path(root, + partially_grouped_rel, + path, + info->pathkeys, + -1.0); + else + path = (Path *) create_incremental_sort_path(root, + partially_grouped_rel, + path, + info->pathkeys, + presorted_keys, + -1.0); + } + + if (parse->hasAggs) + add_partial_path(partially_grouped_rel, (Path *) create_agg_path(root, partially_grouped_rel, path, partially_grouped_rel->reltarget, parse->groupClause ? AGG_SORTED : AGG_PLAIN, AGGSPLIT_INITIAL_SERIAL, - root->processed_groupClause, + info->clauses, NIL, agg_partial_costs, dNumPartialPartialGroups)); - else - add_partial_path(partially_grouped_rel, (Path *) + else + add_partial_path(partially_grouped_rel, (Path *) create_group_path(root, partially_grouped_rel, path, - root->processed_groupClause, + info->clauses, NIL, dNumPartialPartialGroups)); + } } } @@ -7444,6 +7517,8 @@ gather_grouping_paths(PlannerInfo *root, RelOptInfo *rel) * We can also skip the entire loop when we only have a single-item * group_pathkeys because then we can't possibly have a presorted prefix * of the list without having the list be fully sorted. + * + * XXX Shouldn't this also consider the group-key-reordering? */ if (!enable_incremental_sort || list_length(root->group_pathkeys) == 1) return; diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index 211ba65389..7b8d0cd294 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -1346,12 +1346,12 @@ create_append_path(PlannerInfo *root, pathnode->path.total_cost = child->total_cost; } else - cost_append(pathnode); + cost_append(pathnode, root); /* Must do this last, else cost_append complains */ pathnode->path.pathkeys = child->pathkeys; } else - cost_append(pathnode); + cost_append(pathnode, root); /* If the caller provided a row estimate, override the computed value. */ if (rows >= 0) diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index c4fcd0076e..cb7e67099e 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -3386,11 +3386,28 @@ double estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows, List **pgset, EstimationInfo *estinfo) { - List *varinfos = NIL; + return estimate_num_groups_incremental(root, groupExprs, + input_rows, pgset, estinfo, + NULL, 0); +} + +/* + * estimate_num_groups_incremental + * An estimate_num_groups variant, optimized for cases that are adding the + * expressions incrementally (e.g. one by one). + */ +double +estimate_num_groups_incremental(PlannerInfo *root, List *groupExprs, + double input_rows, + List **pgset, EstimationInfo *estinfo, + List **cache_varinfos, int prevNExprs) +{ + List *varinfos = (cache_varinfos) ? *cache_varinfos : NIL; double srf_multiplier = 1.0; double numdistinct; ListCell *l; - int i; + int i, + j; /* Zero the estinfo output parameter, if non-NULL */ if (estinfo != NULL) @@ -3421,7 +3438,7 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows, */ numdistinct = 1.0; - i = 0; + i = j = 0; foreach(l, groupExprs) { Node *groupexpr = (Node *) lfirst(l); @@ -3430,6 +3447,14 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows, List *varshere; ListCell *l2; + /* was done on previous call */ + if (cache_varinfos && j++ < prevNExprs) + { + if (pgset) + i++; /* to keep in sync with lines below */ + continue; + } + /* is expression in this grouping set? */ if (pgset && !list_member_int(*pgset, i++)) continue; @@ -3499,7 +3524,11 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows, if (varshere == NIL) { if (contain_volatile_functions(groupexpr)) + { + if (cache_varinfos) + *cache_varinfos = varinfos; return input_rows; + } continue; } @@ -3516,6 +3545,9 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows, } } + if (cache_varinfos) + *cache_varinfos = varinfos; + /* * If now no Vars, we must have an all-constant or all-boolean GROUP BY * list. diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c index bdb26e2b77..385f160ff8 100644 --- a/src/backend/utils/misc/guc_tables.c +++ b/src/backend/utils/misc/guc_tables.c @@ -1026,6 +1026,16 @@ struct config_bool ConfigureNamesBool[] = true, NULL, NULL, NULL }, + { + {"enable_group_by_reordering", PGC_USERSET, QUERY_TUNING_METHOD, + gettext_noop("Enables reordering of GROUP BY keys."), + NULL, + GUC_EXPLAIN + }, + &enable_group_by_reordering, + true, + NULL, NULL, NULL + }, { {"geqo", PGC_USERSET, QUERY_TUNING_GEQO, gettext_noop("Enables genetic query optimization."), diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index 6bb39d39ba..751511745a 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -391,6 +391,7 @@ #enable_seqscan = on #enable_sort = on #enable_tidscan = on +#enable_group_by_reordering = on # - Planner Cost Constants - diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h index 5702fbba60..15ba2f8218 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -1456,6 +1456,16 @@ typedef struct PathKey bool pk_nulls_first; /* do NULLs come before normal values? */ } PathKey; +/* + * Combines information about pathkeys and the associated clauses. + */ +typedef struct PathKeyInfo +{ + NodeTag type; + List *pathkeys; + List *clauses; +} PathKeyInfo; + /* * VolatileFunctionStatus -- allows nodes to cache their * contain_volatile_functions properties. VOLATILITY_UNKNOWN means not yet diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h index bee090ffc2..bbb1132473 100644 --- a/src/include/optimizer/cost.h +++ b/src/include/optimizer/cost.h @@ -116,7 +116,9 @@ extern void cost_incremental_sort(Path *path, Cost input_startup_cost, Cost input_total_cost, double input_tuples, int width, Cost comparison_cost, int sort_mem, double limit_tuples); -extern void cost_append(AppendPath *apath); +extern Cost cost_sort_estimate(PlannerInfo *root, List *pathkeys, + int nPresortedKeys, double tuples); +extern void cost_append(AppendPath *path, PlannerInfo *root); extern void cost_merge_append(Path *path, PlannerInfo *root, List *pathkeys, int n_streams, Cost input_startup_cost, Cost input_total_cost, diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index 50bc3b503a..e4ff16d0c4 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -24,6 +24,7 @@ extern PGDLLIMPORT bool enable_geqo; extern PGDLLIMPORT int geqo_threshold; extern PGDLLIMPORT int min_parallel_table_scan_size; extern PGDLLIMPORT int min_parallel_index_scan_size; +extern PGDLLIMPORT bool enable_group_by_reordering; /* Hook for plugins to get control in set_rel_pathlist() */ typedef void (*set_rel_pathlist_hook_type) (PlannerInfo *root, @@ -205,6 +206,12 @@ typedef enum extern PathKeysComparison compare_pathkeys(List *keys1, List *keys2); extern bool pathkeys_contained_in(List *keys1, List *keys2); extern bool pathkeys_count_contained_in(List *keys1, List *keys2, int *n_common); +extern int group_keys_reorder_by_pathkeys(List *pathkeys, + List **group_pathkeys, + List **group_clauses, + int num_groupby_pathkeys); +extern List *get_useful_group_keys_orderings(PlannerInfo *root, double nrows, + List *path_pathkeys); extern Path *get_cheapest_path_for_pathkeys(List *paths, List *pathkeys, Relids required_outer, CostSelector cost_criterion, diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h index 2f76c473db..2b21c06cc9 100644 --- a/src/include/utils/selfuncs.h +++ b/src/include/utils/selfuncs.h @@ -214,6 +214,11 @@ extern double estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows, List **pgset, EstimationInfo *estinfo); +extern double estimate_num_groups_incremental(PlannerInfo *root, List *groupExprs, + double input_rows, List **pgset, + EstimationInfo *estinfo, + List **cache_varinfos, int prevNExprs); + extern void estimate_hash_bucket_stats(PlannerInfo *root, Node *hashkey, double nbuckets, Selectivity *mcv_freq, diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out index d8271da4d1..205be28b3d 100644 --- a/src/test/regress/expected/aggregates.out +++ b/src/test/regress/expected/aggregates.out @@ -1234,7 +1234,8 @@ explain (costs off) select distinct min(f1), max(f1) from minmaxtest; QUERY PLAN --------------------------------------------------------------------------------------------- - Unique + HashAggregate + Group Key: $0, $1 InitPlan 1 (returns $0) -> Limit -> Merge Append @@ -1257,10 +1258,8 @@ explain (costs off) -> Index Only Scan using minmaxtest2i on minmaxtest2 minmaxtest_8 Index Cond: (f1 IS NOT NULL) -> Index Only Scan Backward using minmaxtest3i on minmaxtest3 minmaxtest_9 - -> Sort - Sort Key: ($0), ($1) - -> Result -(26 rows) + -> Result +(25 rows) select distinct min(f1), max(f1) from minmaxtest; min | max @@ -2715,6 +2714,241 @@ SELECT balk(hundred) FROM tenk1; (1 row) ROLLBACK; +-- GROUP BY optimization by reorder columns +SELECT + i AS id, + i/2 AS p, + format('%60s', i%2) AS v, + i/4 AS c, + i/8 AS d, + (random() * (10000/8))::int as e --the same as d but no correlation with p + INTO btg +FROM + generate_series(1, 10000) i; +VACUUM btg; +ANALYZE btg; +-- GROUP BY optimization by reorder columns by frequency +SET enable_hashagg=off; +SET max_parallel_workers= 0; +SET max_parallel_workers_per_gather = 0; +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY p, v; + QUERY PLAN +----------------------------- + GroupAggregate + Group Key: p, v + -> Sort + Sort Key: p, v + -> Seq Scan on btg +(5 rows) + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY v, p; + QUERY PLAN +----------------------------- + GroupAggregate + Group Key: p, v + -> Sort + Sort Key: p, v + -> Seq Scan on btg +(5 rows) + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY v, p, c; + QUERY PLAN +----------------------------- + GroupAggregate + Group Key: p, c, v + -> Sort + Sort Key: p, c, v + -> Seq Scan on btg +(5 rows) + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY v, p, c; + QUERY PLAN +----------------------------- + GroupAggregate + Group Key: v, p, c + -> Sort + Sort Key: v, p, c + -> Seq Scan on btg +(5 rows) + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY v, p, d, c; + QUERY PLAN +------------------------------ + GroupAggregate + Group Key: p, d, v, c + -> Sort + Sort Key: p, d, v, c + -> Seq Scan on btg +(5 rows) + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY v, p, d ,c; + QUERY PLAN +------------------------------ + GroupAggregate + Group Key: v, p, d, c + -> Sort + Sort Key: v, p, d, c + -> Seq Scan on btg +(5 rows) + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY p, v, d ,c; + QUERY PLAN +------------------------------ + GroupAggregate + Group Key: p, v, d, c + -> Sort + Sort Key: p, v, d, c + -> Seq Scan on btg +(5 rows) + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY p, d, e; + QUERY PLAN +----------------------------- + GroupAggregate + Group Key: p, d, e + -> Sort + Sort Key: p, d, e + -> Seq Scan on btg +(5 rows) + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY p, e, d; + QUERY PLAN +----------------------------- + GroupAggregate + Group Key: p, e, d + -> Sort + Sort Key: p, e, d + -> Seq Scan on btg +(5 rows) + +CREATE STATISTICS btg_dep ON d, e, p FROM btg; +ANALYZE btg; +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY p, d, e; + QUERY PLAN +----------------------------- + GroupAggregate + Group Key: p, d, e + -> Sort + Sort Key: p, d, e + -> Seq Scan on btg +(5 rows) + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY p, e, d; + QUERY PLAN +----------------------------- + GroupAggregate + Group Key: p, e, d + -> Sort + Sort Key: p, e, d + -> Seq Scan on btg +(5 rows) + +-- GROUP BY optimization by reorder columns by index scan +CREATE INDEX ON btg(p, v); +SET enable_seqscan=off; +SET enable_bitmapscan=off; +VACUUM btg; +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY p, v; + QUERY PLAN +------------------------------------------------ + GroupAggregate + Group Key: p, v + -> Index Only Scan using btg_p_v_idx on btg +(3 rows) + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY p, v ORDER BY p, v; + QUERY PLAN +------------------------------------------------ + GroupAggregate + Group Key: p, v + -> Index Only Scan using btg_p_v_idx on btg +(3 rows) + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY v, p; + QUERY PLAN +------------------------------------------------ + GroupAggregate + Group Key: p, v + -> Index Only Scan using btg_p_v_idx on btg +(3 rows) + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY v, p ORDER BY p, v; + QUERY PLAN +------------------------------------------------ + GroupAggregate + Group Key: p, v + -> Index Only Scan using btg_p_v_idx on btg +(3 rows) + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY v, p, c; + QUERY PLAN +------------------------------------------------- + GroupAggregate + Group Key: p, c, v + -> Incremental Sort + Sort Key: p, c, v + Presorted Key: p + -> Index Scan using btg_p_v_idx on btg +(6 rows) + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY p, v; + QUERY PLAN +------------------------------------------------- + GroupAggregate + Group Key: p, v, c + -> Incremental Sort + Sort Key: p, v, c + Presorted Key: p, v + -> Index Scan using btg_p_v_idx on btg +(6 rows) + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY v, c, p, d; + QUERY PLAN +------------------------------------------------- + GroupAggregate + Group Key: p, c, v, d + -> Incremental Sort + Sort Key: p, c, v, d + Presorted Key: p + -> Index Scan using btg_p_v_idx on btg +(6 rows) + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY v, c, p, d ORDER BY p, v; + QUERY PLAN +------------------------------------------------- + GroupAggregate + Group Key: p, v, c, d + -> Incremental Sort + Sort Key: p, v, c, d + Presorted Key: p, v + -> Index Scan using btg_p_v_idx on btg +(6 rows) + +DROP TABLE btg; +RESET enable_hashagg; +RESET max_parallel_workers; +RESET max_parallel_workers_per_gather; +RESET enable_seqscan; +RESET enable_bitmapscan; -- Secondly test the case of a parallel aggregate combiner function -- returning NULL. For that use normal transition function, but a -- combiner function returning NULL. diff --git a/src/test/regress/expected/incremental_sort.out b/src/test/regress/expected/incremental_sort.out index 7fdb685313..b8a0968b06 100644 --- a/src/test/regress/expected/incremental_sort.out +++ b/src/test/regress/expected/incremental_sort.out @@ -1426,7 +1426,7 @@ set parallel_setup_cost = 0; set parallel_tuple_cost = 0; set max_parallel_workers_per_gather = 2; create table t (a int, b int, c int); -insert into t select mod(i,10),mod(i,10),i from generate_series(1,10000) s(i); +insert into t select mod(i,10),mod(i,10),i from generate_series(1,60000) s(i); create index on t (a); analyze t; set enable_incremental_sort = off; diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index 9b8638f286..27684bfce2 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -1984,8 +1984,8 @@ USING (name); ------+----+---- bb | 12 | 13 cc | 22 | 23 - dd | | 33 ee | 42 | + dd | | 33 (4 rows) -- Cases with non-nullable expressions in subquery results; @@ -2019,8 +2019,8 @@ NATURAL FULL JOIN ------+------+------+------+------ bb | 12 | 2 | 13 | 3 cc | 22 | 2 | 23 | 3 - dd | | | 33 | 3 ee | 42 | 2 | | + dd | | | 33 | 3 (4 rows) SELECT * FROM @@ -5645,18 +5645,20 @@ select d.* from d left join (select * from b group by b.id, b.c_id) s explain (costs off) select d.* from d left join (select distinct * from b) s on d.a = s.id; - QUERY PLAN --------------------------------------- - Merge Right Join - Merge Cond: (b.id = d.a) - -> Unique - -> Sort - Sort Key: b.id, b.c_id - -> Seq Scan on b + QUERY PLAN +--------------------------------------------- + Merge Left Join + Merge Cond: (d.a = s.id) -> Sort Sort Key: d.a -> Seq Scan on d -(9 rows) + -> Sort + Sort Key: s.id + -> Subquery Scan on s + -> HashAggregate + Group Key: b.id, b.c_id + -> Seq Scan on b +(11 rows) -- join removal is not possible here explain (costs off) @@ -7597,44 +7599,39 @@ select * from j1 natural join j2; explain (verbose, costs off) select * from j1 inner join (select distinct id from j3) j3 on j1.id = j3.id; - QUERY PLAN ------------------------------------------ + QUERY PLAN +----------------------------------- Nested Loop Output: j1.id, j3.id Inner Unique: true Join Filter: (j1.id = j3.id) - -> Unique + -> HashAggregate Output: j3.id - -> Sort + Group Key: j3.id + -> Seq Scan on public.j3 Output: j3.id - Sort Key: j3.id - -> Seq Scan on public.j3 - Output: j3.id -> Seq Scan on public.j1 Output: j1.id -(13 rows) +(11 rows) -- ensure group by clause allows the inner to become unique explain (verbose, costs off) select * from j1 inner join (select id from j3 group by id) j3 on j1.id = j3.id; - QUERY PLAN ------------------------------------------ + QUERY PLAN +----------------------------------- Nested Loop Output: j1.id, j3.id Inner Unique: true Join Filter: (j1.id = j3.id) - -> Group + -> HashAggregate Output: j3.id Group Key: j3.id - -> Sort + -> Seq Scan on public.j3 Output: j3.id - Sort Key: j3.id - -> Seq Scan on public.j3 - Output: j3.id -> Seq Scan on public.j1 Output: j1.id -(14 rows) +(11 rows) drop table j1; drop table j2; diff --git a/src/test/regress/expected/merge.out b/src/test/regress/expected/merge.out index 133d42117c..f7c4cc4833 100644 --- a/src/test/regress/expected/merge.out +++ b/src/test/regress/expected/merge.out @@ -1459,18 +1459,15 @@ WHEN MATCHED AND t.a < 10 THEN explain_merge -------------------------------------------------------------------- Merge on ex_mtarget t (actual rows=0 loops=1) - -> Merge Join (actual rows=0 loops=1) - Merge Cond: (t.a = s.a) - -> Sort (actual rows=0 loops=1) - Sort Key: t.a - Sort Method: quicksort Memory: xxx + -> Hash Join (actual rows=0 loops=1) + Hash Cond: (s.a = t.a) + -> Seq Scan on ex_msource s (actual rows=1 loops=1) + -> Hash (actual rows=0 loops=1) + Buckets: xxx Batches: xxx Memory Usage: xxx -> Seq Scan on ex_mtarget t (actual rows=0 loops=1) Filter: (a < '-1000'::integer) Rows Removed by Filter: 54 - -> Sort (never executed) - Sort Key: s.a - -> Seq Scan on ex_msource s (never executed) -(12 rows) +(9 rows) DROP TABLE ex_msource, ex_mtarget; DROP FUNCTION explain_merge(text); diff --git a/src/test/regress/expected/partition_aggregate.out b/src/test/regress/expected/partition_aggregate.out index 1b900fddf8..800114dfde 100644 --- a/src/test/regress/expected/partition_aggregate.out +++ b/src/test/regress/expected/partition_aggregate.out @@ -948,12 +948,12 @@ SET parallel_setup_cost = 0; -- is not partial agg safe. EXPLAIN (COSTS OFF) SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER BY 1, 2, 3; - QUERY PLAN --------------------------------------------------------------------------------------- - Sort - Sort Key: pagg_tab_ml.a, (sum(pagg_tab_ml.b)), (array_agg(DISTINCT pagg_tab_ml.c)) - -> Gather - Workers Planned: 2 + QUERY PLAN +-------------------------------------------------------------------------------------------- + Gather Merge + Workers Planned: 2 + -> Sort + Sort Key: pagg_tab_ml.a, (sum(pagg_tab_ml.b)), (array_agg(DISTINCT pagg_tab_ml.c)) -> Parallel Append -> GroupAggregate Group Key: pagg_tab_ml.a @@ -1380,28 +1380,26 @@ SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < -- When GROUP BY clause does not match; partial aggregation is performed for each partition. EXPLAIN (COSTS OFF) SELECT y, sum(x), avg(x), count(*) FROM pagg_tab_para GROUP BY y HAVING avg(x) < 12 ORDER BY 1, 2, 3; - QUERY PLAN -------------------------------------------------------------------------------------------- + QUERY PLAN +------------------------------------------------------------------------------------- Sort Sort Key: pagg_tab_para.y, (sum(pagg_tab_para.x)), (avg(pagg_tab_para.x)) - -> Finalize GroupAggregate + -> Finalize HashAggregate Group Key: pagg_tab_para.y Filter: (avg(pagg_tab_para.x) < '12'::numeric) - -> Gather Merge + -> Gather Workers Planned: 2 - -> Sort - Sort Key: pagg_tab_para.y - -> Parallel Append - -> Partial HashAggregate - Group Key: pagg_tab_para.y - -> Parallel Seq Scan on pagg_tab_para_p1 pagg_tab_para - -> Partial HashAggregate - Group Key: pagg_tab_para_1.y - -> Parallel Seq Scan on pagg_tab_para_p2 pagg_tab_para_1 - -> Partial HashAggregate - Group Key: pagg_tab_para_2.y - -> Parallel Seq Scan on pagg_tab_para_p3 pagg_tab_para_2 -(19 rows) + -> Parallel Append + -> Partial HashAggregate + Group Key: pagg_tab_para.y + -> Parallel Seq Scan on pagg_tab_para_p1 pagg_tab_para + -> Partial HashAggregate + Group Key: pagg_tab_para_1.y + -> Parallel Seq Scan on pagg_tab_para_p2 pagg_tab_para_1 + -> Partial HashAggregate + Group Key: pagg_tab_para_2.y + -> Parallel Seq Scan on pagg_tab_para_p3 pagg_tab_para_2 +(17 rows) SELECT y, sum(x), avg(x), count(*) FROM pagg_tab_para GROUP BY y HAVING avg(x) < 12 ORDER BY 1, 2, 3; y | sum | avg | count diff --git a/src/test/regress/expected/partition_join.out b/src/test/regress/expected/partition_join.out index 6560fe2416..680e82cfc5 100644 --- a/src/test/regress/expected/partition_join.out +++ b/src/test/regress/expected/partition_join.out @@ -512,52 +512,41 @@ EXPLAIN (COSTS OFF) SELECT a, b FROM prt1 FULL JOIN prt2 p2(b,a,c) USING(a,b) WHERE a BETWEEN 490 AND 510 GROUP BY 1, 2 ORDER BY 1, 2; - QUERY PLAN ------------------------------------------------------------------------------------------------------------------ + QUERY PLAN +----------------------------------------------------------------------------------------------------------- Group Group Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b)) - -> Merge Append + -> Sort Sort Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b)) - -> Group - Group Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b)) - -> Sort - Sort Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b)) - -> Merge Full Join - Merge Cond: ((prt1.a = p2.a) AND (prt1.b = p2.b)) - Filter: ((COALESCE(prt1.a, p2.a) >= 490) AND (COALESCE(prt1.a, p2.a) <= 510)) - -> Sort - Sort Key: prt1.a, prt1.b - -> Seq Scan on prt1_p1 prt1 - -> Sort - Sort Key: p2.a, p2.b - -> Seq Scan on prt2_p1 p2 - -> Group - Group Key: (COALESCE(prt1_1.a, p2_1.a)), (COALESCE(prt1_1.b, p2_1.b)) - -> Sort - Sort Key: (COALESCE(prt1_1.a, p2_1.a)), (COALESCE(prt1_1.b, p2_1.b)) - -> Merge Full Join - Merge Cond: ((prt1_1.a = p2_1.a) AND (prt1_1.b = p2_1.b)) - Filter: ((COALESCE(prt1_1.a, p2_1.a) >= 490) AND (COALESCE(prt1_1.a, p2_1.a) <= 510)) - -> Sort - Sort Key: prt1_1.a, prt1_1.b - -> Seq Scan on prt1_p2 prt1_1 - -> Sort - Sort Key: p2_1.a, p2_1.b - -> Seq Scan on prt2_p2 p2_1 - -> Group - Group Key: (COALESCE(prt1_2.a, p2_2.a)), (COALESCE(prt1_2.b, p2_2.b)) - -> Sort - Sort Key: (COALESCE(prt1_2.a, p2_2.a)), (COALESCE(prt1_2.b, p2_2.b)) - -> Merge Full Join - Merge Cond: ((prt1_2.a = p2_2.a) AND (prt1_2.b = p2_2.b)) - Filter: ((COALESCE(prt1_2.a, p2_2.a) >= 490) AND (COALESCE(prt1_2.a, p2_2.a) <= 510)) - -> Sort - Sort Key: prt1_2.a, prt1_2.b - -> Seq Scan on prt1_p3 prt1_2 - -> Sort - Sort Key: p2_2.a, p2_2.b - -> Seq Scan on prt2_p3 p2_2 -(43 rows) + -> Append + -> Merge Full Join + Merge Cond: ((prt1_1.a = p2_1.a) AND (prt1_1.b = p2_1.b)) + Filter: ((COALESCE(prt1_1.a, p2_1.a) >= 490) AND (COALESCE(prt1_1.a, p2_1.a) <= 510)) + -> Sort + Sort Key: prt1_1.a, prt1_1.b + -> Seq Scan on prt1_p1 prt1_1 + -> Sort + Sort Key: p2_1.a, p2_1.b + -> Seq Scan on prt2_p1 p2_1 + -> Merge Full Join + Merge Cond: ((prt1_2.a = p2_2.a) AND (prt1_2.b = p2_2.b)) + Filter: ((COALESCE(prt1_2.a, p2_2.a) >= 490) AND (COALESCE(prt1_2.a, p2_2.a) <= 510)) + -> Sort + Sort Key: prt1_2.a, prt1_2.b + -> Seq Scan on prt1_p2 prt1_2 + -> Sort + Sort Key: p2_2.a, p2_2.b + -> Seq Scan on prt2_p2 p2_2 + -> Merge Full Join + Merge Cond: ((prt1_3.a = p2_3.a) AND (prt1_3.b = p2_3.b)) + Filter: ((COALESCE(prt1_3.a, p2_3.a) >= 490) AND (COALESCE(prt1_3.a, p2_3.a) <= 510)) + -> Sort + Sort Key: prt1_3.a, prt1_3.b + -> Seq Scan on prt1_p3 prt1_3 + -> Sort + Sort Key: p2_3.a, p2_3.b + -> Seq Scan on prt2_p3 p2_3 +(32 rows) SELECT a, b FROM prt1 FULL JOIN prt2 p2(b,a,c) USING(a,b) WHERE a BETWEEN 490 AND 510 diff --git a/src/test/regress/expected/sysviews.out b/src/test/regress/expected/sysviews.out index aae5d51e1c..487fe26906 100644 --- a/src/test/regress/expected/sysviews.out +++ b/src/test/regress/expected/sysviews.out @@ -114,6 +114,7 @@ select name, setting from pg_settings where name like 'enable%'; enable_async_append | on enable_bitmapscan | on enable_gathermerge | on + enable_group_by_reordering | on enable_hashagg | on enable_hashjoin | on enable_incremental_sort | on @@ -132,7 +133,7 @@ select name, setting from pg_settings where name like 'enable%'; enable_seqscan | on enable_sort | on enable_tidscan | on -(21 rows) +(22 rows) -- There are always wait event descriptions for various types. select type, count(*) > 0 as ok FROM pg_wait_events diff --git a/src/test/regress/expected/union.out b/src/test/regress/expected/union.out index e2613d6777..220f0c1629 100644 --- a/src/test/regress/expected/union.out +++ b/src/test/regress/expected/union.out @@ -1303,24 +1303,22 @@ select distinct q1 from union all select distinct * from int8_tbl i82) ss where q2 = q2; - QUERY PLAN ----------------------------------------------------------- - Unique - -> Merge Append - Sort Key: "*SELECT* 1".q1 + QUERY PLAN +---------------------------------------------------- + HashAggregate + Group Key: "*SELECT* 1".q1 + -> Append -> Subquery Scan on "*SELECT* 1" - -> Unique - -> Sort - Sort Key: i81.q1, i81.q2 - -> Seq Scan on int8_tbl i81 - Filter: (q2 IS NOT NULL) + -> HashAggregate + Group Key: i81.q1, i81.q2 + -> Seq Scan on int8_tbl i81 + Filter: (q2 IS NOT NULL) -> Subquery Scan on "*SELECT* 2" - -> Unique - -> Sort - Sort Key: i82.q1, i82.q2 - -> Seq Scan on int8_tbl i82 - Filter: (q2 IS NOT NULL) -(15 rows) + -> HashAggregate + Group Key: i82.q1, i82.q2 + -> Seq Scan on int8_tbl i82 + Filter: (q2 IS NOT NULL) +(13 rows) select distinct q1 from (select distinct * from int8_tbl i81 @@ -1339,24 +1337,22 @@ select distinct q1 from union all select distinct * from int8_tbl i82) ss where -q1 = q2; - QUERY PLAN --------------------------------------------------------- - Unique - -> Merge Append - Sort Key: "*SELECT* 1".q1 + QUERY PLAN +-------------------------------------------------- + HashAggregate + Group Key: "*SELECT* 1".q1 + -> Append -> Subquery Scan on "*SELECT* 1" - -> Unique - -> Sort - Sort Key: i81.q1, i81.q2 - -> Seq Scan on int8_tbl i81 - Filter: ((- q1) = q2) + -> HashAggregate + Group Key: i81.q1, i81.q2 + -> Seq Scan on int8_tbl i81 + Filter: ((- q1) = q2) -> Subquery Scan on "*SELECT* 2" - -> Unique - -> Sort - Sort Key: i82.q1, i82.q2 - -> Seq Scan on int8_tbl i82 - Filter: ((- q1) = q2) -(15 rows) + -> HashAggregate + Group Key: i82.q1, i82.q2 + -> Seq Scan on int8_tbl i82 + Filter: ((- q1) = q2) +(13 rows) select distinct q1 from (select distinct * from int8_tbl i81 diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql index 75c78be640..5c4e768825 100644 --- a/src/test/regress/sql/aggregates.sql +++ b/src/test/regress/sql/aggregates.sql @@ -1172,6 +1172,105 @@ SELECT balk(hundred) FROM tenk1; ROLLBACK; +-- GROUP BY optimization by reorder columns + +SELECT + i AS id, + i/2 AS p, + format('%60s', i%2) AS v, + i/4 AS c, + i/8 AS d, + (random() * (10000/8))::int as e --the same as d but no correlation with p + INTO btg +FROM + generate_series(1, 10000) i; + +VACUUM btg; +ANALYZE btg; + +-- GROUP BY optimization by reorder columns by frequency + +SET enable_hashagg=off; +SET max_parallel_workers= 0; +SET max_parallel_workers_per_gather = 0; + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY p, v; + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY v, p; + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY v, p, c; + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY v, p, c; + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY v, p, d, c; + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY v, p, d ,c; + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY p, v, d ,c; + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY p, d, e; + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY p, e, d; + +CREATE STATISTICS btg_dep ON d, e, p FROM btg; +ANALYZE btg; + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY p, d, e; + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY p, e, d; + + +-- GROUP BY optimization by reorder columns by index scan + +CREATE INDEX ON btg(p, v); +SET enable_seqscan=off; +SET enable_bitmapscan=off; +VACUUM btg; + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY p, v; + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY p, v ORDER BY p, v; + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY v, p; + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY v, p ORDER BY p, v; + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY v, p, c; + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY p, v; + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY v, c, p, d; + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY v, c, p, d ORDER BY p, v; + +DROP TABLE btg; + +RESET enable_hashagg; +RESET max_parallel_workers; +RESET max_parallel_workers_per_gather; +RESET enable_seqscan; +RESET enable_bitmapscan; + + -- Secondly test the case of a parallel aggregate combiner function -- returning NULL. For that use normal transition function, but a -- combiner function returning NULL. diff --git a/src/test/regress/sql/incremental_sort.sql b/src/test/regress/sql/incremental_sort.sql index ab471bdfff..70f311a6f2 100644 --- a/src/test/regress/sql/incremental_sort.sql +++ b/src/test/regress/sql/incremental_sort.sql @@ -208,7 +208,7 @@ set parallel_tuple_cost = 0; set max_parallel_workers_per_gather = 2; create table t (a int, b int, c int); -insert into t select mod(i,10),mod(i,10),i from generate_series(1,10000) s(i); +insert into t select mod(i,10),mod(i,10),i from generate_series(1,60000) s(i); create index on t (a); analyze t; -- 2.42.0