diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index 4f93afdebc..586f8e6e79 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -380,6 +380,157 @@ group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
return n;
}
+/* Debug switches for GROUP BY key reordering; registered as GUCs in guc.c */
+bool debug_group_by_match_order_by = true;
+bool debug_cheapest_group_by = true;
+/************************************************************/
+
+/*
+ * Reorder the tail of the group pathkeys so that keys with more estimated
+ * groups come first, which speeds up sorting. The new lists are stored into
+ * *group_pathkeys and *group_clauses; the first n_preordered keys keep order.
+ */
+void
+get_cheapest_group_keys_order(PlannerInfo *root, double nrows,
+ List *target_list,
+ List **group_pathkeys, List **group_clauses,
+ int n_preordered)
+{
+ struct
+ {
+ PathKey *pathkey;
+ SortGroupClause *sgc;
+ Node *pathkeyExpr;
+ }
+ *keys, tmp;
+ int nkeys = list_length(*group_pathkeys) - n_preordered; /* keys we may reorder */
+ List *pathkeyExprList = NIL,
+ *new_group_pathkeys = NIL,
+ *new_group_clauses = NIL;
+ ListCell *cell;
+ int i = 0, n_keys_to_est;
+
+ if (nkeys < 2)
+ return; /* nothing to do */
+
+ /*
+ * Try to match the ORDER BY pathkeys first, in the hope that one sort is
+ * cheaper than two. Only done when the caller reports no preordered keys.
+ */
+ if (debug_group_by_match_order_by &&
+ n_preordered == 0 && root->sort_pathkeys)
+ {
+ n_preordered = group_keys_reorder_by_pathkeys(root->sort_pathkeys,
+ group_pathkeys,
+ group_clauses);
+
+ nkeys = list_length(*group_pathkeys) - n_preordered;
+ if (nkeys < 2)
+ return; /* nothing to do */
+ }
+
+ if (!debug_cheapest_group_by)
+ return; /* estimate-based reordering is switched off */
+
+ keys = palloc(nkeys * sizeof(*keys));
+
+ /*
+ * For each reorderable key, cache its pathkey, clause and expression
+ */
+ for_each_cell(cell, list_nth_cell(*group_pathkeys, n_preordered))
+ {
+ PathKey *pathkey = (PathKey *) lfirst(cell);
+
+ keys[i].pathkey = pathkey;
+ keys[i].sgc = get_sortgroupref_clause(pathkey->pk_eclass->ec_sortref,
+ *group_clauses);
+ keys[i].pathkeyExpr = get_sortgroupclause_expr(keys[i].sgc,
+ target_list);
+ i++;
+ }
+
+ /*
+ * Find the cheapest-to-sort order of columns. We first pick the column with
+ * the largest number of groups, then the best pair (whose first column was
+ * already fixed in the first step), then the best triple, and so on.
+ */
+ for(n_keys_to_est = 1; n_keys_to_est <= nkeys - 1; n_keys_to_est++)
+ {
+ ListCell *tail_cell;
+ int best_i = 0;
+ double best_est_num_groups = -1;
+
+ /* extend the list of columns and remember its last cell */
+ pathkeyExprList = lappend(pathkeyExprList, NULL);
+ tail_cell = list_tail(pathkeyExprList);
+
+ /*
+ * Find the best last column - "best" means the larger number of groups;
+ * the previous columns are already chosen
+ */
+ for(i = n_keys_to_est - 1; i < nkeys; i++)
+ {
+ double est_num_groups;
+
+ lfirst(tail_cell) = keys[i].pathkeyExpr;
+ est_num_groups = estimate_num_groups(root, pathkeyExprList,
+ nrows, NULL);
+
+ if (est_num_groups > best_est_num_groups)
+ {
+ best_est_num_groups = est_num_groups;
+ best_i = i;
+ }
+ }
+
+ /* Save the best choice by swapping it into the current position */
+ lfirst(tail_cell) = keys[best_i].pathkeyExpr;
+ if (best_i != n_keys_to_est - 1)
+ {
+ tmp = keys[n_keys_to_est - 1];
+ keys[n_keys_to_est - 1] = keys[best_i];
+ keys[best_i] = tmp;
+ }
+ }
+ list_free(pathkeyExprList);
+
+ /*
+ * Construct the result lists; the keys array is already ordered to give the
+ * cheapest sort
+ */
+ i = 0;
+ foreach(cell, *group_pathkeys)
+ {
+ PathKey *pathkey;
+ SortGroupClause *sgc;
+
+ if (i < n_preordered)
+ {
+ pathkey = (PathKey *) lfirst(cell);
+ sgc = get_sortgroupref_clause(pathkey->pk_eclass->ec_sortref,
+ *group_clauses);
+ }
+ else
+ {
+ pathkey = keys[i - n_preordered].pathkey;
+ sgc = keys[i - n_preordered].sgc;
+ }
+
+ new_group_pathkeys = lappend(new_group_pathkeys, pathkey);
+ new_group_clauses = lappend(new_group_clauses, sgc);
+
+ i++;
+ }
+
+ pfree(keys);
+
+ /* Just append the remaining GROUP BY clauses */
+ new_group_clauses = list_concat_unique_ptr(new_group_clauses, *group_clauses);
+
+ *group_pathkeys = new_group_pathkeys;
+ *group_clauses = new_group_clauses;
+}
+
/*
* get_cheapest_path_for_pathkeys
* Find the cheapest path (according to the specified criterion) that
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 1e7809edf2..257aa5889c 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -6208,11 +6208,20 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
{
/* Sort the cheapest-total path if it isn't already sorted */
if (!is_sorted)
+ {
+ if (!parse->groupingSets)
+ get_cheapest_group_keys_order(root,
+ path->rows,
+ extra->targetList,
+ &group_pathkeys,
+ &group_clauses,
+ n_preordered_groups);
path = (Path *) create_sort_path(root,
grouped_rel,
path,
group_pathkeys,
-1.0);
+ }
/* Now decide what to stick atop it */
if (parse->groupingSets)
@@ -6286,6 +6295,12 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
{
if (path != partially_grouped_rel->cheapest_total_path)
continue;
+ get_cheapest_group_keys_order(root,
+ path->rows,
+ extra->targetList,
+ &group_pathkeys,
+ &group_clauses,
+ n_preordered_groups);
path = (Path *) create_sort_path(root,
grouped_rel,
path,
@@ -6560,11 +6575,19 @@ create_partial_grouping_paths(PlannerInfo *root,
{
/* Sort the cheapest partial path, if it isn't already */
if (!is_sorted)
+ {
+ get_cheapest_group_keys_order(root,
+ path->rows,
+ extra->targetList,
+ &group_pathkeys,
+ &group_clauses,
+ n_preordered_groups);
path = (Path *) create_sort_path(root,
partially_grouped_rel,
path,
group_pathkeys,
-1.0);
+ }
if (parse->hasAggs)
add_path(partially_grouped_rel, (Path *)
@@ -6611,11 +6634,19 @@ create_partial_grouping_paths(PlannerInfo *root,
/* Sort the cheapest partial path, if it isn't already */
if (!is_sorted)
+ {
+ get_cheapest_group_keys_order(root,
+ path->rows,
+ extra->targetList,
+ &group_pathkeys,
+ &group_clauses,
+ n_preordered_groups);
path = (Path *) create_sort_path(root,
partially_grouped_rel,
path,
group_pathkeys,
-1.0);
+ }
if (parse->hasAggs)
add_partial_path(partially_grouped_rel, (Path *)
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index ee1444c427..4236807433 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -1822,7 +1822,26 @@ static struct config_bool ConfigureNamesBool[] =
true,
NULL, NULL, NULL
},
-
+/*********************************************************/
+ {
+ {"debug_enable_group_by_match_order_by", PGC_USERSET, QUERY_TUNING_METHOD,
+ gettext_noop("enable matching GROUP BY by ORDER BY."),
+ NULL
+ },
+ &debug_group_by_match_order_by,
+ true,
+ NULL, NULL, NULL
+ },
+ {
+ {"debug_enable_cheapest_group_by", PGC_USERSET, QUERY_TUNING_METHOD,
+ gettext_noop("find a cheapest order of columns in GROUP BY."),
+ NULL
+ },
+ &debug_cheapest_group_by,
+ true,
+ NULL, NULL, NULL
+ },
+/********************************************************/
/* End-of-list marker */
{
{NULL, 0, 0, NULL, NULL}, NULL, false, NULL, NULL, NULL
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index 226b293622..60d8d6c3a6 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -193,6 +193,16 @@ extern bool pathkeys_contained_in(List *keys1, List *keys2);
extern int group_keys_reorder_by_pathkeys(List *pathkeys,
List **group_pathkeys,
List **group_clauses);
+/* Debug switches for GROUP BY key reordering (defined in pathkeys.c) */
+extern bool debug_group_by_match_order_by;
+extern bool debug_cheapest_group_by;
+/* Picks a cheaper-to-sort GROUP BY key order; updates both lists */
+extern void get_cheapest_group_keys_order(PlannerInfo *root,
+ double nrows,
+ List *target_list,
+ List **group_pathkeys,
+ List **group_clauses,
+ int n_preordered);
extern Path *get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
Relids required_outer,
CostSelector cost_criterion,
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index e302dfbdce..31dcf70e47 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -2071,19 +2071,145 @@ SELECT
i/2 AS p,
format('%60s', i%2) AS v,
i/4 AS c,
- i/8 AS d
+ i/8 AS d,
+ (random() * (10000/8))::int as e --the same as d but no correlation with p
INTO btg
FROM
generate_series(1, 10000) i;
-CREATE INDEX ON btg(p, v);
VACUUM btg;
ANALYZE btg;
+-- GROUP BY optimization by reorder columns by frequency
SET enable_hashagg=off;
SET max_parallel_workers= 0;
SET max_parallel_workers_per_gather = 0;
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+ QUERY PLAN
+-----------------------------
+ GroupAggregate
+ Group Key: p, v
+ -> Sort
+ Sort Key: p, v
+ -> Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+ QUERY PLAN
+-----------------------------
+ GroupAggregate
+ Group Key: p, v
+ -> Sort
+ Sort Key: p, v
+ -> Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+ QUERY PLAN
+-----------------------------
+ GroupAggregate
+ Group Key: p, v, c
+ -> Sort
+ Sort Key: p, v, c
+ -> Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY v, p, c;
+ QUERY PLAN
+-----------------------------
+ GroupAggregate
+ Group Key: v, p, c
+ -> Sort
+ Sort Key: v, p, c
+ -> Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c;
+ QUERY PLAN
+------------------------------
+ GroupAggregate
+ Group Key: p, v, d, c
+ -> Sort
+ Sort Key: p, v, d, c
+ -> Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY v, p, d ,c;
+ QUERY PLAN
+------------------------------
+ GroupAggregate
+ Group Key: v, p, d, c
+ -> Sort
+ Sort Key: v, p, d, c
+ -> Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY p, v, d ,c;
+ QUERY PLAN
+------------------------------
+ GroupAggregate
+ Group Key: p, v, d, c
+ -> Sort
+ Sort Key: p, v, d, c
+ -> Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, d, e;
+ QUERY PLAN
+-----------------------------
+ GroupAggregate
+ Group Key: p, d, e
+ -> Sort
+ Sort Key: p, d, e
+ -> Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, e, d;
+ QUERY PLAN
+-----------------------------
+ GroupAggregate
+ Group Key: p, e, d
+ -> Sort
+ Sort Key: p, e, d
+ -> Seq Scan on btg
+(5 rows)
+
+CREATE STATISTICS btg_dep ON d, e, p FROM btg;
+ANALYZE btg;
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, d, e;
+ QUERY PLAN
+-----------------------------
+ GroupAggregate
+ Group Key: p, e, d
+ -> Sort
+ Sort Key: p, e, d
+ -> Seq Scan on btg
+(5 rows)
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, e, d;
+ QUERY PLAN
+-----------------------------
+ GroupAggregate
+ Group Key: p, e, d
+ -> Sort
+ Sort Key: p, e, d
+ -> Seq Scan on btg
+(5 rows)
+
+-- GROUP BY optimization by reorder columns by index scan
+CREATE INDEX ON btg(p, v);
SET enable_seqscan=off;
SET enable_bitmapscan=off;
--- GROUP BY optimization by reorder columns by index scan
+VACUUM btg;
EXPLAIN (COSTS off)
SELECT count(*) FROM btg GROUP BY p, v;
QUERY PLAN
diff --git a/src/test/regress/expected/partition_join.out b/src/test/regress/expected/partition_join.out
index b983f9c506..3915a837f0 100644
--- a/src/test/regress/expected/partition_join.out
+++ b/src/test/regress/expected/partition_join.out
@@ -1140,7 +1140,7 @@ SELECT avg(t1.a), avg(t2.b), avg(t3.a + t3.b), t1.c, t2.c, t3.c FROM plt1 t1, pl
QUERY PLAN
--------------------------------------------------------------------------------
GroupAggregate
- Group Key: t1.c, t2.c, t3.c
+ Group Key: t1.c, t3.c, t2.c
-> Sort
Sort Key: t1.c, t3.c
-> Append
@@ -1284,7 +1284,7 @@ SELECT avg(t1.a), avg(t2.b), avg(t3.a + t3.b), t1.c, t2.c, t3.c FROM pht1 t1, ph
QUERY PLAN
--------------------------------------------------------------------------------
GroupAggregate
- Group Key: t1.c, t2.c, t3.c
+ Group Key: t1.c, t3.c, t2.c
-> Sort
Sort Key: t1.c, t3.c
-> Append
diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out
index 054a381dad..a686a75fb0 100644
--- a/src/test/regress/expected/stats_ext.out
+++ b/src/test/regress/expected/stats_ext.out
@@ -244,9 +244,9 @@ EXPLAIN (COSTS off)
QUERY PLAN
-----------------------------------
GroupAggregate
- Group Key: a, b, c, d
+ Group Key: a, d, c, b
-> Sort
- Sort Key: a, b, c, d
+ Sort Key: a, d, c, b
-> Seq Scan on ndistinct
(5 rows)
@@ -255,9 +255,9 @@ EXPLAIN (COSTS off)
QUERY PLAN
-----------------------------------
GroupAggregate
- Group Key: b, c, d
+ Group Key: b, d, c
-> Sort
- Sort Key: b, c, d
+ Sort Key: b, d, c
-> Seq Scan on ndistinct
(5 rows)
@@ -281,9 +281,9 @@ EXPLAIN (COSTS off)
QUERY PLAN
-----------------------------------
GroupAggregate
- Group Key: a, b
+ Group Key: b, a
-> Sort
- Sort Key: a, b
+ Sort Key: b, a
-> Seq Scan on ndistinct
(5 rows)
@@ -292,9 +292,9 @@ EXPLAIN (COSTS off)
QUERY PLAN
-----------------------------------
GroupAggregate
- Group Key: a, b, c
+ Group Key: b, a, c
-> Sort
- Sort Key: a, b, c
+ Sort Key: b, a, c
-> Seq Scan on ndistinct
(5 rows)
@@ -303,9 +303,9 @@ EXPLAIN (COSTS off)
QUERY PLAN
-----------------------------------
GroupAggregate
- Group Key: a, b, c, d
+ Group Key: d, b, a, c
-> Sort
- Sort Key: a, b, c, d
+ Sort Key: d, b, a, c
-> Seq Scan on ndistinct
(5 rows)
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index 7ef703f3a7..e4415c8d84 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -915,22 +915,65 @@ SELECT
i/2 AS p,
format('%60s', i%2) AS v,
i/4 AS c,
- i/8 AS d
+ i/8 AS d,
+ (random() * (10000/8))::int as e --the same as d but no correlation with p
INTO btg
FROM
generate_series(1, 10000) i;
-CREATE INDEX ON btg(p, v);
VACUUM btg;
ANALYZE btg;
+-- GROUP BY optimization by reorder columns by frequency
+
SET enable_hashagg=off;
SET max_parallel_workers= 0;
SET max_parallel_workers_per_gather = 0;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, v;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY v, p, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY v, p, d ,c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY p, v, d ,c;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, d, e;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, e, d;
+
+CREATE STATISTICS btg_dep ON d, e, p FROM btg;
+ANALYZE btg;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, d, e;
+
+EXPLAIN (COSTS off)
+SELECT count(*) FROM btg GROUP BY p, e, d;
+
+
+-- GROUP BY optimization by reorder columns by index scan
+
+CREATE INDEX ON btg(p, v);
SET enable_seqscan=off;
SET enable_bitmapscan=off;
+VACUUM btg;
--- GROUP BY optimization by reorder columns by index scan
EXPLAIN (COSTS off)
SELECT count(*) FROM btg GROUP BY p, v;