From 45cfff5731b81c0df2af00f5e4212fc598f6a231 Mon Sep 17 00:00:00 2001
From: "Andrey V. Lepikhov" <a.lepikhov@postgrespro.ru>
Date: Tue, 9 Jan 2024 12:32:15 +0700
Subject: [PATCH 2/2] Explore alternative orderings of group-by pathkeys during
 optimization.

When evaluating a query with a multi-column GROUP BY clause, we can minimize
sort operations or avoid them if we synchronize the order of GROUP BY clauses
with the ORDER BY sort clause or sort order, which comes from the underlying
query tree. Grouping does not imply any ordering, so we can compare
the keys in arbitrary order, and a Hash Agg leverages this. But for Group Agg,
we simply compared keys in the order specified in the query. This commit
explores alternative ordering of the keys, trying to find a cheaper one.

The ordering of group keys may interact with other parts of the query, some of
which may not be known while planning the grouping. For example, there may be
an explicit ORDER BY clause or some other ordering-dependent operation higher up
in the query, and using the same ordering may allow using either incremental
sort or even eliminating the sort entirely.

The patch always keeps the ordering specified in the query, assuming the user
might have additional insights.

This introduces a new GUC enable_group_by_reordering so that the optimization
may be disabled if needed.
---
 src/backend/optimizer/path/equivclass.c       |  13 +-
 src/backend/optimizer/path/pathkeys.c         | 222 ++++++++++++++++++
 src/backend/optimizer/plan/planner.c          | 214 +++++++++++------
 src/backend/utils/misc/guc_tables.c           |  10 +
 src/backend/utils/misc/postgresql.conf.sample |   1 +
 src/include/nodes/pathnodes.h                 |  10 +
 src/include/optimizer/paths.h                 |   3 +
 src/test/regress/expected/aggregates.out      | 132 +++++++++++
 src/test/regress/expected/sysviews.out        |   3 +-
 src/test/regress/sql/aggregates.sql           |  47 ++++
 10 files changed, 586 insertions(+), 69 deletions(-)

diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c
index e86dfeaecd..7dd14d0a43 100644
--- a/src/backend/optimizer/path/equivclass.c
+++ b/src/backend/optimizer/path/equivclass.c
@@ -652,7 +652,18 @@ get_eclass_for_sort_expr(PlannerInfo *root,
 
 			if (opcintype == cur_em->em_datatype &&
 				equal(expr, cur_em->em_expr))
-				return cur_ec;	/* Match! */
+			{
+				/*
+				 * Match!
+				 *
+				 * Copy the sortref if it wasn't set yet. That may happen if
+				 * the ec was constructed from WHERE clause, i.e. it doesn't
+				 * have a target reference at all.
+				 */
+				if (cur_ec->ec_sortref == 0 && sortref > 0)
+					cur_ec->ec_sortref = sortref;
+				return cur_ec;
+			}
 		}
 	}
 
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index ca94a31f71..8bc4eb9f03 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -28,6 +28,8 @@
 #include "partitioning/partbounds.h"
 #include "utils/lsyscache.h"
 
+/* Consider reordering of GROUP BY keys? */
+bool		enable_group_by_reordering = true;
 
 static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys);
 static bool matches_boolean_partition_clause(RestrictInfo *rinfo,
@@ -350,6 +352,173 @@ pathkeys_contained_in(List *keys1, List *keys2)
 	return false;
 }
 
+/*
+ * group_keys_reorder_by_pathkeys
+ *		Reorder GROUP BY keys to match pathkeys of input path.
+ *
+ * Function returns new lists (pathkeys and clauses), original GROUP BY lists
+ * stay untouched.
+ *
+ * Returns the number of GROUP BY keys with a matching pathkey.
+ */
+static int
+group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
+							   List **group_clauses,
+							   int num_groupby_pathkeys)
+{
+	List	   *new_group_pathkeys = NIL,
+			   *new_group_clauses = NIL;
+	ListCell   *lc;
+	int			n;
+
+	if (pathkeys == NIL || *group_pathkeys == NIL)
+		return 0;
+
+	/*
+	 * Walk the pathkeys (determining ordering of the input path) and see if
+	 * there's a matching GROUP BY key. If we find one, we append it to the
+	 * list, and do the same for the clauses.
+	 *
+	 * Once we find the first pathkey without a matching GROUP BY key, the
+	 * rest of the pathkeys are useless and can't be used to evaluate the
+	 * grouping, so we abort the loop and ignore the remaining pathkeys.
+	 *
+	 * XXX Pathkeys are built in a way to allow simply comparing pointers.
+	 */
+	foreach(lc, pathkeys)
+	{
+		PathKey			   *pathkey = (PathKey *) lfirst(lc);
+		SortGroupClause	   *sgc;
+
+		if (foreach_current_index(lc) >= num_groupby_pathkeys ||
+			!list_member_ptr(*group_pathkeys, pathkey))
+			/* */
+			break;
+
+		/* abort on first mismatch */
+		sgc = get_sortgroupref_clause_noerr(pathkey->pk_eclass->ec_sortref,
+											*group_clauses);
+		if (!sgc)
+			/* The grouping clause is not cover this pathkey */
+			break;
+
+		new_group_pathkeys = lappend(new_group_pathkeys, pathkey);
+		new_group_clauses = lappend(new_group_clauses, sgc);
+	}
+
+	/* remember the number of pathkeys with a matching GROUP BY key */
+	n = list_length(new_group_pathkeys);
+
+	/* append the remaining group pathkeys (will be treated as not sorted) */
+	*group_pathkeys = list_concat_unique_ptr(new_group_pathkeys,
+											 *group_pathkeys);
+	*group_clauses = list_concat_unique_ptr(new_group_clauses,
+											*group_clauses);
+
+	return n;
+}
+
+/*
+ * get_useful_group_keys_orderings
+ *		Determine which orderings of GROUP BY keys are potentially interesting.
+ *
+ * Returns list of PathKeyInfo items, each representing an interesting ordering
+ * of GROUP BY keys. Each item stores pathkeys and clauses in matching order.
+ *
+ * The function considers (and keeps) multiple group by orderings:
+ *
+ * - the original ordering, as specified by the GROUP BY clause
+ *
+ * - GROUP BY keys reordered to match path ordering (as much as possible), with
+ *   the tail reordered to minimize the sort cost
+ *
+ * - GROUP BY keys to match target ORDER BY clause (as much as possible)
+ *
+ * There are other potentially interesting orderings. Just to memorize:
+ * 1. It might be best to match the first ORDER BY key, order the remaining keys
+ * differently and then rely on the incremental sort to fix this).
+ * 2. Place integer fixed-length columns in the front of text/bytea columns
+ * 3. Rearrange columns according to ndistinct estimations.
+ */
+List *
+get_useful_group_keys_orderings(PlannerInfo *root, double nrows,
+								List *path_pathkeys)
+{
+	Query	   *parse = root->parse;
+	List	   *infos = NIL;
+	PathKeyInfo *info;
+
+	List	   *pathkeys = root->group_pathkeys;
+	List	   *clauses = root->processed_groupClause;
+
+	/* always return at least the original pathkeys/clauses */
+	info = makeNode(PathKeyInfo);
+	info->pathkeys = pathkeys;
+	info->clauses = clauses;
+	infos = lappend(infos, info);
+
+	/*
+	 * Should we try generating alternative orderings of the group keys? If
+	 * not, we produce only the order specified in the query, i.e. the
+	 * optimization is effectively disabled.
+	 */
+	if (!enable_group_by_reordering)
+		return infos;
+
+	/* for grouping sets we can't do any reordering */
+	if (parse->groupingSets)
+		return infos;
+
+	/*
+	 * If the path is sorted in some way, try reordering the group keys to
+	 * match as much of the ordering as possible - we get this sort for free
+	 * (mostly).
+	 *
+	 * We must not do this when there are no grouping sets, because those use
+	 * more complex logic to decide the ordering.
+	 *
+	 * XXX Isn't this somewhat redundant with presorted_keys? Actually, it's
+	 * more a complement, because it allows benefiting from incremental sort
+	 * as much as possible.
+	 */
+	if (path_pathkeys)
+	{
+		group_keys_reorder_by_pathkeys(path_pathkeys, &pathkeys, &clauses,
+									   root->num_groupby_pathkeys);
+
+		/*
+		 * reorder the tail to minimize sort cost
+		 *
+		 * XXX Ignore the return value - there may be nothing to reorder, But we
+		 * still want to keep the keys reordered to path_pathkeys.
+		 */
+		info = makeNode(PathKeyInfo);
+		info->pathkeys = pathkeys;
+		info->clauses = clauses;
+
+		infos = lappend(infos, info);
+	}
+
+	/*
+	 * Try reordering pathkeys to minimize the sort cost (this time consider
+	 * the ORDER BY clause).
+	 */
+	if (root->sort_pathkeys)
+	{
+		group_keys_reorder_by_pathkeys(root->sort_pathkeys, &pathkeys, &clauses,
+									   root->num_groupby_pathkeys);
+
+		/* keep the group keys reordered to match ordering of input path */
+		info = makeNode(PathKeyInfo);
+		info->pathkeys = pathkeys;
+		info->clauses = clauses;
+
+		infos = lappend(infos, info);
+	}
+
+	return infos;
+}
+
 /*
  * pathkeys_count_contained_in
  *    Same as pathkeys_contained_in, but also sets length of longest
@@ -1939,6 +2108,54 @@ pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys)
 	return n_common_pathkeys;
 }
 
+/*
+ * pathkeys_useful_for_grouping
+ *		Count the number of pathkeys that are useful for grouping (instead of
+ *		explicit sort)
+ *
+ * Group pathkeys could be reordered to benefit from the ordering. The
+ * ordering may not be "complete" and may require incremental sort, but that's
+ * fine. So we simply count prefix pathkeys with a matching group key, and
+ * stop once we find the first pathkey without a match.
+ *
+ * So e.g. with pathkeys (a,b,c) and group keys (a,b,e) this determines (a,b)
+ * pathkeys are useful for grouping, and we might do incremental sort to get
+ * path ordered by (a,b,e).
+ *
+ * This logic is necessary to retain paths with ordering not matching grouping
+ * keys directly, without the reordering.
+ *
+ * Returns the length of pathkey prefix with matching group keys.
+ */
+static int
+pathkeys_useful_for_grouping(PlannerInfo *root, List *pathkeys)
+{
+	ListCell   *key;
+	int			n = 0;
+
+	/* no special ordering requested for grouping */
+	if (root->group_pathkeys == NIL)
+		return 0;
+
+	/* unordered path */
+	if (pathkeys == NIL)
+		return 0;
+
+	/* walk the pathkeys and search for matching group key */
+	foreach(key, pathkeys)
+	{
+		PathKey    *pathkey = (PathKey *) lfirst(key);
+
+		/* no matching group key, we're done */
+		if (!list_member_ptr(root->group_pathkeys, pathkey))
+			break;
+
+		n++;
+	}
+
+	return n;
+}
+
 /*
  * truncate_useless_pathkeys
  *		Shorten the given pathkey list to just the useful pathkeys.
@@ -1953,6 +2170,9 @@ truncate_useless_pathkeys(PlannerInfo *root,
 
 	nuseful = pathkeys_useful_for_merging(root, rel, pathkeys);
 	nuseful2 = pathkeys_useful_for_ordering(root, pathkeys);
+	if (nuseful2 > nuseful)
+		nuseful = nuseful2;
+	nuseful2 = pathkeys_useful_for_grouping(root, pathkeys);
 	if (nuseful2 > nuseful)
 		nuseful = nuseful2;
 
@@ -1988,6 +2208,8 @@ has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel)
 {
 	if (rel->joininfo != NIL || rel->has_eclass_joins)
 		return true;			/* might be able to use pathkeys for merging */
+	if (root->group_pathkeys != NIL)
+		return true;			/* might be able to use pathkeys for grouping */
 	if (root->query_pathkeys != NIL)
 		return true;			/* might be able to use them for ordering */
 	return false;				/* definitely useless */
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index fcf647940e..50453c2470 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -2805,8 +2805,9 @@ remove_useless_groupby_columns(PlannerInfo *root)
  *
  * In principle it might be interesting to consider other orderings of the
  * GROUP BY elements, which could match the sort ordering of other
- * possible plans (eg an indexscan) and thereby reduce cost.  We don't
- * bother with that, though.  Hashed grouping will frequently win anyway.
+ * possible plans (eg an indexscan) and thereby reduce cost.  However, we
+ * don't yet have sufficient information to do that here, so that's left until
+ * later in planning.  See get_useful_group_keys_orderings().
  *
  * Note: we need no comparable processing of the distinctClause because
  * the parser already enforced that that matches ORDER BY.
@@ -6811,12 +6812,12 @@ done:
 
 static Path *
 try_presorting(PlannerInfo *root, RelOptInfo *grouped_rel,
-					Path *path, Path *cheapest_path)
+					Path *path, Path *cheapest_path, PathKeyInfo *info)
 {
 	bool		is_sorted;
 	int			presorted_keys;
 
-	is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
+	is_sorted = pathkeys_count_contained_in(info->pathkeys,
 											path->pathkeys,
 											&presorted_keys);
 
@@ -6842,13 +6843,13 @@ try_presorting(PlannerInfo *root, RelOptInfo *grouped_rel,
 			path = (Path *) create_sort_path(root,
 											 grouped_rel,
 											 path,
-											 root->group_pathkeys,
+											 info->pathkeys,
 											 -1.0);
 		else
 			path = (Path *) create_incremental_sort_path(root,
 														 grouped_rel,
 														 path,
-														 root->group_pathkeys,
+														 info->pathkeys,
 														 presorted_keys,
 														 -1.0);
 	}
@@ -6885,56 +6886,74 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 		 */
 		foreach(lc, input_rel->pathlist)
 		{
+			ListCell   *lc2;
 			Path	   *path = (Path *) lfirst(lc);
+			Path	   *path_save = path;
+			List	   *pathkey_orderings = NIL;
 
-			path = try_presorting(root, grouped_rel, path, cheapest_path);
+			/* generate alternative group orderings that might be useful */
+			pathkey_orderings = get_useful_group_keys_orderings(root,
+																path->rows,
+																path->pathkeys);
 
-			if (path == NULL)
-				continue;
+			Assert(list_length(pathkey_orderings) > 0);
 
-			/* Now decide what to stick atop it */
-			if (parse->groupingSets)
+			foreach(lc2, pathkey_orderings)
 			{
-				consider_groupingsets_paths(root, grouped_rel,
-											path, true, can_hash,
-											gd, agg_costs, dNumGroups);
-			}
-			else if (parse->hasAggs)
-			{
-				/*
-				 * We have aggregation, possibly with plain GROUP BY. Make an
-				 * AggPath.
-				 */
-				add_path(grouped_rel, (Path *)
+				PathKeyInfo	   *info = (PathKeyInfo *) lfirst(lc2);
+
+				/* restore the path (we replace it in the loop) */
+				path = path_save;
+
+				path = try_presorting(root, grouped_rel, path, cheapest_path, info);
+
+				if (path == NULL)
+				continue;
+
+				/* Now decide what to stick atop it */
+				if (parse->groupingSets)
+				{
+					consider_groupingsets_paths(root, grouped_rel,
+												path, true, can_hash,
+												gd, agg_costs, dNumGroups);
+				}
+				else if (parse->hasAggs)
+				{
+					/*
+					 * We have aggregation, possibly with plain GROUP BY. Make an
+					 * AggPath.
+					 */
+					add_path(grouped_rel, (Path *)
 						 create_agg_path(root,
 										 grouped_rel,
 										 path,
 										 grouped_rel->reltarget,
 										 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
 										 AGGSPLIT_SIMPLE,
-										 root->processed_groupClause,
+										 info->clauses,
 										 havingQual,
 										 agg_costs,
 										 dNumGroups));
-			}
-			else if (parse->groupClause)
-			{
-				/*
-				 * We have GROUP BY without aggregation or grouping sets. Make
-				 * a GroupPath.
-				 */
-				add_path(grouped_rel, (Path *)
+				}
+				else if (parse->groupClause)
+				{
+					/*
+					 * We have GROUP BY without aggregation or grouping sets. Make
+					 * a GroupPath.
+					 */
+					add_path(grouped_rel, (Path *)
 						 create_group_path(root,
 										   grouped_rel,
 										   path,
-										   root->processed_groupClause,
+										   info->clauses,
 										   havingQual,
 										   dNumGroups));
-			}
-			else
-			{
-				/* Other cases should have been handled above */
-				Assert(false);
+				}
+				else
+				{
+					/* Other cases should have been handled above */
+					Assert(false);
+				}
 			}
 		}
 
@@ -6946,35 +6965,55 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 		{
 			foreach(lc, partially_grouped_rel->pathlist)
 			{
+				ListCell   *lc2;
 				Path	   *path = (Path *) lfirst(lc);
+				Path	   *path_save = path;
+				List	   *pathkey_orderings = NIL;
 
-				path = try_presorting(root, grouped_rel, path,
-									  partially_grouped_rel->cheapest_total_path);
+				/* generate alternative group orderings that might be useful */
+				pathkey_orderings = get_useful_group_keys_orderings(root,
+																	path->rows,
+																	path->pathkeys);
 
-				if (path == NULL)
-					continue;
+				Assert(list_length(pathkey_orderings) > 0);
 
-				if (parse->hasAggs)
-					add_path(grouped_rel, (Path *)
+				/* process all potentially interesting grouping reorderings */
+				foreach(lc2, pathkey_orderings)
+				{
+					PathKeyInfo    *info = (PathKeyInfo *) lfirst(lc2);
+
+					/* restore the path (we replace it in the loop) */
+					path = path_save;
+
+					path = try_presorting(root, grouped_rel, path,
+										  partially_grouped_rel->cheapest_total_path,
+										  info);
+
+					if (path == NULL)
+						continue;
+
+					if (parse->hasAggs)
+						add_path(grouped_rel, (Path *)
 							 create_agg_path(root,
 											 grouped_rel,
 											 path,
 											 grouped_rel->reltarget,
 											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
 											 AGGSPLIT_FINAL_DESERIAL,
-											 root->processed_groupClause,
+											 info->clauses,
 											 havingQual,
 											 agg_final_costs,
 											 dNumGroups));
-				else
-					add_path(grouped_rel, (Path *)
+					else
+						add_path(grouped_rel, (Path *)
 							 create_group_path(root,
 											   grouped_rel,
 											   path,
-											   root->processed_groupClause,
+											   info->clauses,
 											   havingQual,
 											   dNumGroups));
 
+				}
 			}
 		}
 	}
@@ -7177,34 +7216,53 @@ create_partial_grouping_paths(PlannerInfo *root,
 		 */
 		foreach(lc, input_rel->pathlist)
 		{
+			ListCell   *lc2;
 			Path	   *path = (Path *) lfirst(lc);
+			Path	   *path_save = path;
+			List	   *pathkey_orderings = NIL;
 
-			path = try_presorting(root, partially_grouped_rel, path,
-								  cheapest_total_path);
+			/* generate alternative group orderings that might be useful */
+			pathkey_orderings = get_useful_group_keys_orderings(root,
+																path->rows,
+																path->pathkeys);
 
-			if (path == NULL)
-				continue;
+			Assert(list_length(pathkey_orderings) > 0);
+
+			/* process all potentially interesting grouping reorderings */
+			foreach(lc2, pathkey_orderings)
+			{
+				PathKeyInfo	   *info = (PathKeyInfo *) lfirst(lc2);
+
+				/* restore the path (we replace it in the loop) */
+				path = path_save;
 
-			if (parse->hasAggs)
-				add_path(partially_grouped_rel, (Path *)
+				path = try_presorting(root, partially_grouped_rel, path,
+									  cheapest_total_path, info);
+
+				if (path == NULL)
+					continue;
+
+				if (parse->hasAggs)
+					add_path(partially_grouped_rel, (Path *)
 						 create_agg_path(root,
 										 partially_grouped_rel,
 										 path,
 										 partially_grouped_rel->reltarget,
 										 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
 										 AGGSPLIT_INITIAL_SERIAL,
-										 root->processed_groupClause,
+										 info->clauses,
 										 NIL,
 										 agg_partial_costs,
 										 dNumPartialGroups));
-			else
-				add_path(partially_grouped_rel, (Path *)
+				else
+					add_path(partially_grouped_rel, (Path *)
 						 create_group_path(root,
 										   partially_grouped_rel,
 										   path,
-										   root->processed_groupClause,
+										   info->clauses,
 										   NIL,
 										   dNumPartialGroups));
+			}
 		}
 	}
 
@@ -7213,34 +7271,54 @@ create_partial_grouping_paths(PlannerInfo *root,
 		/* Similar to above logic, but for partial paths. */
 		foreach(lc, input_rel->partial_pathlist)
 		{
+			ListCell   *lc2;
 			Path	   *path = (Path *) lfirst(lc);
+			Path	   *path_save = path;
+			List	   *pathkey_orderings = NIL;
 
-			path = try_presorting(root, partially_grouped_rel, path,
-								  cheapest_partial_path);
+			/* generate alternative group orderings that might be useful */
+			pathkey_orderings = get_useful_group_keys_orderings(root,
+																path->rows,
+																path->pathkeys);
 
-			if (path == NULL)
-				continue;
+			Assert(list_length(pathkey_orderings) > 0);
+
+			/* process all potentially interesting grouping reorderings */
+			foreach(lc2, pathkey_orderings)
+			{
+				PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
+
+
+				/* restore the path (we replace it in the loop) */
+				path = path_save;
 
-			if (parse->hasAggs)
-				add_partial_path(partially_grouped_rel, (Path *)
+				path = try_presorting(root, partially_grouped_rel, path,
+									  cheapest_partial_path, info);
+
+				if (path == NULL)
+					continue;
+
+				if (parse->hasAggs)
+					add_partial_path(partially_grouped_rel, (Path *)
 								 create_agg_path(root,
 												 partially_grouped_rel,
 												 path,
 												 partially_grouped_rel->reltarget,
 												 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
 												 AGGSPLIT_INITIAL_SERIAL,
-												 root->processed_groupClause,
+												 info->clauses,
 												 NIL,
 												 agg_partial_costs,
 												 dNumPartialPartialGroups));
-			else
-				add_partial_path(partially_grouped_rel, (Path *)
+				else
+					add_partial_path(partially_grouped_rel, (Path *)
 								 create_group_path(root,
 												   partially_grouped_rel,
 												   path,
-												   root->processed_groupClause,
+												   info->clauses,
 												   NIL,
 												   dNumPartialPartialGroups));
+			}
 		}
 	}
 
@@ -7354,6 +7432,8 @@ gather_grouping_paths(PlannerInfo *root, RelOptInfo *rel)
 	 * We can also skip the entire loop when we only have a single-item
 	 * group_pathkeys because then we can't possibly have a presorted prefix
 	 * of the list without having the list be fully sorted.
+	 *
+	 * XXX Shouldn't this also consider the group-key-reordering?
 	 */
 	if (!enable_incremental_sort || list_length(root->group_pathkeys) == 1)
 		return;
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index e53ebc6dc2..7fe58518d7 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -1050,6 +1050,16 @@ struct config_bool ConfigureNamesBool[] =
 		true,
 		NULL, NULL, NULL
 	},
+	{
+		{"enable_group_by_reordering", PGC_USERSET, QUERY_TUNING_METHOD,
+			gettext_noop("Enables reordering of GROUP BY keys."),
+			NULL,
+			GUC_EXPLAIN
+		},
+		&enable_group_by_reordering,
+		true,
+		NULL, NULL, NULL
+	},
 	{
 		{"geqo", PGC_USERSET, QUERY_TUNING_GEQO,
 			gettext_noop("Enables genetic query optimization."),
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index b2809c711a..dc82fceb05 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -399,6 +399,7 @@
 #enable_seqscan = on
 #enable_sort = on
 #enable_tidscan = on
+#enable_group_by_reordering = on
 
 # - Planner Cost Constants -
 
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index b9713ec9aa..0a251984c1 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -1456,6 +1456,16 @@ typedef struct PathKey
 	bool		pk_nulls_first; /* do NULLs come before normal values? */
 } PathKey;
 
+/*
+ * Combines information about pathkeys and the associated clauses.
+ */
+typedef struct PathKeyInfo
+{
+	NodeTag		type;
+	List	   *pathkeys;
+	List	   *clauses;
+} PathKeyInfo;
+
 /*
  * VolatileFunctionStatus -- allows nodes to cache their
  * contain_volatile_functions properties. VOLATILITY_UNKNOWN means not yet
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index efd4abc28f..e80156c49f 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -24,6 +24,7 @@ extern PGDLLIMPORT bool enable_geqo;
 extern PGDLLIMPORT int geqo_threshold;
 extern PGDLLIMPORT int min_parallel_table_scan_size;
 extern PGDLLIMPORT int min_parallel_index_scan_size;
+extern PGDLLIMPORT bool enable_group_by_reordering;
 
 /* Hook for plugins to get control in set_rel_pathlist() */
 typedef void (*set_rel_pathlist_hook_type) (PlannerInfo *root,
@@ -204,6 +205,8 @@ typedef enum
 extern PathKeysComparison compare_pathkeys(List *keys1, List *keys2);
 extern bool pathkeys_contained_in(List *keys1, List *keys2);
 extern bool pathkeys_count_contained_in(List *keys1, List *keys2, int *n_common);
+extern List *get_useful_group_keys_orderings(PlannerInfo *root, double nrows,
+											 List *path_pathkeys);
 extern Path *get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
 											Relids required_outer,
 											CostSelector cost_criterion,
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index f635c5a1af..423c8ec3b6 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -2728,6 +2728,138 @@ SELECT balk(hundred) FROM tenk1;
 (1 row)
 
 ROLLBACK;
+-- GROUP BY optimization by reorder columns
+CREATE TABLE btg AS SELECT
+  i % 100 AS x,
+  i % 100 AS y,
+  'abc' || i % 10 AS z,
+  i AS w
+FROM generate_series(1,10000) AS i;
+CREATE INDEX abc ON btg(x,y);
+ANALYZE btg;
+-- GROUP BY optimization by reorder columns by frequency
+SET enable_hashagg=off;
+SET max_parallel_workers= 0;
+SET max_parallel_workers_per_gather = 0;
+-- Utilize index scan ordering to avoid a Sort operation
+EXPLAIN (COSTS OFF) SELECT count(*) FROM btg GROUP BY x,y;
+               QUERY PLAN               
+----------------------------------------
+ GroupAggregate
+   Group Key: x, y
+   ->  Index Only Scan using abc on btg
+(3 rows)
+
+EXPLAIN (COSTS OFF) SELECT count(*) FROM btg GROUP BY y,x;
+               QUERY PLAN               
+----------------------------------------
+ GroupAggregate
+   Group Key: x, y
+   ->  Index Only Scan using abc on btg
+(3 rows)
+
+-- Engage incremental sort
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY x,y,z,w;
+               QUERY PLAN                
+-----------------------------------------
+ Group
+   Group Key: x, y, z, w
+   ->  Incremental Sort
+         Sort Key: x, y, z, w
+         Presorted Key: x, y
+         ->  Index Scan using abc on btg
+(6 rows)
+
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY z,y,w,x;
+               QUERY PLAN                
+-----------------------------------------
+ Group
+   Group Key: x, y, z, w
+   ->  Incremental Sort
+         Sort Key: x, y, z, w
+         Presorted Key: x, y
+         ->  Index Scan using abc on btg
+(6 rows)
+
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY w,z,x,y;
+               QUERY PLAN                
+-----------------------------------------
+ Group
+   Group Key: x, y, w, z
+   ->  Incremental Sort
+         Sort Key: x, y, w, z
+         Presorted Key: x, y
+         ->  Index Scan using abc on btg
+(6 rows)
+
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY w,x,z,y;
+               QUERY PLAN                
+-----------------------------------------
+ Group
+   Group Key: x, y, w, z
+   ->  Incremental Sort
+         Sort Key: x, y, w, z
+         Presorted Key: x, y
+         ->  Index Scan using abc on btg
+(6 rows)
+
+-- Subqueries
+explain (COSTS OFF) SELECT x,y
+FROM (SELECT * FROM btg ORDER BY x,y,w,z) AS q1
+GROUP BY (w,x,z,y);
+                  QUERY PLAN                  
+----------------------------------------------
+ Group
+   Group Key: btg.x, btg.y, btg.w, btg.z
+   ->  Incremental Sort
+         Sort Key: btg.x, btg.y, btg.w, btg.z
+         Presorted Key: btg.x, btg.y
+         ->  Index Scan using abc on btg
+(6 rows)
+
+explain (COSTS OFF) SELECT x,y
+FROM (SELECT * FROM btg ORDER BY x,y,w,z LIMIT 100) AS q1
+GROUP BY (w,x,z,y);
+                     QUERY PLAN                     
+----------------------------------------------------
+ Group
+   Group Key: btg.x, btg.y, btg.w, btg.z
+   ->  Limit
+         ->  Incremental Sort
+               Sort Key: btg.x, btg.y, btg.w, btg.z
+               Presorted Key: btg.x, btg.y
+               ->  Index Scan using abc on btg
+(7 rows)
+
+-- Should work with and without GROUP-BY optimization
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY w,x,z,y ORDER BY y,x,z,w;
+          QUERY PLAN          
+------------------------------
+ Group
+   Group Key: y, x, z, w
+   ->  Sort
+         Sort Key: y, x, z, w
+         ->  Seq Scan on btg
+(5 rows)
+
+-- Utilize incremental sort to make the ORDER BY rule a bit cheaper
+explain (COSTS OFF) SELECT x,w FROM btg GROUP BY w,x,y,z ORDER BY x*x,z;
+                  QUERY PLAN                   
+-----------------------------------------------
+ Sort
+   Sort Key: ((x * x)), z
+   ->  Group
+         Group Key: x, y, w, z
+         ->  Incremental Sort
+               Sort Key: x, y, w, z
+               Presorted Key: x, y
+               ->  Index Scan using abc on btg
+(8 rows)
+
+DROP TABLE btg;
+RESET enable_hashagg;
+RESET max_parallel_workers;
+RESET max_parallel_workers_per_gather;
 -- Secondly test the case of a parallel aggregate combiner function
 -- returning NULL. For that use normal transition function, but a
 -- combiner function returning NULL.
diff --git a/src/test/regress/expected/sysviews.out b/src/test/regress/expected/sysviews.out
index 271313ebf8..9be7aca2b8 100644
--- a/src/test/regress/expected/sysviews.out
+++ b/src/test/regress/expected/sysviews.out
@@ -114,6 +114,7 @@ select name, setting from pg_settings where name like 'enable%';
  enable_async_append            | on
  enable_bitmapscan              | on
  enable_gathermerge             | on
+ enable_group_by_reordering     | on
  enable_hashagg                 | on
  enable_hashjoin                | on
  enable_incremental_sort        | on
@@ -133,7 +134,7 @@ select name, setting from pg_settings where name like 'enable%';
  enable_seqscan                 | on
  enable_sort                    | on
  enable_tidscan                 | on
-(22 rows)
+(23 rows)
 
 -- There are always wait event descriptions for various types.
 select type, count(*) > 0 as ok FROM pg_wait_events
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index cc8f0efad5..b9fcceedd7 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -1181,6 +1181,53 @@ SELECT balk(hundred) FROM tenk1;
 
 ROLLBACK;
 
+-- GROUP BY optimization by reorder columns
+
+CREATE TABLE btg AS SELECT
+  i % 100 AS x,
+  i % 100 AS y,
+  'abc' || i % 10 AS z,
+  i AS w
+FROM generate_series(1,10000) AS i;
+CREATE INDEX abc ON btg(x,y);
+ANALYZE btg;
+
+-- GROUP BY optimization by reorder columns by frequency
+
+SET enable_hashagg=off;
+SET max_parallel_workers= 0;
+SET max_parallel_workers_per_gather = 0;
+
+-- Utilize index scan ordering to avoid a Sort operation
+EXPLAIN (COSTS OFF) SELECT count(*) FROM btg GROUP BY x,y;
+EXPLAIN (COSTS OFF) SELECT count(*) FROM btg GROUP BY y,x;
+
+-- Engage incremental sort
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY x,y,z,w;
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY z,y,w,x;
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY w,z,x,y;
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY w,x,z,y;
+
+-- Subqueries
+explain (COSTS OFF) SELECT x,y
+FROM (SELECT * FROM btg ORDER BY x,y,w,z) AS q1
+GROUP BY (w,x,z,y);
+explain (COSTS OFF) SELECT x,y
+FROM (SELECT * FROM btg ORDER BY x,y,w,z LIMIT 100) AS q1
+GROUP BY (w,x,z,y);
+
+-- Should work with and without GROUP-BY optimization
+explain (COSTS OFF) SELECT x,y FROM btg GROUP BY w,x,z,y ORDER BY y,x,z,w;
+
+-- Utilize incremental sort to make the ORDER BY rule a bit cheaper
+explain (COSTS OFF) SELECT x,w FROM btg GROUP BY w,x,y,z ORDER BY x*x,z;
+
+DROP TABLE btg;
+
+RESET enable_hashagg;
+RESET max_parallel_workers;
+RESET max_parallel_workers_per_gather;
+
 -- Secondly test the case of a parallel aggregate combiner function
 -- returning NULL. For that use normal transition function, but a
 -- combiner function returning NULL.
-- 
2.43.0