diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index f4d4a4df66..7a5a3bb73b 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -142,6 +142,7 @@ bool enable_partitionwise_aggregate = false; bool enable_parallel_append = true; bool enable_parallel_hash = true; bool enable_partition_pruning = true; +bool enable_groupby_reorder = true; typedef struct { diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c index ce9bf87e9b..9d0961dee2 100644 --- a/src/backend/optimizer/path/pathkeys.c +++ b/src/backend/optimizer/path/pathkeys.c @@ -22,6 +22,7 @@ #include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" #include "nodes/plannodes.h" +#include "optimizer/cost.h" #include "optimizer/optimizer.h" #include "optimizer/pathnode.h" #include "optimizer/paths.h" @@ -388,6 +389,91 @@ pathkeys_count_contained_in(List *keys1, List *keys2, int *n_common) return (key1 == NULL); } +/* + * pathkeys_maybe_reorder + * Reorder pathkeys in keys1 so that the order matches keys2 as much as + * possible, i.e. to maximize the common prefix length. + */ +List * +pathkeys_maybe_reorder(List *keys1, List *keys2) +{ + ListCell *key1, + *key2; + List *keys = NIL; + + int idx = 0; + Bitmapset *matched = NULL; + + if (!enable_groupby_reorder) + return keys1; + + /* + * See if we can avoiding looping through both lists. This optimization + * gains us several percent in planning time in a worst-case test. + */ + if (keys1 == keys2) + { + return keys1; + } + else if (keys1 == NIL) + { + return keys1; + } + else if (keys2 == NIL) + { + return keys1; + } + + /* + * First add all keys from keys2 with a match in keys1. We only care + * about prefix, so we stop at the first key without a keys1 match. + */ + foreach(key2, keys2) + { + bool found = false; + PathKey *pathkey2 = (PathKey *) lfirst(key2); + + idx = 0; + foreach(key1, keys1) + { + PathKey *pathkey1 = (PathKey *) lfirst(key1); + + if (pathkey1 == pathkey2) + { + found = true; + keys = lappend(keys, pathkey1); + matched = bms_add_member(matched, idx); + } + + idx++; + } + + /* No match in keys1, so we're done. */ + if (!found) + break; + } + + /* add all remaining unmatched keys from keys1 */ + idx = 0; + foreach(key1, keys1) + { + PathKey *pathkey1 = (PathKey *) lfirst(key1); + + if (!bms_is_member(idx, matched)) + keys = lappend(keys, pathkey1); + + idx++; + } + + /* + * Simple cross-check that we have all the keys. It might be a good + * idea to actually check that all keys from keys1 are included. + */ + Assert(list_length(keys1) == list_length(keys)); + + return keys; +} + /* * get_cheapest_path_for_pathkeys * Find the cheapest path (according to the specified criterion) that @@ -1862,6 +1948,41 @@ pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys) return n_common_pathkeys; } +/* + * pathkeys_useful_for_grouping + * Count the number of pathkeys that are useful for meeting the + * query's requested output ordering. + * + * Because we the have the possibility of incremental sort, a prefix list of + * keys is potentially useful for improving the performance of the requested + * ordering. Thus we return 0, if no valuable keys are found, or the number + * of leading keys shared by the list and the requested ordering.. + */ +static int +pathkeys_useful_for_grouping(PlannerInfo *root, List *pathkeys) +{ + int n_common_pathkeys; + List *group_pathkeys; + + if (root->group_pathkeys == NIL) + return 0; /* no special ordering requested */ + + if (pathkeys == NIL) + return 0; /* unordered path */ + + /* + * FIXME Building the reordered list is unnecessary - we can simply + * count how many keys at the start of pathkeys are also found + * anywhere in root->group_pathkeys. + */ + group_pathkeys = pathkeys_maybe_reorder(root->group_pathkeys, pathkeys); + + (void) pathkeys_count_contained_in(group_pathkeys, pathkeys, + &n_common_pathkeys); + + return n_common_pathkeys; +} + /* * truncate_useless_pathkeys * Shorten the given pathkey list to just the useful pathkeys. @@ -1879,6 +2000,10 @@ truncate_useless_pathkeys(PlannerInfo *root, if (nuseful2 > nuseful) nuseful = nuseful2; + nuseful2 = pathkeys_useful_for_grouping(root, pathkeys); + if (nuseful2 > nuseful) + nuseful = nuseful2; + /* * Note: not safe to modify input list destructively, but we can avoid * copying the list if we're not actually going to change it diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 357850624c..3265ec400a 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -6507,8 +6507,12 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, Path *path_original = path; bool is_sorted; int presorted_keys; + List *group_pathkeys; - is_sorted = pathkeys_count_contained_in(root->group_pathkeys, + group_pathkeys = pathkeys_maybe_reorder(root->group_pathkeys, + path->pathkeys); + + is_sorted = pathkeys_count_contained_in(group_pathkeys, path->pathkeys, &presorted_keys); @@ -6519,7 +6523,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, path = (Path *) create_sort_path(root, grouped_rel, path, - root->group_pathkeys, + group_pathkeys, -1.0); /* Now decide what to stick atop it */ @@ -6592,7 +6596,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, path = (Path *) create_incremental_sort_path(root, grouped_rel, path, - root->group_pathkeys, + group_pathkeys, presorted_keys, -1.0); @@ -6654,8 +6658,12 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, Path *path_original = path; bool is_sorted; int presorted_keys; + List *group_pathkeys; + + group_pathkeys = pathkeys_maybe_reorder(root->group_pathkeys, + path->pathkeys); - is_sorted = pathkeys_count_contained_in(root->group_pathkeys, + is_sorted = pathkeys_count_contained_in(group_pathkeys, path->pathkeys, &presorted_keys); @@ -6670,7 +6678,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, path = (Path *) create_sort_path(root, grouped_rel, path, - root->group_pathkeys, + group_pathkeys, -1.0); } @@ -6720,7 +6728,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, path = (Path *) create_incremental_sort_path(root, grouped_rel, path, - root->group_pathkeys, + group_pathkeys, presorted_keys, -1.0); @@ -6980,6 +6988,10 @@ create_partial_grouping_paths(PlannerInfo *root, { Path *path = (Path *) lfirst(lc); bool is_sorted; + List *group_pathkeys = NIL; + + group_pathkeys = pathkeys_maybe_reorder(root->group_pathkeys, + path->pathkeys); is_sorted = pathkeys_contained_in(root->group_pathkeys, path->pathkeys); @@ -6990,7 +7002,7 @@ create_partial_grouping_paths(PlannerInfo *root, path = (Path *) create_sort_path(root, partially_grouped_rel, path, - root->group_pathkeys, + group_pathkeys, -1.0); if (parse->hasAggs) @@ -7030,8 +7042,12 @@ create_partial_grouping_paths(PlannerInfo *root, Path *path = (Path *) lfirst(lc); bool is_sorted; int presorted_keys; + List *group_pathkeys; + + group_pathkeys = pathkeys_maybe_reorder(root->group_pathkeys, + path->pathkeys); - is_sorted = pathkeys_count_contained_in(root->group_pathkeys, + is_sorted = pathkeys_count_contained_in(group_pathkeys, path->pathkeys, &presorted_keys); @@ -7046,7 +7062,7 @@ create_partial_grouping_paths(PlannerInfo *root, path = (Path *) create_incremental_sort_path(root, partially_grouped_rel, path, - root->group_pathkeys, + group_pathkeys, presorted_keys, -1.0); @@ -7084,8 +7100,13 @@ create_partial_grouping_paths(PlannerInfo *root, Path *path_original = path; bool is_sorted; int presorted_keys; + List *group_pathkeys; + + group_pathkeys = pathkeys_maybe_reorder(root->group_pathkeys, + path->pathkeys); - is_sorted = pathkeys_count_contained_in(root->group_pathkeys, + + is_sorted = pathkeys_count_contained_in(group_pathkeys, path->pathkeys, &presorted_keys); @@ -7096,7 +7117,7 @@ create_partial_grouping_paths(PlannerInfo *root, path = (Path *) create_sort_path(root, partially_grouped_rel, path, - root->group_pathkeys, + group_pathkeys, -1.0); if (parse->hasAggs) @@ -7145,7 +7166,7 @@ create_partial_grouping_paths(PlannerInfo *root, path = (Path *) create_incremental_sort_path(root, partially_grouped_rel, path, - root->group_pathkeys, + group_pathkeys, presorted_keys, -1.0); diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 2f3e0a70e0..50abf2344f 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -2060,6 +2060,15 @@ static struct config_bool ConfigureNamesBool[] = NULL, NULL, NULL }, + { + {"enable_groupby_reorder", PGC_USERSET, QUERY_TUNING_METHOD, + gettext_noop("Enables reodering of expressions in the GROUP BY clause."), + }, + &enable_groupby_reorder, + true, + NULL, NULL, NULL + }, + /* End-of-list marker */ { {NULL, 0, 0, NULL, NULL}, NULL, false, NULL, NULL, NULL diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h index 9710e5c0a4..ef58998409 100644 --- a/src/include/optimizer/cost.h +++ b/src/include/optimizer/cost.h @@ -67,6 +67,7 @@ extern PGDLLIMPORT bool enable_partitionwise_aggregate; extern PGDLLIMPORT bool enable_parallel_append; extern PGDLLIMPORT bool enable_parallel_hash; extern PGDLLIMPORT bool enable_partition_pruning; +extern PGDLLIMPORT bool enable_groupby_reorder; extern PGDLLIMPORT int constraint_exclusion; extern double index_pages_fetched(double tuples_fetched, BlockNumber pages, diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index 10b6e81079..65200a106f 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -189,6 +189,7 @@ typedef enum extern PathKeysComparison compare_pathkeys(List *keys1, List *keys2); extern bool pathkeys_contained_in(List *keys1, List *keys2); extern bool pathkeys_count_contained_in(List *keys1, List *keys2, int *n_common); +extern List *pathkeys_maybe_reorder(List *keys1, List *keys2); extern Path *get_cheapest_path_for_pathkeys(List *paths, List *pathkeys, Relids required_outer, CostSelector cost_criterion,