diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index 4c6b1d1f55..9f4c011dac 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -1959,8 +1959,8 @@ cost_incremental_sort(Path *path, double input_tuples, int width, Cost comparison_cost, int sort_mem, double limit_tuples) { - Cost startup_cost = 0, - run_cost = 0, + Cost startup_cost, + run_cost, input_run_cost = input_total_cost - input_startup_cost; double group_tuples, input_groups; @@ -1969,10 +1969,9 @@ cost_incremental_sort(Path *path, group_input_run_cost; List *presortedExprs = NIL; ListCell *l; - int i = 0; bool unknown_varno = false; - Assert(presorted_keys != 0); + Assert(presorted_keys > 0 && presorted_keys < list_length(pathkeys)); /* * We want to be sure the cost of a sort is never estimated as zero, even @@ -2025,8 +2024,7 @@ cost_incremental_sort(Path *path, /* expression not containing any Vars with "varno 0" */ presortedExprs = lappend(presortedExprs, member->em_expr); - i++; - if (i >= presorted_keys) + if (foreach_current_index(l) + 1 >= presorted_keys) break; } @@ -2040,21 +2038,18 @@ cost_incremental_sort(Path *path, /* * Estimate average cost of sorting of one group where presorted keys are - * equal. Incremental sort is sensitive to distribution of tuples to the - * groups, where we're relying on quite rough assumptions. Thus, we're - * pessimistic about incremental sort performance and increase its average - * group size by half. + * equal. */ cost_tuplesort(&group_startup_cost, &group_run_cost, - 1.5 * group_tuples, width, comparison_cost, sort_mem, + group_tuples, width, comparison_cost, sort_mem, limit_tuples); /* * Startup cost of incremental sort is the startup cost of its first group * plus the cost of its input. */ - startup_cost += group_startup_cost - + input_startup_cost + group_input_run_cost; + startup_cost = group_startup_cost + input_startup_cost + + group_input_run_cost; /* * After we started producing tuples from the first group, the cost of @@ -2062,17 +2057,20 @@ cost_incremental_sort(Path *path, * group, plus the total cost to process the remaining groups, plus the * remaining cost of input. */ - run_cost += group_run_cost - + (group_run_cost + group_startup_cost) * (input_groups - 1) - + group_input_run_cost * (input_groups - 1); + run_cost = group_run_cost + (group_run_cost + group_startup_cost) * + (input_groups - 1) + group_input_run_cost * (input_groups - 1); /* * Incremental sort adds some overhead by itself. Firstly, it has to * detect the sort groups. This is roughly equal to one extra copy and - * comparison per tuple. Secondly, it has to reset the tuplesort context - * for every group. + * comparison per tuple. */ run_cost += (cpu_tuple_cost + comparison_cost) * input_tuples; + + /* + * Additionally, we charge double cpu_tuple_cost for each input group to + * account for the tuplesort_reset that's performed after each group. + */ run_cost += 2.0 * cpu_tuple_cost * input_groups; path->rows = input_tuples; diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 493a3af0fa..355abeaefb 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -6496,7 +6496,6 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, foreach(lc, input_rel->pathlist) { Path *path = (Path *) lfirst(lc); - Path *path_original = path; bool is_sorted; int presorted_keys; @@ -6504,90 +6503,28 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, path->pathkeys, &presorted_keys); - if (path == cheapest_path || is_sorted) + if (!is_sorted) { - /* Sort the cheapest-total path if it isn't already sorted */ - if (!is_sorted) + /* + * We've no need to consider both a sort and incremental sort. + * We'll just do a sort if there are no pre-sorted keys and + * an incremental sort when there are pre-sorted keys. + */ + if (presorted_keys == 0 || !enable_incremental_sort) path = (Path *) create_sort_path(root, grouped_rel, path, root->group_pathkeys, -1.0); - - /* Now decide what to stick atop it */ - if (parse->groupingSets) - { - consider_groupingsets_paths(root, grouped_rel, - path, true, can_hash, - gd, agg_costs, dNumGroups); - } - else if (parse->hasAggs) - { - /* - * We have aggregation, possibly with plain GROUP BY. Make - * an AggPath. - */ - add_path(grouped_rel, (Path *) - create_agg_path(root, - grouped_rel, - path, - grouped_rel->reltarget, - parse->groupClause ? AGG_SORTED : AGG_PLAIN, - AGGSPLIT_SIMPLE, - parse->groupClause, - havingQual, - agg_costs, - dNumGroups)); - } - else if (parse->groupClause) - { - /* - * We have GROUP BY without aggregation or grouping sets. - * Make a GroupPath. - */ - add_path(grouped_rel, (Path *) - create_group_path(root, - grouped_rel, - path, - parse->groupClause, - havingQual, - dNumGroups)); - } else - { - /* Other cases should have been handled above */ - Assert(false); - } + path = (Path *) create_incremental_sort_path(root, + grouped_rel, + path, + root->group_pathkeys, + presorted_keys, + -1.0); } - /* - * Now we may consider incremental sort on this path, but only - * when the path is not already sorted and when incremental sort - * is enabled. - */ - if (is_sorted || !enable_incremental_sort) - continue; - - /* Restore the input path (we might have added Sort on top). */ - path = path_original; - - /* no shared prefix, no point in building incremental sort */ - if (presorted_keys == 0) - continue; - - /* - * We should have already excluded pathkeys of length 1 because - * then presorted_keys > 0 would imply is_sorted was true. - */ - Assert(list_length(root->group_pathkeys) != 1); - - path = (Path *) create_incremental_sort_path(root, - grouped_rel, - path, - root->group_pathkeys, - presorted_keys, - -1.0); - /* Now decide what to stick atop it */ if (parse->groupingSets) { @@ -6602,16 +6539,16 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, * AggPath. */ add_path(grouped_rel, (Path *) - create_agg_path(root, - grouped_rel, - path, - grouped_rel->reltarget, - parse->groupClause ? AGG_SORTED : AGG_PLAIN, - AGGSPLIT_SIMPLE, - parse->groupClause, - havingQual, - agg_costs, - dNumGroups)); + create_agg_path(root, + grouped_rel, + path, + grouped_rel->reltarget, + parse->groupClause ? AGG_SORTED : AGG_PLAIN, + AGGSPLIT_SIMPLE, + parse->groupClause, + havingQual, + agg_costs, + dNumGroups)); } else if (parse->groupClause) { @@ -6620,12 +6557,12 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, * a GroupPath. */ add_path(grouped_rel, (Path *) - create_group_path(root, - grouped_rel, - path, - parse->groupClause, - havingQual, - dNumGroups)); + create_group_path(root, + grouped_rel, + path, + parse->groupClause, + havingQual, + dNumGroups)); } else { diff --git a/src/test/regress/expected/incremental_sort.out b/src/test/regress/expected/incremental_sort.out index 0a631124c2..1a1e8b2365 100644 --- a/src/test/regress/expected/incremental_sort.out +++ b/src/test/regress/expected/incremental_sort.out @@ -1,16 +1,3 @@ --- When we have to sort the entire table, incremental sort will --- be slower than plain sort, so it should not be used. -explain (costs off) -select * from (select * from tenk1 order by four) t order by four, ten; - QUERY PLAN ------------------------------------ - Sort - Sort Key: tenk1.four, tenk1.ten - -> Sort - Sort Key: tenk1.four - -> Seq Scan on tenk1 -(5 rows) - -- When there is a LIMIT clause, incremental sort is beneficial because -- it only has to sort some of the groups, and not the entire table. explain (costs off) diff --git a/src/test/regress/sql/incremental_sort.sql b/src/test/regress/sql/incremental_sort.sql index 284a354dbb..071f8a5268 100644 --- a/src/test/regress/sql/incremental_sort.sql +++ b/src/test/regress/sql/incremental_sort.sql @@ -1,8 +1,3 @@ --- When we have to sort the entire table, incremental sort will --- be slower than plain sort, so it should not be used. -explain (costs off) -select * from (select * from tenk1 order by four) t order by four, ten; - -- When there is a LIMIT clause, incremental sort is beneficial because -- it only has to sort some of the groups, and not the entire table. explain (costs off)