diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 000d757bdd..c908944071 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -4654,22 +4654,63 @@ create_partial_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,
 										  cheapest_partial_path->rows,
 										  NULL, NULL);
 
-	/* first try adding unique paths atop of sorted paths */
+	/*
+	 * First try sorting the cheapest path and incrementally sorting any paths
+	 * with presorted keys, then put unique paths atop of those.
+	 */
 	if (grouping_is_sortable(parse->distinctClause))
 	{
 		foreach(lc, input_rel->partial_pathlist)
 		{
-			Path	   *path = (Path *) lfirst(lc);
+			Path	   *input_path = (Path *) lfirst(lc);
+			Path	   *sorted_path;
+			bool		is_sorted;
+			int			presorted_keys;
 
-			if (pathkeys_contained_in(root->distinct_pathkeys, path->pathkeys))
+			is_sorted = pathkeys_count_contained_in(root->distinct_pathkeys,
+													input_path->pathkeys,
+													&presorted_keys);
+
+			if (is_sorted)
+				sorted_path = input_path;
+			else
 			{
-				add_partial_path(partial_distinct_rel, (Path *)
-								 create_upper_unique_path(root,
-														  partial_distinct_rel,
-														  path,
-														  list_length(root->distinct_pathkeys),
-														  numDistinctRows));
+				/*
+				 * Try at least sorting the cheapest path and also try
+				 * incrementally sorting any path which is partially sorted
+				 * already (there's no need to deal with paths which have
+				 * presorted keys when incremental sort is disabled, unless
+				 * it's the cheapest partial path).
+				 */
+				if (input_path != cheapest_partial_path &&
+					(presorted_keys == 0 || !enable_incremental_sort))
+					continue;
+
+				/*
+				 * We've no need to consider both a sort and an incremental
+				 * sort.  We'll just do a sort if there are no presorted keys
+				 * and an incremental sort when there are presorted keys.
+				 */
+				if (presorted_keys == 0 || !enable_incremental_sort)
+					sorted_path = (Path *) create_sort_path(root,
+															partial_distinct_rel,
+															input_path,
+															root->distinct_pathkeys,
+															-1.0);
+				else
+					sorted_path = (Path *) create_incremental_sort_path(root,
+																		partial_distinct_rel,
+																		input_path,
+																		root->distinct_pathkeys,
+																		presorted_keys,
+																		-1.0);
 			}
+
+			add_partial_path(partial_distinct_rel, (Path *)
+							 create_upper_unique_path(root, partial_distinct_rel,
+													  sorted_path,
+													  list_length(root->distinct_pathkeys),
+													  numDistinctRows));
 		}
 	}
 
@@ -4785,8 +4826,8 @@ create_final_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,
 		 * the other.)
 		 */
 		List	   *needed_pathkeys;
-		Path	   *path;
 		ListCell   *lc;
+		double		limittuples = root->distinct_pathkeys == NIL ? 1.0 : -1.0;
 
 		if (parse->hasDistinctOn &&
 			list_length(root->distinct_pathkeys) <
@@ -4797,96 +4838,89 @@ create_final_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,
 
 		foreach(lc, input_rel->pathlist)
 		{
-			path = (Path *) lfirst(lc);
+			Path	   *input_path = (Path *) lfirst(lc);
+			Path	   *sorted_path;
+			bool		is_sorted;
+			int			presorted_keys;
 
-			if (pathkeys_contained_in(needed_pathkeys, path->pathkeys))
+			is_sorted = pathkeys_count_contained_in(needed_pathkeys,
+													input_path->pathkeys,
+													&presorted_keys);
+
+			if (is_sorted)
+				sorted_path = input_path;
+			else
 			{
 				/*
-				 * distinct_pathkeys may have become empty if all of the
-				 * pathkeys were determined to be redundant.  If all of the
-				 * pathkeys are redundant then each DISTINCT target must only
-				 * allow a single value, therefore all resulting tuples must
-				 * be identical (or at least indistinguishable by an equality
-				 * check).  We can uniquify these tuples simply by just taking
-				 * the first tuple.  All we do here is add a path to do "LIMIT
-				 * 1" atop of 'path'.  When doing a DISTINCT ON we may still
-				 * have a non-NIL sort_pathkeys list, so we must still only do
-				 * this with paths which are correctly sorted by
-				 * sort_pathkeys.
+				 * Try at least sorting the cheapest path and also try
+				 * incrementally sorting any path which is partially sorted
+				 * already (there's no need to deal with paths which have
+				 * presorted keys when incremental sort is disabled, unless
+				 * it's the cheapest input path).
 				 */
-				if (root->distinct_pathkeys == NIL)
-				{
-					Node	   *limitCount;
-
-					limitCount = (Node *) makeConst(INT8OID, -1, InvalidOid,
-													sizeof(int64),
-													Int64GetDatum(1), false,
-													FLOAT8PASSBYVAL);
+				if (input_path != cheapest_input_path &&
+					(presorted_keys == 0 || !enable_incremental_sort))
+					continue;
 
-					/*
-					 * If the query already has a LIMIT clause, then we could
-					 * end up with a duplicate LimitPath in the final plan.
-					 * That does not seem worth troubling over too much.
-					 */
-					add_path(distinct_rel, (Path *)
-							 create_limit_path(root, distinct_rel, path, NULL,
-											   limitCount, LIMIT_OPTION_COUNT,
-											   0, 1));
-				}
+				/*
+				 * We've no need to consider both a sort and an incremental
+				 * sort.  We'll just do a sort if there are no presorted keys
+				 * and an incremental sort when there are presorted keys.
+				 */
+				if (presorted_keys == 0 || !enable_incremental_sort)
+					sorted_path = (Path *) create_sort_path(root,
+															distinct_rel,
+															input_path,
+															needed_pathkeys,
+															limittuples);
 				else
-				{
-					add_path(distinct_rel, (Path *)
-							 create_upper_unique_path(root, distinct_rel,
-													  path,
-													  list_length(root->distinct_pathkeys),
-													  numDistinctRows));
-				}
+					sorted_path = (Path *) create_incremental_sort_path(root,
+																		distinct_rel,
+																		input_path,
+																		needed_pathkeys,
+																		presorted_keys,
+																		limittuples);
 			}
-		}
 
-		/* For explicit-sort case, always use the more rigorous clause */
-		if (list_length(root->distinct_pathkeys) <
-			list_length(root->sort_pathkeys))
-		{
-			needed_pathkeys = root->sort_pathkeys;
-			/* Assert checks that parser didn't mess up... */
-			Assert(pathkeys_contained_in(root->distinct_pathkeys,
-										 needed_pathkeys));
-		}
-		else
-			needed_pathkeys = root->distinct_pathkeys;
+			/*
+			 * distinct_pathkeys may have become empty if all of the pathkeys
+			 * were determined to be redundant.  If all of the pathkeys are
+			 * redundant then each DISTINCT target must only allow a single
+			 * value, therefore all resulting tuples must be identical (or at
+			 * least indistinguishable by an equality check).  We can uniquify
+			 * these tuples simply by taking the first tuple.  All we do here
+			 * is add a path to do "LIMIT 1" atop of 'sorted_path'.  When
+			 * doing a DISTINCT ON we may still have a non-NIL sort_pathkeys
+			 * list, so we must still only do this with paths which are
+			 * correctly sorted by sort_pathkeys.
+			 */
+			if (root->distinct_pathkeys == NIL)
+			{
+				Node	   *limitCount;
 
-		path = cheapest_input_path;
-		if (!pathkeys_contained_in(needed_pathkeys, path->pathkeys))
-			path = (Path *) create_sort_path(root, distinct_rel,
-											 path,
-											 needed_pathkeys,
-											 root->distinct_pathkeys == NIL ?
-											 1.0 : -1.0);
+				limitCount = (Node *) makeConst(INT8OID, -1, InvalidOid,
+												sizeof(int64),
+												Int64GetDatum(1), false,
+												FLOAT8PASSBYVAL);
 
-		/*
-		 * As above, use a LimitPath instead of a UniquePath when all of the
-		 * distinct_pathkeys are redundant and we're only going to get a
-		 * series of tuples all with the same values anyway.
-		 */
-		if (root->distinct_pathkeys == NIL)
-		{
-			Node	   *limitCount = (Node *) makeConst(INT8OID, -1, InvalidOid,
-														sizeof(int64),
-														Int64GetDatum(1), false,
-														FLOAT8PASSBYVAL);
-
-			add_path(distinct_rel, (Path *)
-					 create_limit_path(root, distinct_rel, path, NULL,
-									   limitCount, LIMIT_OPTION_COUNT, 0, 1));
-		}
-		else
-		{
-			add_path(distinct_rel, (Path *)
-					 create_upper_unique_path(root, distinct_rel,
-											  path,
-											  list_length(root->distinct_pathkeys),
-											  numDistinctRows));
+				/*
+				 * If the query already has a LIMIT clause, then we could
+				 * end up with a duplicate LimitPath in the final plan.
+				 * That does not seem worth troubling over too much.
+				 */
+				add_path(distinct_rel, (Path *)
+						 create_limit_path(root, distinct_rel, sorted_path,
+										   NULL, limitCount,
+										   LIMIT_OPTION_COUNT, 0, 1));
+			}
+			else
+			{
+				add_path(distinct_rel, (Path *)
+						 create_upper_unique_path(root, distinct_rel,
+												  sorted_path,
+												  list_length(root->distinct_pathkeys),
+												  numDistinctRows));
+			}
 		}
 	}
 
diff --git a/src/test/regress/expected/incremental_sort.out b/src/test/regress/expected/incremental_sort.out
index 1a1e8b2365..0c3433f8e5 100644
--- a/src/test/regress/expected/incremental_sort.out
+++ b/src/test/regress/expected/incremental_sort.out
@@ -1484,15 +1484,16 @@ explain (costs off) select * from t union select * from t order by 1,3;
 -- Full sort, not just incremental sort can be pushed below a gather merge path
 -- by generate_useful_gather_paths.
 explain (costs off) select distinct a,b from t;
-                QUERY PLAN                
-------------------------------------------
+                   QUERY PLAN                   
+------------------------------------------------
 Unique
    ->  Gather Merge
          Workers Planned: 2
-         ->  Sort
-               Sort Key: a, b
-               ->  Parallel Seq Scan on t
-(6 rows)
+         ->  Unique
+               ->  Sort
+                     Sort Key: a, b
+                     ->  Parallel Seq Scan on t
+(7 rows)
 
 drop table t;
 -- Sort pushdown can't go below where expressions are part of the rel target.
diff --git a/src/test/regress/expected/select_distinct.out b/src/test/regress/expected/select_distinct.out
index 6ce889d87c..1fc07f220f 100644
--- a/src/test/regress/expected/select_distinct.out
+++ b/src/test/regress/expected/select_distinct.out
@@ -171,6 +171,20 @@ SELECT DISTINCT g%1000 FROM generate_series(0,9999) g;
 SET jit_above_cost TO DEFAULT;
 CREATE TABLE distinct_group_2 AS
 SELECT DISTINCT (g%1000)::text FROM generate_series(0,9999) g;
+SET enable_seqscan = 0;
+-- Check to see that we get an incremental sort plan
+EXPLAIN (costs off)
+SELECT DISTINCT hundred, two FROM tenk1;
+                     QUERY PLAN                      
+-----------------------------------------------------
+ Unique
+   ->  Incremental Sort
+         Sort Key: hundred, two
+         Presorted Key: hundred
+         ->  Index Scan using tenk1_hundred on tenk1
+(5 rows)
+
+RESET enable_seqscan;
 SET enable_hashagg=TRUE;
 -- Produce results with hash aggregation.
 SET enable_sort=FALSE;
@@ -265,15 +279,16 @@ $$ LANGUAGE plpgsql PARALLEL SAFE;
 -- Ensure we do parallel distinct now that the function is parallel safe
 EXPLAIN (COSTS OFF)
 SELECT DISTINCT distinct_func(1) FROM tenk1;
-                  QUERY PLAN                  
-----------------------------------------------
+                     QUERY PLAN                      
+-----------------------------------------------------
 Unique
-   ->  Sort
-         Sort Key: (distinct_func(1))
-         ->  Gather
-               Workers Planned: 2
-               ->  Parallel Seq Scan on tenk1
-(6 rows)
+   ->  Gather Merge
+         Workers Planned: 2
+         ->  Unique
+               ->  Sort
+                     Sort Key: (distinct_func(1))
+                     ->  Parallel Seq Scan on tenk1
+(7 rows)
 
 RESET max_parallel_workers_per_gather;
 RESET min_parallel_table_scan_size;
diff --git a/src/test/regress/expected/window.out b/src/test/regress/expected/window.out
index 90e89fb5b6..b2c6605e60 100644
--- a/src/test/regress/expected/window.out
+++ b/src/test/regress/expected/window.out
@@ -3944,8 +3944,9 @@ ORDER BY depname, enroll_date;
                                            QUERY PLAN                                           
 ----------------------------------------------------------------------------------------------
 Unique
-   ->  Sort
+   ->  Incremental Sort
          Sort Key: depname, enroll_date, empno, (sum(salary) OVER (?)), (min(salary) OVER (?))
+         Presorted Key: depname, enroll_date
          ->  WindowAgg
                ->  Incremental Sort
                      Sort Key: depname, enroll_date
@@ -3954,7 +3955,7 @@ ORDER BY depname, enroll_date;
                            ->  Sort
                                  Sort Key: depname, empno
                                  ->  Seq Scan on empsalary
-(11 rows)
+(12 rows)
 
 -- As above but adjust the ORDER BY clause to help ensure the plan with the
 -- minimum amount of sorting wasn't a fluke.
@@ -3970,8 +3971,9 @@ ORDER BY depname, empno;
                                            QUERY PLAN                                           
 ----------------------------------------------------------------------------------------------
 Unique
-   ->  Sort
+   ->  Incremental Sort
          Sort Key: depname, empno, enroll_date, (sum(salary) OVER (?)), (min(salary) OVER (?))
+         Presorted Key: depname, empno
          ->  WindowAgg
                ->  Incremental Sort
                      Sort Key: depname, empno
@@ -3980,7 +3982,7 @@ ORDER BY depname, empno;
                            ->  Sort
                                  Sort Key: depname, enroll_date
                                  ->  Seq Scan on empsalary
-(11 rows)
+(12 rows)
 
 RESET enable_hashagg;
 -- Test Sort node reordering
diff --git a/src/test/regress/sql/select_distinct.sql b/src/test/regress/sql/select_distinct.sql
index 34020adad1..1643526d99 100644
--- a/src/test/regress/sql/select_distinct.sql
+++ b/src/test/regress/sql/select_distinct.sql
@@ -69,6 +69,14 @@ SET jit_above_cost TO DEFAULT;
 
 CREATE TABLE distinct_group_2 AS
 SELECT DISTINCT (g%1000)::text FROM generate_series(0,9999) g;
+SET enable_seqscan = 0;
+
+-- Check to see that we get an incremental sort plan
+EXPLAIN (costs off)
+SELECT DISTINCT hundred, two FROM tenk1;
+
+RESET enable_seqscan;
+
 SET enable_hashagg=TRUE;
 
 -- Produce results with hash aggregation.
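
For anyone wanting to try the new behaviour outside the regression suite, here is a
minimal sketch of a session that exercises it.  The table and index names below are
illustrative assumptions, not objects created by this patch or its tests; any table
whose index supplies a sorted prefix of the DISTINCT pathkeys should behave the same
way.

    -- Hypothetical demo table: the btree index on (a) yields paths that are
    -- presorted on the leading DISTINCT key, so with enable_incremental_sort
    -- on (the default) the planner can now consider Incremental Sort + Unique
    -- rather than a full Sort over the scan's output.
    CREATE TABLE distinct_demo (a int, b int);
    INSERT INTO distinct_demo
    SELECT g % 100, g % 10 FROM generate_series(1, 100000) g;
    CREATE INDEX distinct_demo_a_idx ON distinct_demo (a);
    ANALYZE distinct_demo;

    SET enable_seqscan = off;  -- nudge the planner toward the index path,
                               -- as the new tenk1 test above does
    EXPLAIN (COSTS OFF)
    SELECT DISTINCT a, b FROM distinct_demo;
    RESET enable_seqscan;

With the patch applied, the plan should be a Unique atop an Incremental Sort showing
"Sort Key: a, b" and "Presorted Key: a", mirroring the tenk1 test added to
select_distinct.sql above.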