diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 5d0fd6e072..84856721f3 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -4764,8 +4764,6 @@ create_final_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel, * the other.) */ List *needed_pathkeys; - Path *path; - ListCell *lc; if (parse->hasDistinctOn && list_length(root->distinct_pathkeys) < @@ -4774,44 +4772,73 @@ create_final_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel, else needed_pathkeys = root->distinct_pathkeys; - foreach(lc, input_rel->pathlist) + /* + * needed_pathkeys may have become empty if all of the pathkeys were + * determined to be redundant. If all of the pathkeys are redundant + * then each DISTINCT target must only allow a single value, therefore + * all resulting tuples must be identical. We can uniquify these + * tuples simply by just taking the first tuple. All we do here is + * add a path to do LIMIT 1 on the cheapest input path. + * + * XXX we could end up with 2 Limit nodes if the query already has a + * LIMIT clause. Does that matter? + */ + if (needed_pathkeys == NIL) + { + Node *limitCount = (Node *) makeConst(INT8OID, -1, InvalidOid, + sizeof(int64), + Int64GetDatum(1), false, + FLOAT8PASSBYVAL); + + add_path(distinct_rel, (Path *) + create_limit_path(root, distinct_rel, + cheapest_input_path, NULL, limitCount, + LIMIT_OPTION_COUNT, 0, 1)); + } + else { - path = (Path *) lfirst(lc); + Path *path; + ListCell *lc; - if (pathkeys_contained_in(needed_pathkeys, path->pathkeys)) + foreach(lc, input_rel->pathlist) { - add_path(distinct_rel, (Path *) - create_upper_unique_path(root, distinct_rel, - path, - list_length(root->distinct_pathkeys), - numDistinctRows)); + path = (Path *) lfirst(lc); + + if (pathkeys_contained_in(needed_pathkeys, path->pathkeys)) + { + add_path(distinct_rel, (Path *) + create_upper_unique_path(root, distinct_rel, + path, + list_length(root->distinct_pathkeys), + numDistinctRows)); + } } - } - /* For explicit-sort case, always use the more rigorous clause */ - if (list_length(root->distinct_pathkeys) < - list_length(root->sort_pathkeys)) - { - needed_pathkeys = root->sort_pathkeys; - /* Assert checks that parser didn't mess up... */ - Assert(pathkeys_contained_in(root->distinct_pathkeys, - needed_pathkeys)); - } - else - needed_pathkeys = root->distinct_pathkeys; + /* For explicit-sort case, always use the more rigorous clause */ + if (list_length(root->distinct_pathkeys) < + list_length(root->sort_pathkeys)) + { + needed_pathkeys = root->sort_pathkeys; + /* Assert checks that parser didn't mess up... */ + Assert(pathkeys_contained_in(root->distinct_pathkeys, + needed_pathkeys)); + } + else + needed_pathkeys = root->distinct_pathkeys; - path = cheapest_input_path; - if (!pathkeys_contained_in(needed_pathkeys, path->pathkeys)) - path = (Path *) create_sort_path(root, distinct_rel, - path, - needed_pathkeys, - -1.0); + path = cheapest_input_path; + if (!pathkeys_contained_in(needed_pathkeys, path->pathkeys)) + path = (Path *) create_sort_path(root, distinct_rel, + path, + needed_pathkeys, + -1.0); - add_path(distinct_rel, (Path *) - create_upper_unique_path(root, distinct_rel, - path, - list_length(root->distinct_pathkeys), - numDistinctRows)); + add_path(distinct_rel, (Path *) + create_upper_unique_path(root, distinct_rel, + path, + list_length(root->distinct_pathkeys), + numDistinctRows)); + } } /* diff --git a/src/test/regress/expected/select_distinct.out b/src/test/regress/expected/select_distinct.out index 748419cee0..6ce889d87c 100644 --- a/src/test/regress/expected/select_distinct.out +++ b/src/test/regress/expected/select_distinct.out @@ -279,6 +279,60 @@ RESET max_parallel_workers_per_gather; RESET min_parallel_table_scan_size; RESET parallel_setup_cost; RESET parallel_tuple_cost; +-- +-- Test the planner's ability to use a LIMIT 1 instead of a Unique node when +-- all of the distinct_pathkeys have been marked as redundant +-- +-- Ensure we get a plan with a Limit 1 +EXPLAIN (COSTS OFF) +SELECT DISTINCT four FROM tenk1 WHERE four = 0; + QUERY PLAN +---------------------------- + Limit + -> Seq Scan on tenk1 + Filter: (four = 0) +(3 rows) + +-- Ensure the above gives us the correct result +SELECT DISTINCT four FROM tenk1 WHERE four = 0; + four +------ + 0 +(1 row) + +-- Ensure we get a plan with a Limit 1 +EXPLAIN (COSTS OFF) +SELECT DISTINCT four FROM tenk1 WHERE four = 0 AND two <> 0; + QUERY PLAN +--------------------------------------------- + Limit + -> Seq Scan on tenk1 + Filter: ((two <> 0) AND (four = 0)) +(3 rows) + +-- Ensure no rows are returned +SELECT DISTINCT four FROM tenk1 WHERE four = 0 AND two <> 0; + four +------ +(0 rows) + +-- Ensure we get a plan with a Limit 1 when the SELECT list contains constants +EXPLAIN (COSTS OFF) +SELECT DISTINCT four,1,2,3 FROM tenk1 WHERE four = 0; + QUERY PLAN +---------------------------- + Limit + -> Seq Scan on tenk1 + Filter: (four = 0) +(3 rows) + +-- Ensure we only get 1 row +SELECT DISTINCT four,1,2,3 FROM tenk1 WHERE four = 0; + four | ?column? | ?column? | ?column? +------+----------+----------+---------- + 0 | 1 | 2 | 3 +(1 row) + -- -- Also, some tests of IS DISTINCT FROM, which doesn't quite deserve its -- very own regression file. diff --git a/src/test/regress/expected/select_distinct_on.out b/src/test/regress/expected/select_distinct_on.out index 3d98990991..2e38f14adb 100644 --- a/src/test/regress/expected/select_distinct_on.out +++ b/src/test/regress/expected/select_distinct_on.out @@ -73,3 +73,27 @@ select distinct on (1) floor(random()) as r, f1 from int4_tbl order by 1,2; 0 | -2147483647 (1 row) +-- +-- Test the planner's ability to use a LIMIT 1 instead of a Unique node when +-- all of the distinct_pathkeys have been marked as redundant +-- +-- Ensure we also get a LIMIT plan with DISTINCT ON +EXPLAIN (COSTS OFF) +SELECT DISTINCT ON (four) four,two + FROM tenk1 WHERE four = 0 AND two = 0 ORDER BY 1,2; + QUERY PLAN +-------------------------------------------------- + Result + -> Limit + -> Seq Scan on tenk1 + Filter: ((four = 0) AND (two = 0)) +(4 rows) + +-- and check the result of the above query is correct +SELECT DISTINCT ON (four) four,two + FROM tenk1 WHERE four = 0 AND two = 0 ORDER BY 1,2; + four | two +------+----- + 0 | 0 +(1 row) + diff --git a/src/test/regress/sql/select_distinct.sql b/src/test/regress/sql/select_distinct.sql index f27ff714f8..34020adad1 100644 --- a/src/test/regress/sql/select_distinct.sql +++ b/src/test/regress/sql/select_distinct.sql @@ -146,6 +146,32 @@ RESET min_parallel_table_scan_size; RESET parallel_setup_cost; RESET parallel_tuple_cost; +-- +-- Test the planner's ability to use a LIMIT 1 instead of a Unique node when +-- all of the distinct_pathkeys have been marked as redundant +-- + +-- Ensure we get a plan with a Limit 1 +EXPLAIN (COSTS OFF) +SELECT DISTINCT four FROM tenk1 WHERE four = 0; + +-- Ensure the above gives us the correct result +SELECT DISTINCT four FROM tenk1 WHERE four = 0; + +-- Ensure we get a plan with a Limit 1 +EXPLAIN (COSTS OFF) +SELECT DISTINCT four FROM tenk1 WHERE four = 0 AND two <> 0; + +-- Ensure no rows are returned +SELECT DISTINCT four FROM tenk1 WHERE four = 0 AND two <> 0; + +-- Ensure we get a plan with a Limit 1 when the SELECT list contains constants +EXPLAIN (COSTS OFF) +SELECT DISTINCT four,1,2,3 FROM tenk1 WHERE four = 0; + +-- Ensure we only get 1 row +SELECT DISTINCT four,1,2,3 FROM tenk1 WHERE four = 0; + -- -- Also, some tests of IS DISTINCT FROM, which doesn't quite deserve its -- very own regression file. diff --git a/src/test/regress/sql/select_distinct_on.sql b/src/test/regress/sql/select_distinct_on.sql index 0920bd64b9..d5d6de1815 100644 --- a/src/test/regress/sql/select_distinct_on.sql +++ b/src/test/regress/sql/select_distinct_on.sql @@ -17,3 +17,17 @@ SELECT DISTINCT ON (string4, ten) string4, ten, two -- bug #5049: early 8.4.x chokes on volatile DISTINCT ON clauses select distinct on (1) floor(random()) as r, f1 from int4_tbl order by 1,2; + +-- +-- Test the planner's ability to use a LIMIT 1 instead of a Unique node when +-- all of the distinct_pathkeys have been marked as redundant +-- + +-- Ensure we also get a LIMIT plan with DISTINCT ON +EXPLAIN (COSTS OFF) +SELECT DISTINCT ON (four) four,two + FROM tenk1 WHERE four = 0 AND two = 0 ORDER BY 1,2; + +-- and check the result of the above query is correct +SELECT DISTINCT ON (four) four,two + FROM tenk1 WHERE four = 0 AND two = 0 ORDER BY 1,2;