diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 5d0fd6e072..5c4e4dc5cd 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -4780,11 +4780,53 @@ create_final_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel, if (pathkeys_contained_in(needed_pathkeys, path->pathkeys)) { - add_path(distinct_rel, (Path *) - create_upper_unique_path(root, distinct_rel, - path, - list_length(root->distinct_pathkeys), - numDistinctRows)); + /* + * distinct_pathkeys may have become empty if all of the + * pathkeys were determined to be redundant. If all of the + * pathkeys are redundant then each DISTINCT target must only + * allow a single value, therefore all resulting tuples must + * be identical. We can uniquify these tuples simply by just + * taking the first tuple. All we do here is add a path to do + * LIMIT 1 on path. When doing DISTINCT ON we may still have + * a non-NIL sort_pathkeys list, so we must still only do this + * with paths which are contained in needed_pathkeys. + */ + if (root->distinct_pathkeys == NIL) + { + Node *limitCount; + LimitPath *lpath; + + limitCount = (Node *) makeConst(INT8OID, -1, InvalidOid, + sizeof(int64), + Int64GetDatum(1), false, + FLOAT8PASSBYVAL); + + /* + * If the query already has a LIMIT clause, then we could + * end up with a duplicate LimitPath in the final plan. + * That does not seem worth troubling over too much. + */ + lpath = create_limit_path(root, distinct_rel, path, NULL, + limitCount, LIMIT_OPTION_COUNT, + 0, 1); + + /* + * We set the LimitPath's pathkeys to the sort_pathkeys + * since there can only be a single row after the LIMIT to + * save the planner adding a useless SortPath for the + * ORDER BY. + */ + lpath->path.pathkeys = root->sort_pathkeys; + add_path(distinct_rel, (Path *) lpath); + } + else + { + add_path(distinct_rel, (Path *) + create_upper_unique_path(root, distinct_rel, + path, + list_length(root->distinct_pathkeys), + numDistinctRows)); + } } } @@ -4805,13 +4847,33 @@ create_final_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel, path = (Path *) create_sort_path(root, distinct_rel, path, needed_pathkeys, - -1.0); + root->distinct_pathkeys == NIL ? + 1.0 : -1.0); - add_path(distinct_rel, (Path *) - create_upper_unique_path(root, distinct_rel, - path, - list_length(root->distinct_pathkeys), - numDistinctRows)); + /* + * As above, use a LimitPath instead of a UniquePath when all of the + * distinct_pathkeys are redundant and we're only going to get a + * series of tuples all with the same values anyway. + */ + if (root->distinct_pathkeys == NIL) + { + Node *limitCount = (Node *) makeConst(INT8OID, -1, InvalidOid, + sizeof(int64), + Int64GetDatum(1), false, + FLOAT8PASSBYVAL); + + add_path(distinct_rel, (Path *) + create_limit_path(root, distinct_rel, path, NULL, + limitCount, LIMIT_OPTION_COUNT, 0, 1)); + } + else + { + add_path(distinct_rel, (Path *) + create_upper_unique_path(root, distinct_rel, + path, + list_length(root->distinct_pathkeys), + numDistinctRows)); + } } /* diff --git a/src/test/regress/expected/select_distinct.out b/src/test/regress/expected/select_distinct.out index 748419cee0..6ce889d87c 100644 --- a/src/test/regress/expected/select_distinct.out +++ b/src/test/regress/expected/select_distinct.out @@ -279,6 +279,60 @@ RESET max_parallel_workers_per_gather; RESET min_parallel_table_scan_size; RESET parallel_setup_cost; RESET parallel_tuple_cost; +-- +-- Test the planner's ability to use a LIMIT 1 instead of a Unique node when +-- all of the distinct_pathkeys have been marked as redundant +-- +-- Ensure we get a plan with a Limit 1 +EXPLAIN (COSTS OFF) +SELECT DISTINCT four FROM tenk1 WHERE four = 0; + QUERY PLAN +---------------------------- + Limit + -> Seq Scan on tenk1 + Filter: (four = 0) +(3 rows) + +-- Ensure the above gives us the correct result +SELECT DISTINCT four FROM tenk1 WHERE four = 0; + four +------ + 0 +(1 row) + +-- Ensure we get a plan with a Limit 1 +EXPLAIN (COSTS OFF) +SELECT DISTINCT four FROM tenk1 WHERE four = 0 AND two <> 0; + QUERY PLAN +--------------------------------------------- + Limit + -> Seq Scan on tenk1 + Filter: ((two <> 0) AND (four = 0)) +(3 rows) + +-- Ensure no rows are returned +SELECT DISTINCT four FROM tenk1 WHERE four = 0 AND two <> 0; + four +------ +(0 rows) + +-- Ensure we get a plan with a Limit 1 when the SELECT list contains constants +EXPLAIN (COSTS OFF) +SELECT DISTINCT four,1,2,3 FROM tenk1 WHERE four = 0; + QUERY PLAN +---------------------------- + Limit + -> Seq Scan on tenk1 + Filter: (four = 0) +(3 rows) + +-- Ensure we only get 1 row +SELECT DISTINCT four,1,2,3 FROM tenk1 WHERE four = 0; + four | ?column? | ?column? | ?column? +------+----------+----------+---------- + 0 | 1 | 2 | 3 +(1 row) + -- -- Also, some tests of IS DISTINCT FROM, which doesn't quite deserve its -- very own regression file. diff --git a/src/test/regress/expected/select_distinct_on.out b/src/test/regress/expected/select_distinct_on.out index 3d98990991..b2978c1114 100644 --- a/src/test/regress/expected/select_distinct_on.out +++ b/src/test/regress/expected/select_distinct_on.out @@ -73,3 +73,53 @@ select distinct on (1) floor(random()) as r, f1 from int4_tbl order by 1,2; 0 | -2147483647 (1 row) +-- +-- Test the planner's ability to use a LIMIT 1 instead of a Unique node when +-- all of the distinct_pathkeys have been marked as redundant +-- +-- Ensure we also get a LIMIT plan with DISTINCT ON +EXPLAIN (COSTS OFF) +SELECT DISTINCT ON (four) four,two + FROM tenk1 WHERE four = 0 ORDER BY 1; + QUERY PLAN +---------------------------------- + Result + -> Limit + -> Seq Scan on tenk1 + Filter: (four = 0) +(4 rows) + +-- and check the result of the above query is correct +SELECT DISTINCT ON (four) four,two + FROM tenk1 WHERE four = 0 ORDER BY 1; + four | two +------+----- + 0 | 0 +(1 row) + +-- Ensure a Sort -> Limit is used when the ORDER BY contains additional cols +EXPLAIN (COSTS OFF) +SELECT DISTINCT ON (four) four,two + FROM tenk1 WHERE four = 0 ORDER BY 1,2; + QUERY PLAN +---------------------------------- + Limit + -> Sort + Sort Key: two + -> Seq Scan on tenk1 + Filter: (four = 0) +(5 rows) + +-- Same again but use a column that is indexed so that we get an index scan +-- then a limit +EXPLAIN (COSTS OFF) +SELECT DISTINCT ON (four) four,hundred + FROM tenk1 WHERE four = 0 ORDER BY 1,2; + QUERY PLAN +----------------------------------------------------- + Result + -> Limit + -> Index Scan using tenk1_hundred on tenk1 + Filter: (four = 0) +(4 rows) + diff --git a/src/test/regress/sql/select_distinct.sql b/src/test/regress/sql/select_distinct.sql index f27ff714f8..34020adad1 100644 --- a/src/test/regress/sql/select_distinct.sql +++ b/src/test/regress/sql/select_distinct.sql @@ -146,6 +146,32 @@ RESET min_parallel_table_scan_size; RESET parallel_setup_cost; RESET parallel_tuple_cost; +-- +-- Test the planner's ability to use a LIMIT 1 instead of a Unique node when +-- all of the distinct_pathkeys have been marked as redundant +-- + +-- Ensure we get a plan with a Limit 1 +EXPLAIN (COSTS OFF) +SELECT DISTINCT four FROM tenk1 WHERE four = 0; + +-- Ensure the above gives us the correct result +SELECT DISTINCT four FROM tenk1 WHERE four = 0; + +-- Ensure we get a plan with a Limit 1 +EXPLAIN (COSTS OFF) +SELECT DISTINCT four FROM tenk1 WHERE four = 0 AND two <> 0; + +-- Ensure no rows are returned +SELECT DISTINCT four FROM tenk1 WHERE four = 0 AND two <> 0; + +-- Ensure we get a plan with a Limit 1 when the SELECT list contains constants +EXPLAIN (COSTS OFF) +SELECT DISTINCT four,1,2,3 FROM tenk1 WHERE four = 0; + +-- Ensure we only get 1 row +SELECT DISTINCT four,1,2,3 FROM tenk1 WHERE four = 0; + -- -- Also, some tests of IS DISTINCT FROM, which doesn't quite deserve its -- very own regression file. diff --git a/src/test/regress/sql/select_distinct_on.sql b/src/test/regress/sql/select_distinct_on.sql index 0920bd64b9..261e6140fe 100644 --- a/src/test/regress/sql/select_distinct_on.sql +++ b/src/test/regress/sql/select_distinct_on.sql @@ -17,3 +17,28 @@ SELECT DISTINCT ON (string4, ten) string4, ten, two -- bug #5049: early 8.4.x chokes on volatile DISTINCT ON clauses select distinct on (1) floor(random()) as r, f1 from int4_tbl order by 1,2; + +-- +-- Test the planner's ability to use a LIMIT 1 instead of a Unique node when +-- all of the distinct_pathkeys have been marked as redundant +-- + +-- Ensure we also get a LIMIT plan with DISTINCT ON +EXPLAIN (COSTS OFF) +SELECT DISTINCT ON (four) four,two + FROM tenk1 WHERE four = 0 ORDER BY 1; + +-- and check the result of the above query is correct +SELECT DISTINCT ON (four) four,two + FROM tenk1 WHERE four = 0 ORDER BY 1; + +-- Ensure a Sort -> Limit is used when the ORDER BY contains additional cols +EXPLAIN (COSTS OFF) +SELECT DISTINCT ON (four) four,two + FROM tenk1 WHERE four = 0 ORDER BY 1,2; + +-- Same again but use a column that is indexed so that we get an index scan +-- then a limit +EXPLAIN (COSTS OFF) +SELECT DISTINCT ON (four) four,hundred + FROM tenk1 WHERE four = 0 ORDER BY 1,2;