From 3a74857e06e367307f85d25166f62e3462de543d Mon Sep 17 00:00:00 2001
From: Richard Guo
Date: Fri, 23 Aug 2024 16:55:32 +0900
Subject: [PATCH v2] Avoid unnecessary post-sort projection
When generating paths for the ORDER BY clause, one thing we need to
ensure is that the output paths project the correct final_target. To
achieve this, in create_ordered_paths, we compare the pathtarget of
each generated path with the given 'target', and add a post-sort
projection step if the two targets do not match.
Currently we perform a simple pointer comparison between the two
targets. It turns out that this is not sufficient. Each sorted_path
generated in create_ordered_paths initially projects the target
required by the steps that precede the sort. If that pathtarget is the
same pointer as sort_input_target, pointer comparison suffices, because
sort_input_target is always identical to final_target when no post-sort
projection is needed.
However, sorted_path's initial pathtarget may not be the same pointer
as sort_input_target, because in apply_scanjoin_target_to_paths, if
the target to be applied has the same expressions as the existing
reltarget, we only inject the sortgroupref info into the existing
pathtargets, rather than create new projection paths. As a result,
pointer comparison in create_ordered_paths is not reliable.
Instead, we can compare PathTarget.exprs to determine whether a
projection step is needed. If the expressions match, we can be
confident that a post-sort projection is not required.
It could be argued that this change adds the cost of an extra equal()
check each time we decide whether a post-sort projection is needed.
However, as explained in apply_scanjoin_target_to_paths, by avoiding
the creation of projection paths, we save effort both immediately and
at plan creation time. This, I think, justifies the cost of the extra
check.
This results in two plan changes in the regression tests. In both, a
redundant Result node above the Limit node is removed, which is exactly
what we are fixing here. So no additional test cases are added.
---
src/backend/optimizer/plan/planner.c | 14 +++++++---
.../regress/expected/select_distinct_on.out | 26 +++++++++----------
2 files changed, 22 insertions(+), 18 deletions(-)
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index b5827d3980..62b2354f00 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -5300,8 +5300,11 @@ create_ordered_paths(PlannerInfo *root,
limit_tuples);
}
- /* Add projection step if needed */
- if (sorted_path->pathtarget != target)
+ /*
+ * If the pathtarget of the result path has different expressions from
+ * the target to be applied, a projection step is needed.
+ */
+ if (!equal(sorted_path->pathtarget->exprs, target->exprs))
sorted_path = apply_projection_to_path(root, ordered_rel,
sorted_path, target);
@@ -5378,8 +5381,11 @@ create_ordered_paths(PlannerInfo *root,
root->sort_pathkeys, NULL,
&total_groups);
- /* Add projection step if needed */
- if (sorted_path->pathtarget != target)
+ /*
+ * If the pathtarget of the result path has different expressions
+ * from the target to be applied, a projection step is needed.
+ */
+ if (!equal(sorted_path->pathtarget->exprs, target->exprs))
sorted_path = apply_projection_to_path(root, ordered_rel,
sorted_path, target);
diff --git a/src/test/regress/expected/select_distinct_on.out b/src/test/regress/expected/select_distinct_on.out
index b2978c1114..958381afe5 100644
--- a/src/test/regress/expected/select_distinct_on.out
+++ b/src/test/regress/expected/select_distinct_on.out
@@ -81,13 +81,12 @@ select distinct on (1) floor(random()) as r, f1 from int4_tbl order by 1,2;
EXPLAIN (COSTS OFF)
SELECT DISTINCT ON (four) four,two
FROM tenk1 WHERE four = 0 ORDER BY 1;
- QUERY PLAN
-----------------------------------
- Result
- -> Limit
- -> Seq Scan on tenk1
- Filter: (four = 0)
-(4 rows)
+ QUERY PLAN
+----------------------------
+ Limit
+ -> Seq Scan on tenk1
+ Filter: (four = 0)
+(3 rows)
-- and check the result of the above query is correct
SELECT DISTINCT ON (four) four,two
@@ -115,11 +114,10 @@ SELECT DISTINCT ON (four) four,two
EXPLAIN (COSTS OFF)
SELECT DISTINCT ON (four) four,hundred
FROM tenk1 WHERE four = 0 ORDER BY 1,2;
- QUERY PLAN
------------------------------------------------------
- Result
- -> Limit
- -> Index Scan using tenk1_hundred on tenk1
- Filter: (four = 0)
-(4 rows)
+ QUERY PLAN
+-----------------------------------------------
+ Limit
+ -> Index Scan using tenk1_hundred on tenk1
+ Filter: (four = 0)
+(3 rows)
--
2.43.0