diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index b399592ff8..3393e1d37a 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -2760,7 +2760,8 @@ get_useful_pathkeys_for_relation(PlannerInfo *root, RelOptInfo *rel) /* * Considering query_pathkeys is always worth it, because it might allow * us to avoid a total sort when we have a partially presorted path - * available. + * available or to push the total sort into the parallel portion of the + * query. */ if (root->query_pathkeys) { @@ -2773,17 +2774,17 @@ get_useful_pathkeys_for_relation(PlannerInfo *root, RelOptInfo *rel) EquivalenceClass *pathkey_ec = pathkey->pk_eclass; /* - * We can only build an Incremental Sort for pathkeys which - * contain an EC member in the current relation, so ignore any - * suffix of the list as soon as we find a pathkey without an EC - * member the relation. + * We can only build a sort for pathkeys which contain an EC + * member in the current relation's target, so ignore any suffix + * of the list as soon as we find a pathkey without an EC member + * in the relation. * * By still returning the prefix of the pathkeys list that does * meet criteria of EC membership in the current relation, we * enable not just an incremental sort on the entirety of * query_pathkeys but also incremental sort below a JOIN. */ - if (!find_em_expr_for_rel(pathkey_ec, rel)) + if (!find_em_expr_usable_for_sorting_rel(pathkey_ec, rel)) break; npathkeys++; diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c index 823422edad..9662dd3262 100644 --- a/src/backend/optimizer/path/equivclass.c +++ b/src/backend/optimizer/path/equivclass.c @@ -794,6 +794,66 @@ find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel) return NULL; } +/* + * Find an equivalence class member expression that can be safely used by a + * sort node on top of the provided relation. The rules here must match those + * applied in prepare_sort_from_pathkeys. + */ +Expr * +find_em_expr_usable_for_sorting_rel(EquivalenceClass *ec, RelOptInfo *rel) +{ + ListCell *lc_em; + + /* + * If there is more than one equivalence member matching these + * requirements we'll be content to choose any one of them. + */ + foreach(lc_em, ec->ec_members) + { + EquivalenceMember *em = lfirst(lc_em); + PathTarget *target = rel->reltarget; + ListCell *lc_expr; + + /* + * We shouldn't be trying to sort by an equivalence class that + * contains a constant, so no need to consider such cases any further. + */ + if (em->em_is_const) + continue; + + /* + * Any Vars in the equivalence class member need to come from this + * relation. This is a superset of prepare_sort_from_pathkeys ignoring + * child members unless they belong to the rel being sorted. + */ + if (!bms_is_subset(em->em_relids, rel->relids)) + continue; + + /* + * As long as the expression isn't volatile then + * prepare_sort_from_pathkeys is able to generate a new target entry, + * so there's no need to verify that one already exists. + */ + if (!ec->ec_has_volatile) + return em->em_expr; + + /* + * If, however, it's volatile, we have to verify that the + * equivalence member's expr is already generated in the + * relation's target. + */ + foreach(lc_expr, target->exprs) + { + /* XXX: Do we need to strip relabel types here? */ + if (equal(lfirst(lc_expr), em->em_expr)) + return em->em_expr; + } + } + + /* We didn't find any suitable equivalence class expression */ + return NULL; +} + /* * generate_base_implied_equalities * Generate any restriction clauses that we can deduce from equivalence diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index 10b6e81079..d72b3a5d42 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -135,6 +135,7 @@ extern EquivalenceClass *get_eclass_for_sort_expr(PlannerInfo *root, Relids rel, bool create_it); extern Expr *find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel); +extern Expr *find_em_expr_usable_for_sorting_rel(EquivalenceClass *ec, RelOptInfo *rel); extern void generate_base_implied_equalities(PlannerInfo *root); extern List *generate_join_implied_equalities(PlannerInfo *root, Relids join_relids, diff --git a/src/test/regress/expected/incremental_sort.out b/src/test/regress/expected/incremental_sort.out index e376ea1276..7cf2eee7c1 100644 --- a/src/test/regress/expected/incremental_sort.out +++ b/src/test/regress/expected/incremental_sort.out @@ -1469,3 +1469,101 @@ explain (costs off) select * from t union select * from t order by 1,3; (13 rows) drop table t; +-- Sort pushdown can't go below where expressions are part of the rel target. +-- In particular this is interesting for volatile expressions which have to +-- go above joins since otherwise we'll incorrectly use expression evaluations +-- across multiple rows. +set enable_hashagg=off; +set enable_seqscan=off; +set enable_incremental_sort = off; +set parallel_tuple_cost=0; +set parallel_setup_cost=0; +set min_parallel_table_scan_size = 0; +set min_parallel_index_scan_size = 0; +-- Parallel sort below join. +explain (costs off) select distinct sub.unique1, stringu1 +from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub; + QUERY PLAN +-------------------------------------------------------------------------- + Unique + -> Nested Loop + -> Gather Merge + Workers Planned: 2 + -> Sort + Sort Key: tenk1.unique1, tenk1.stringu1 + -> Parallel Index Scan using tenk1_unique1 on tenk1 + -> Function Scan on generate_series +(8 rows) + +explain (costs off) select sub.unique1, stringu1 +from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub +order by 1, 2; + QUERY PLAN +-------------------------------------------------------------------- + Nested Loop + -> Gather Merge + Workers Planned: 2 + -> Sort + Sort Key: tenk1.unique1, tenk1.stringu1 + -> Parallel Index Scan using tenk1_unique1 on tenk1 + -> Function Scan on generate_series +(7 rows) + +-- Parallel sort but with expression that can be safely generated at the base rel. +explain (costs off) select distinct sub.unique1, md5(stringu1) +from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub; + QUERY PLAN +---------------------------------------------------------------------------------------- + Unique + -> Nested Loop + -> Gather Merge + Workers Planned: 2 + -> Sort + Sort Key: tenk1.unique1, (md5((tenk1.stringu1)::text)) COLLATE "C" + -> Parallel Index Scan using tenk1_unique1 on tenk1 + -> Function Scan on generate_series +(8 rows) + +explain (costs off) select sub.unique1, md5(stringu1) +from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub +order by 1, 2; + QUERY PLAN +---------------------------------------------------------------------------------- + Nested Loop + -> Gather Merge + Workers Planned: 2 + -> Sort + Sort Key: tenk1.unique1, (md5((tenk1.stringu1)::text)) COLLATE "C" + -> Parallel Index Scan using tenk1_unique1 on tenk1 + -> Function Scan on generate_series +(7 rows) + +-- Parallel sort but with expression not available until the upper rel. +explain (costs off) select distinct sub.unique1, stringu1 || random()::text +from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub; + QUERY PLAN +--------------------------------------------------------------------------------------------- + Unique + -> Sort + Sort Key: tenk1.unique1, (((tenk1.stringu1)::text || (random())::text)) COLLATE "C" + -> Gather + Workers Planned: 2 + -> Nested Loop + -> Parallel Index Scan using tenk1_unique1 on tenk1 + -> Function Scan on generate_series +(8 rows) + +explain (costs off) select sub.unique1, stringu1 || random()::text +from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub +order by 1, 2; + QUERY PLAN +--------------------------------------------------------------------------------------- + Sort + Sort Key: tenk1.unique1, (((tenk1.stringu1)::text || (random())::text)) COLLATE "C" + -> Gather + Workers Planned: 2 + -> Nested Loop + -> Parallel Index Scan using tenk1_unique1 on tenk1 + -> Function Scan on generate_series +(7 rows) + diff --git a/src/test/regress/sql/incremental_sort.sql b/src/test/regress/sql/incremental_sort.sql index 9c040c90e6..3ee11c394b 100644 --- a/src/test/regress/sql/incremental_sort.sql +++ b/src/test/regress/sql/incremental_sort.sql @@ -221,3 +221,34 @@ set enable_hashagg to off; explain (costs off) select * from t union select * from t order by 1,3; drop table t; + +-- Sort pushdown can't go below where expressions are part of the rel target. +-- In particular this is interesting for volatile expressions which have to +-- go above joins since otherwise we'll incorrectly use expression evaluations +-- across multiple rows. +set enable_hashagg=off; +set enable_seqscan=off; +set enable_incremental_sort = off; +set parallel_tuple_cost=0; +set parallel_setup_cost=0; +set min_parallel_table_scan_size = 0; +set min_parallel_index_scan_size = 0; + +-- Parallel sort below join. +explain (costs off) select distinct sub.unique1, stringu1 +from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub; +explain (costs off) select sub.unique1, stringu1 +from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub +order by 1, 2; +-- Parallel sort but with expression that can be safely generated at the base rel. +explain (costs off) select distinct sub.unique1, md5(stringu1) +from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub; +explain (costs off) select sub.unique1, md5(stringu1) +from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub +order by 1, 2; +-- Parallel sort but with expression not available until the upper rel. +explain (costs off) select distinct sub.unique1, stringu1 || random()::text +from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub; +explain (costs off) select sub.unique1, stringu1 || random()::text +from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub +order by 1, 2;