From e7b94d4efda41becb2f28123eca2db2a88d0d4d0 Mon Sep 17 00:00:00 2001
From: James Coleman <jtc331@gmail.com>
Date: Sat, 3 Oct 2020 10:35:47 -0400
Subject: [PATCH v2 1/2] Fix get_useful_pathkeys_for_relation for volatile
 exprs below joins

It's not sufficient to find an ec member whose Vars are all in the rel
we're working with; volatile expressions in particular present a case
where we also need to know if the rel's target contains the expression
or not before we can assume the pathkey for that ec is safe to  use at
this point in the query. If the pathkey expr is volatile and we're
below a join, then the expr can't appear in the target until we're above
the join, so we can't sort with it yet.
---
 src/backend/optimizer/path/allpaths.c   | 11 ++++---
 src/backend/optimizer/path/equivclass.c | 43 +++++++++++++++++++++++++
 src/backend/optimizer/plan/planner.c    |  9 ++++--
 src/include/optimizer/paths.h           |  1 +
 4 files changed, 57 insertions(+), 7 deletions(-)

diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index b399592ff8..53050b825b 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -2760,7 +2760,8 @@ get_useful_pathkeys_for_relation(PlannerInfo *root, RelOptInfo *rel)
 	/*
 	 * Considering query_pathkeys is always worth it, because it might allow
 	 * us to avoid a total sort when we have a partially presorted path
-	 * available.
+	 * available or to push the total sort into the parallel portion of the
+	 * query.
 	 */
 	if (root->query_pathkeys)
 	{
@@ -2773,17 +2774,17 @@ get_useful_pathkeys_for_relation(PlannerInfo *root, RelOptInfo *rel)
 			EquivalenceClass *pathkey_ec = pathkey->pk_eclass;
 
 			/*
-			 * We can only build an Incremental Sort for pathkeys which
-			 * contain an EC member in the current relation, so ignore any
+			 * We can only build a sort for pathkeys which
+			 * contain an EC member in the current relation's target, so ignore any
 			 * suffix of the list as soon as we find a pathkey without an EC
-			 * member the relation.
+			 * member in the relation.
 			 *
 			 * By still returning the prefix of the pathkeys list that does
 			 * meet criteria of EC membership in the current relation, we
 			 * enable not just an incremental sort on the entirety of
 			 * query_pathkeys but also incremental sort below a JOIN.
 			 */
-			if (!find_em_expr_for_rel(pathkey_ec, rel))
+			if (!find_em_expr_for_rel_target(pathkey_ec, rel))
 				break;
 
 			npathkeys++;
diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c
index b68a5a0ec7..444c805dfb 100644
--- a/src/backend/optimizer/path/equivclass.c
+++ b/src/backend/optimizer/path/equivclass.c
@@ -794,6 +794,49 @@ find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel)
 	return NULL;
 }
 
+/*
+ * Find an equivalence class member expression that is in the
+ * the indicated relation's target.
+ */
+Expr *
+find_em_expr_for_rel_target(EquivalenceClass *ec, RelOptInfo *rel)
+{
+	ListCell   *lc_em;
+
+	foreach(lc_em, ec->ec_members)
+	{
+		EquivalenceMember *em = lfirst(lc_em);
+		PathTarget *target = rel->reltarget;
+		ListCell *lc_expr;
+
+		/*
+		 * First all fo the Vars in the equivalence member have to be taken
+		 * entirely from this relation.
+		 */
+		if (bms_is_subset(em->em_relids, rel->relids) &&
+			!bms_is_empty(em->em_relids))
+		{
+			/*
+			 * Then we must verify that the equivalence member's expr is
+			 * generated in the relation's target.
+			 */
+			foreach(lc_expr, target->exprs)
+			{
+				/*
+				 * If there is more than one equivalence member matching these
+				 * requirements we'll be content to choose any one of them.
+				 */
+				if (equal(lfirst(lc_expr), em->em_expr))
+					return em->em_expr;
+			}
+
+		}
+	}
+
+	/* We didn't find any suitable equivalence class expression */
+	return NULL;
+}
+
 /*
  * generate_base_implied_equalities
  *	  Generate any restriction clauses that we can deduce from equivalence
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index f331f82a6c..a48721d2fc 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -7417,7 +7417,9 @@ apply_scanjoin_target_to_paths(PlannerInfo *root,
 		 * current reltarget is.  We don't do this in the case where the
 		 * target is parallel-safe, since we will be able to generate superior
 		 * paths by doing it after the final scan/join target has been
-		 * applied.
+		 * applied. Since the target isn't parallel safe, we don't need
+		 * to apply projections to the partial paths before building
+		 * gather paths.
 		 */
 		generate_useful_gather_paths(root, rel, false);
 
@@ -7570,7 +7572,10 @@ apply_scanjoin_target_to_paths(PlannerInfo *root,
 	 * if the relation is parallel safe, and we don't do it for child rels to
 	 * avoid creating multiple Gather nodes within the same plan. We must do
 	 * this after all paths have been generated and before set_cheapest, since
-	 * one of the generated paths may turn out to be the cheapest one.
+	 * one of the generated paths may turn out to be the cheapest one. We've
+	 * already applied any necessary projections to the partial paths above
+	 * so any Gather Merge paths will be able to make use of path keys in
+	 * requested target..
 	 */
 	if (rel->consider_parallel && !IS_OTHER_REL(rel))
 		generate_useful_gather_paths(root, rel, false);
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index 10b6e81079..6ddb54f415 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -135,6 +135,7 @@ extern EquivalenceClass *get_eclass_for_sort_expr(PlannerInfo *root,
 												  Relids rel,
 												  bool create_it);
 extern Expr *find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel);
+extern Expr *find_em_expr_for_rel_target(EquivalenceClass *ec, RelOptInfo *rel);
 extern void generate_base_implied_equalities(PlannerInfo *root);
 extern List *generate_join_implied_equalities(PlannerInfo *root,
 											  Relids join_relids,
-- 
2.17.1

