From 091627c63cfb7ab47bfb76f6a96f94370aeea28d Mon Sep 17 00:00:00 2001
From: Tomas Vondra <tomas@2ndquadrant.com>
Date: Tue, 9 Jul 2019 02:14:18 +0200
Subject: [PATCH 4/4] rework where incremental sort paths are created

---
 src/backend/optimizer/path/allpaths.c | 269 -----------------------
 src/backend/optimizer/plan/planner.c  | 299 ++++++++++++++++++++++++++
 2 files changed, 299 insertions(+), 269 deletions(-)

diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index 34a0fb4d32..3efc807164 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -2665,242 +2665,6 @@ set_worktable_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
 	add_path(rel, create_worktablescan_path(root, rel, required_outer));
 }
 
-
-
-/*
- * Find an equivalence class member expression, all of whose Vars, come from
- * the indicated relation.
- */
-static Expr *
-find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel)
-{
-	ListCell   *lc_em;
-
-	foreach(lc_em, ec->ec_members)
-	{
-		EquivalenceMember *em = lfirst(lc_em);
-
-		if (bms_is_subset(em->em_relids, rel->relids) &&
-			!bms_is_empty(em->em_relids))
-		{
-			/*
-			 * If there is more than one equivalence member whose Vars are
-			 * taken entirely from this relation, we'll be content to choose
-			 * any one of those.
-			 */
-			return em->em_expr;
-		}
-	}
-
-	/* We didn't find any suitable equivalence class expression */
-	return NULL;
-}
-
-/*
- * get_useful_ecs_for_relation
- *		Determine which EquivalenceClasses might be involved in useful
- *		orderings of this relation.
- *
- * This function is in some respects a mirror image of the core function
- * pathkeys_useful_for_merging: for a regular table, we know what indexes
- * we have and want to test whether any of them are useful.  For a foreign
- * table, we don't know what indexes are present on the remote side but
- * want to speculate about which ones we'd like to use if they existed.
- *
- * This function returns a list of potentially-useful equivalence classes,
- * but it does not guarantee that an EquivalenceMember exists which contains
- * Vars only from the given relation.  For example, given ft1 JOIN t1 ON
- * ft1.x + t1.x = 0, this function will say that the equivalence class
- * containing ft1.x + t1.x is potentially useful.  Supposing ft1 is remote and
- * t1 is local (or on a different server), it will turn out that no useful
- * ORDER BY clause can be generated.  It's not our job to figure that out
- * here; we're only interested in identifying relevant ECs.
- */
-static List *
-get_useful_ecs_for_relation(PlannerInfo *root, RelOptInfo *rel)
-{
-	List	   *useful_eclass_list = NIL;
-	ListCell   *lc;
-	Relids		relids;
-
-	/*
-	 * First, consider whether any active EC is potentially useful for a merge
-	 * join against this relation.
-	 */
-	if (rel->has_eclass_joins)
-	{
-		foreach(lc, root->eq_classes)
-		{
-			EquivalenceClass *cur_ec = (EquivalenceClass *) lfirst(lc);
-
-			if (eclass_useful_for_merging(root, cur_ec, rel))
-				useful_eclass_list = lappend(useful_eclass_list, cur_ec);
-		}
-	}
-
-	/*
-	 * Next, consider whether there are any non-EC derivable join clauses that
-	 * are merge-joinable.  If the joininfo list is empty, we can exit
-	 * quickly.
-	 */
-	if (rel->joininfo == NIL)
-		return useful_eclass_list;
-
-	/* If this is a child rel, we must use the topmost parent rel to search. */
-	if (IS_OTHER_REL(rel))
-	{
-		Assert(!bms_is_empty(rel->top_parent_relids));
-		relids = rel->top_parent_relids;
-	}
-	else
-		relids = rel->relids;
-
-	/* Check each join clause in turn. */
-	foreach(lc, rel->joininfo)
-	{
-		RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(lc);
-
-		/* Consider only mergejoinable clauses */
-		if (restrictinfo->mergeopfamilies == NIL)
-			continue;
-
-		/* Make sure we've got canonical ECs. */
-		update_mergeclause_eclasses(root, restrictinfo);
-
-		/*
-		 * restrictinfo->mergeopfamilies != NIL is sufficient to guarantee
-		 * that left_ec and right_ec will be initialized, per comments in
-		 * distribute_qual_to_rels.
-		 *
-		 * We want to identify which side of this merge-joinable clause
-		 * contains columns from the relation produced by this RelOptInfo. We
-		 * test for overlap, not containment, because there could be extra
-		 * relations on either side.  For example, suppose we've got something
-		 * like ((A JOIN B ON A.x = B.x) JOIN C ON A.y = C.y) LEFT JOIN D ON
-		 * A.y = D.y.  The input rel might be the joinrel between A and B, and
-		 * we'll consider the join clause A.y = D.y. relids contains a
-		 * relation not involved in the join class (B) and the equivalence
-		 * class for the left-hand side of the clause contains a relation not
-		 * involved in the input rel (C).  Despite the fact that we have only
-		 * overlap and not containment in either direction, A.y is potentially
-		 * useful as a sort column.
-		 *
-		 * Note that it's even possible that relids overlaps neither side of
-		 * the join clause.  For example, consider A LEFT JOIN B ON A.x = B.x
-		 * AND A.x = 1.  The clause A.x = 1 will appear in B's joininfo list,
-		 * but overlaps neither side of B.  In that case, we just skip this
-		 * join clause, since it doesn't suggest a useful sort order for this
-		 * relation.
-		 */
-		if (bms_overlap(relids, restrictinfo->right_ec->ec_relids))
-			useful_eclass_list = list_append_unique_ptr(useful_eclass_list,
-														restrictinfo->right_ec);
-		else if (bms_overlap(relids, restrictinfo->left_ec->ec_relids))
-			useful_eclass_list = list_append_unique_ptr(useful_eclass_list,
-														restrictinfo->left_ec);
-	}
-
-	return useful_eclass_list;
-}
-
-/*
- * get_useful_pathkeys_for_relation
- *		Determine which orderings of a relation might be useful.
- *
- * Getting data in sorted order can be useful either because the requested
- * order matches the final output ordering for the overall query we're
- * planning, or because it enables an efficient merge join.  Here, we try
- * to figure out which pathkeys to consider.
- */
-static List *
-get_useful_pathkeys_for_relation(PlannerInfo *root, RelOptInfo *rel)
-{
-	List	   *useful_pathkeys_list = NIL;
-	List	   *useful_eclass_list;
-	EquivalenceClass *query_ec = NULL;
-	ListCell   *lc;
-
-	/*
-	 * Pushing the query_pathkeys to the remote server is always worth
-	 * considering, because it might let us avoid a local sort.
-	 */
-	if (root->query_pathkeys)
-	{
-		bool		query_pathkeys_ok = true;
-
-		foreach(lc, root->query_pathkeys)
-		{
-			PathKey    *pathkey = (PathKey *) lfirst(lc);
-			EquivalenceClass *pathkey_ec = pathkey->pk_eclass;
-			Expr	   *em_expr;
-
-			/*
-			 * The planner and executor don't have any clever strategy for
-			 * taking data sorted by a prefix of the query's pathkeys and
-			 * getting it to be sorted by all of those pathkeys. We'll just
-			 * end up resorting the entire data set.  So, unless we can push
-			 * down all of the query pathkeys, forget it.
-			 *
-			 * is_foreign_expr would detect volatile expressions as well, but
-			 * checking ec_has_volatile here saves some cycles.
-			 */
-			if (pathkey_ec->ec_has_volatile ||
-				!(em_expr = find_em_expr_for_rel(pathkey_ec, rel)))
-			{
-				query_pathkeys_ok = false;
-				break;
-			}
-		}
-
-		if (query_pathkeys_ok)
-			useful_pathkeys_list = list_make1(list_copy(root->query_pathkeys));
-	}
-
-	/* Get the list of interesting EquivalenceClasses. */
-	useful_eclass_list = get_useful_ecs_for_relation(root, rel);
-
-	/* Extract unique EC for query, if any, so we don't consider it again. */
-	if (list_length(root->query_pathkeys) == 1)
-	{
-		PathKey    *query_pathkey = linitial(root->query_pathkeys);
-
-		query_ec = query_pathkey->pk_eclass;
-	}
-
-	/*
-	 * As a heuristic, the only pathkeys we consider here are those of length
-	 * one.  It's surely possible to consider more, but since each one we
-	 * choose to consider will generate a round-trip to the remote side, we
-	 * need to be a bit cautious here.  It would sure be nice to have a local
-	 * cache of information about remote index definitions...
-	 */
-	foreach(lc, useful_eclass_list)
-	{
-		EquivalenceClass *cur_ec = lfirst(lc);
-		Expr	   *em_expr;
-		PathKey    *pathkey;
-
-		/* If redundant with what we did above, skip it. */
-		if (cur_ec == query_ec)
-			continue;
-
-		/* If no pushable expression for this rel, skip it. */
-		em_expr = find_em_expr_for_rel(cur_ec, rel);
-		if (em_expr == NULL)
-			continue;
-
-		/* Looks like we can generate a pathkey, so let's do it. */
-		pathkey = make_canonical_pathkey(root, cur_ec,
-										 linitial_oid(cur_ec->ec_opfamilies),
-										 BTLessStrategyNumber,
-										 false);
-		useful_pathkeys_list = lappend(useful_pathkeys_list,
-									   list_make1(pathkey));
-	}
-
-	return useful_pathkeys_list;
-}
-
 /*
  * generate_gather_paths
  *		Generate parallel access paths for a relation by pushing a Gather or
@@ -2955,10 +2719,6 @@ generate_gather_paths(PlannerInfo *root, RelOptInfo *rel, bool override_rows)
 	{
 		Path	   *subpath = (Path *) lfirst(lc);
 		GatherMergePath *path;
-		bool		is_sorted;
-		int			presorted_keys;
-		List	   *useful_pathkeys_list = NIL; /* List of all pathkeys */
-		ListCell   *lc;
 
 		if (subpath->pathkeys == NIL)
 			continue;
@@ -2967,35 +2727,6 @@ generate_gather_paths(PlannerInfo *root, RelOptInfo *rel, bool override_rows)
 		path = create_gather_merge_path(root, rel, subpath, rel->reltarget,
 										subpath->pathkeys, NULL, rowsp);
 		add_path(rel, &path->path);
-
-		/* consider incremental sort for interesting orderings */
-		useful_pathkeys_list = get_useful_pathkeys_for_relation(root, rel);
-
-		foreach(lc, useful_pathkeys_list)
-		{
-			List	   *useful_pathkeys = lfirst(lc);
-
-			is_sorted = pathkeys_common_contained_in(useful_pathkeys,
-													 subpath->pathkeys,
-													 &presorted_keys);
-
-			if (!is_sorted && (presorted_keys > 0))
-			{
-				/* Also consider incremental sort. */
-				subpath = (Path *) create_incremental_sort_path(root,
-																rel,
-																subpath,
-																useful_pathkeys,
-																presorted_keys,
-																-1);
-
-				path = create_gather_merge_path(root, rel, subpath, rel->reltarget,
-												subpath->pathkeys, NULL, rowsp);
-
-				add_path(rel, &path->path);
-			}
-		}
-
 	}
 }
 
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 16996b1bc2..ecad427c40 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -5068,6 +5068,48 @@ create_ordered_paths(PlannerInfo *root,
 
 			add_path(ordered_rel, path);
 		}
+
+		/*
+		 * Also consider incremental sort on the remaining partial paths.  These
+		 * are partial paths, so put a Gather Merge on top before add_path().
+		 */
+		{
+			ListCell   *lc;
+			foreach(lc, input_rel->partial_pathlist)
+			{
+				Path	   *input_path = (Path *) lfirst(lc);
+				Path	   *sorted_path;
+				bool		is_sorted;
+				int			presorted_keys;
+				double		total_groups;
+
+				/* already handled above */
+				if (input_path == cheapest_partial_path)
+					continue;
+
+				is_sorted = pathkeys_common_contained_in(root->sort_pathkeys,
+														 input_path->pathkeys, &presorted_keys);
+				/* skip fully sorted paths and paths with no presorted prefix */
+				if (is_sorted || presorted_keys == 0)
+					continue;
+
+				sorted_path = (Path *)
+					create_incremental_sort_path(root, ordered_rel, input_path,
+												 root->sort_pathkeys,
+												 presorted_keys, limit_tuples);
+
+				/* Gather Merge row estimate: partial path rows times workers */
+				total_groups = input_path->rows * input_path->parallel_workers;
+				sorted_path = (Path *)
+					create_gather_merge_path(root, ordered_rel, sorted_path,
+											 sorted_path->pathtarget,
+											 root->sort_pathkeys, NULL, &total_groups);
+				/* Add projection step if needed */
+				if (sorted_path->pathtarget != target)
+					sorted_path = apply_projection_to_path(root, ordered_rel,
+														   sorted_path, target);
+				add_path(ordered_rel, sorted_path);
+			}
+		}
 	}
 
 	/*
@@ -6484,6 +6526,80 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 			}
 		}
 
+
+		/*
+		 * Also consider input paths that are sorted by only a prefix of the
+		 * group pathkeys, adding an incremental sort on top of them.
+		 */
+		foreach(lc, input_rel->pathlist)
+		{
+			Path	   *path = (Path *) lfirst(lc);
+			bool		is_sorted;
+			int			presorted_keys;
+
+			is_sorted = pathkeys_common_contained_in(root->group_pathkeys,
+													 path->pathkeys,
+													 &presorted_keys);
+
+			if (is_sorted)
+				continue;
+
+			if (presorted_keys == 0)
+				continue;
+
+			path = (Path *) create_incremental_sort_path(root,
+														 grouped_rel,
+														 path,
+														 root->group_pathkeys,
+														 presorted_keys,
+														 -1.0);
+
+			/* Now decide what to stick atop it */
+			if (parse->groupingSets)
+			{
+				consider_groupingsets_paths(root, grouped_rel,
+											path, true, can_hash,
+											gd, agg_costs, dNumGroups);
+			}
+			else if (parse->hasAggs)
+			{
+				/*
+				 * We have aggregation, possibly with plain GROUP BY. Make
+				 * an AggPath.
+				 */
+				add_path(grouped_rel, (Path *)
+						 create_agg_path(root,
+										 grouped_rel,
+										 path,
+										 grouped_rel->reltarget,
+										 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+										 AGGSPLIT_SIMPLE,
+										 parse->groupClause,
+										 havingQual,
+										 agg_costs,
+										 dNumGroups));
+			}
+			else if (parse->groupClause)
+			{
+				/*
+				 * We have GROUP BY without aggregation or grouping sets.
+				 * Make a GroupPath.
+				 */
+				add_path(grouped_rel, (Path *)
+						 create_group_path(root,
+										   grouped_rel,
+										   path,
+										   parse->groupClause,
+										   havingQual,
+										   dNumGroups));
+			}
+			else
+			{
+				/* Other cases should have been handled above */
+				Assert(false);
+			}
+		}
+
 		/*
 		 * Instead of operating directly on the input relation, we can
 		 * consider finalizing a partially aggregated path.
@@ -6530,6 +6646,53 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 											   havingQual,
 											   dNumGroups));
 			}
+
+			/* also try incremental sort on paths presorted by a group key prefix */
+			foreach(lc, partially_grouped_rel->pathlist)
+			{
+				Path	   *path = (Path *) lfirst(lc);
+				bool		is_sorted;
+				int			presorted_keys;
+
+				is_sorted = pathkeys_common_contained_in(root->group_pathkeys,
+														 path->pathkeys,
+														 &presorted_keys);
+
+				if (is_sorted)
+					continue;
+
+				if (presorted_keys == 0)
+					continue;
+
+				path = (Path *) create_incremental_sort_path(root,
+															 grouped_rel,
+															 path,
+															 root->group_pathkeys,
+															 presorted_keys,
+															 -1.0);
+
+				if (parse->hasAggs)
+					add_path(grouped_rel, (Path *)
+							 create_agg_path(root,
+											 grouped_rel,
+											 path,
+											 grouped_rel->reltarget,
+											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+											 AGGSPLIT_FINAL_DESERIAL,
+											 parse->groupClause,
+											 havingQual,
+											 agg_final_costs,
+											 dNumGroups));
+				else
+					add_path(grouped_rel, (Path *)
+							 create_group_path(root,
+											   grouped_rel,
+											   path,
+											   parse->groupClause,
+											   havingQual,
+											   dNumGroups));
+			}
+
 		}
 	}
 
@@ -6798,6 +6961,57 @@ create_partial_grouping_paths(PlannerInfo *root,
 											   dNumPartialGroups));
 			}
 		}
+
+		/*
+		 * Also consider input paths that are sorted by only a prefix of the
+		 * group pathkeys, adding an incremental sort on top of them.
+		 */
+		foreach(lc, input_rel->pathlist)
+		{
+			Path	   *path = (Path *) lfirst(lc);
+			bool		is_sorted;
+			int			presorted_keys;
+
+			is_sorted = pathkeys_common_contained_in(root->group_pathkeys,
+													 path->pathkeys,
+													 &presorted_keys);
+
+			/* also ignore already sorted paths */
+			if (is_sorted)
+				continue;
+
+			if (presorted_keys == 0)
+				continue;
+
+			/* add incremental sort */
+			path = (Path *) create_incremental_sort_path(root,
+														 partially_grouped_rel,
+														 path,
+														 root->group_pathkeys,
+														 presorted_keys,
+														 -1.0);
+
+			if (parse->hasAggs)
+				add_path(partially_grouped_rel, (Path *)
+						 create_agg_path(root,
+										 partially_grouped_rel,
+										 path,
+										 partially_grouped_rel->reltarget,
+										 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+										 AGGSPLIT_INITIAL_SERIAL,
+										 parse->groupClause,
+										 NIL,
+										 agg_partial_costs,
+										 dNumPartialGroups));
+			else
+				add_path(partially_grouped_rel, (Path *)
+						 create_group_path(root,
+										   partially_grouped_rel,
+										   path,
+										   parse->groupClause,
+										   NIL,
+										   dNumPartialGroups));
+		}
 	}
 
 	if (can_sort && cheapest_partial_path != NULL)
@@ -6842,6 +7056,52 @@ create_partial_grouping_paths(PlannerInfo *root,
 													   dNumPartialPartialGroups));
 			}
 		}
+
+		/* consider incremental sort on partially sorted partial paths */
+		foreach(lc, input_rel->partial_pathlist)
+		{
+			Path	   *path = (Path *) lfirst(lc);
+			bool		is_sorted;
+			int			presorted_keys;
+
+			is_sorted = pathkeys_common_contained_in(root->group_pathkeys,
+													 path->pathkeys,
+													 &presorted_keys);
+
+			if (is_sorted)
+				continue;
+
+			if (presorted_keys == 0)
+				continue;
+
+			path = (Path *) create_incremental_sort_path(root,
+														 partially_grouped_rel,
+														 path,
+														 root->group_pathkeys,
+														 presorted_keys,
+														 -1.0);
+
+			if (parse->hasAggs)
+				add_partial_path(partially_grouped_rel, (Path *)
+								 create_agg_path(root,
+												 partially_grouped_rel,
+												 path,
+												 partially_grouped_rel->reltarget,
+												 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+												 AGGSPLIT_INITIAL_SERIAL,
+												 parse->groupClause,
+												 NIL,
+												 agg_partial_costs,
+												 dNumPartialPartialGroups));
+			else
+				add_partial_path(partially_grouped_rel, (Path *)
+								 create_group_path(root,
+												   partially_grouped_rel,
+												   path,
+												   parse->groupClause,
+												   NIL,
+												   dNumPartialPartialGroups));
+		}
 	}
 
 	if (can_hash && cheapest_total_path != NULL)
@@ -6938,6 +7198,7 @@ create_partial_grouping_paths(PlannerInfo *root,
 static void
 gather_grouping_paths(PlannerInfo *root, RelOptInfo *rel)
 {
+	ListCell   *lc;
 	Path	   *cheapest_partial_path;
 
 	/* Try Gather for unordered paths and Gather Merge for ordered ones. */
@@ -6967,6 +7228,44 @@ gather_grouping_paths(PlannerInfo *root, RelOptInfo *rel)
 
 		add_path(rel, path);
 	}
+
+	/*
+	 * Also consider an incremental sort plus Gather Merge on every partial
+	 * path that is sorted by a proper prefix of the group pathkeys.
+	 */
+	foreach(lc, rel->partial_pathlist)
+	{
+		Path	   *path = (Path *) lfirst(lc);
+		bool		is_sorted;
+		int			presorted_keys;
+		double		total_groups;
+
+		is_sorted = pathkeys_common_contained_in(root->group_pathkeys,
+												 path->pathkeys,
+												 &presorted_keys);
+
+		/* skip fully sorted paths and paths with no presorted prefix */
+		if (is_sorted || presorted_keys == 0)
+			continue;
+
+		/*
+		 * Row estimate for the Gather Merge: partial path rows times workers.
+		 * It is passed by address below, so it must be initialized first.
+		 */
+		total_groups = path->rows * path->parallel_workers;
+
+		path = (Path *) create_incremental_sort_path(root, rel, path,
+													 root->group_pathkeys,
+													 presorted_keys, -1.0);
+
+		path = (Path *)
+			create_gather_merge_path(root, rel, path, rel->reltarget,
+									 root->group_pathkeys, NULL,
+									 &total_groups);
+
+		add_path(rel, path);
+	}
+
 }
 
 /*
-- 
2.20.1