From fbc5e6709550f485a2153dda97ef805700717f23 Mon Sep 17 00:00:00 2001
From: Andrey Lepikhov <a.lepikhov@postgrespro.ru>
Date: Fri, 24 Dec 2021 13:01:48 +0500
Subject: [PATCH] My fixes.

---
 src/backend/optimizer/path/costsize.c | 33 ++++++++++++---------------
 src/backend/optimizer/path/pathkeys.c |  9 ++++----
 src/backend/optimizer/plan/planner.c  |  8 -------
 3 files changed, 20 insertions(+), 30 deletions(-)

diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index afb8ba54ea..70af9c91d5 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -1845,19 +1845,17 @@ get_width_cost_multiplier(PlannerInfo *root, Expr *expr)
  * groups in the current pathkey prefix and the new pathkey), and the comparison
  * costs (which is data type specific).
  *
- * Estimation of the number of comparisons is based on ideas from two sources:
+ * Estimation of the number of comparisons is based on ideas from:
  *
- * 1) "Algorithms" (course), Robert Sedgewick, Kevin Wayne [https://algs4.cs.princeton.edu/home/]
+ * "Quicksort Is Optimal", Robert Sedgewick, Jon Bentley, 2002
+ * [https://www.cs.princeton.edu/~rs/talks/QuicksortIsOptimal.pdf]
  *
- * 2) "Quicksort Is Optimal For Many Equal Keys" (paper), Sebastian Wild,
- * arXiv:1608.04906v4 [cs.DS] 1 Nov 2017. [https://arxiv.org/abs/1608.04906]
- *
- * In term of that paper, let N - number of tuples, Xi - number of tuples with
- * key Ki, then the estimate of number of comparisons is:
+ * In term of that paper, let N - number of tuples, Xi - number of identical
+ * tuples with value Ki, then the estimate of number of comparisons is:
  *
  *	log(N! / (X1! * X2! * ..))  ~  sum(Xi * log(N/Xi))
  *
- * In our case all Xi are the same because now we don't have any estimation of
+ * We assume all Xi the same because now we don't have any estimation of
  * group sizes, we have only know the estimate of number of groups (distinct
  * values). In that case, formula becomes:
  *
@@ -1865,7 +1863,7 @@ get_width_cost_multiplier(PlannerInfo *root, Expr *expr)
  *
  * For multi-column sorts we need to estimate the number of comparisons for
  * each individual column - for example with columns (c1, c2, ..., ck) we
- * can estimate that number of comparions on ck is roughly
+ * can estimate that number of comparisons on ck is roughly
  *
  *	ncomparisons(c1, c2, ..., ck) / ncomparisons(c1, c2, ..., c(k-1))
  *
@@ -1874,10 +1872,10 @@ get_width_cost_multiplier(PlannerInfo *root, Expr *expr)
  *
  *	N * sum( Fk * log(Gk) )
  *
- * Note: We also consider column witdth, not just the comparator cost.
+ * Note: We also consider column width, not just the comparator cost.
  *
  * NOTE: some callers currently pass NIL for pathkeys because they
- * can't conveniently supply the sort keys.  In this case, it will fallback to
+ * can't conveniently supply the sort keys. In this case, it will fallback to
  * simple comparison cost estimate.
  */
 static Cost
@@ -1925,13 +1923,13 @@ compute_cpu_sort_cost(PlannerInfo *root, List *pathkeys, int nPresortedKeys,
 	 */
 	foreach(lc, pathkeys)
 	{
-		PathKey				*pathkey = (PathKey*)lfirst(lc);
+		PathKey				*pathkey = (PathKey*) lfirst(lc);
 		EquivalenceMember	*em;
 		double				 nGroups,
 							 correctedNGroups;
 
 		/*
-		 * We believe than equivalence members aren't very  different, so, to
+		 * We believe that equivalence members aren't very different, so, to
 		 * estimate cost we take just first member
 		 */
 		em = (EquivalenceMember *) linitial(pathkey->pk_eclass->ec_members);
@@ -1964,7 +1962,7 @@ compute_cpu_sort_cost(PlannerInfo *root, List *pathkeys, int nPresortedKeys,
 
 		totalFuncCost += funcCost;
 
-		/* remeber if we have a fake var in pathkeys */
+		/* Remember if we have a fake var in pathkeys */
 		has_fake_var |= is_fake_var(em->em_expr);
 		pathkeyExprs = lappend(pathkeyExprs, em->em_expr);
 
@@ -1974,7 +1972,7 @@ compute_cpu_sort_cost(PlannerInfo *root, List *pathkeys, int nPresortedKeys,
 		 */
 		if (has_fake_var == false)
 			/*
-			 * Recursively compute number of group in group from previous step
+			 * Recursively compute number of groups in a group from previous step
 			 */
 			nGroups = estimate_num_groups_incremental(root, pathkeyExprs,
 													  tuplesPerPrevGroup, NULL, NULL,
@@ -1992,8 +1990,7 @@ compute_cpu_sort_cost(PlannerInfo *root, List *pathkeys, int nPresortedKeys,
 			 *
 			 * XXX What's the logic of the following formula?
 			 */
-			nGroups = ceil(2.0 + sqrt(tuples) *
-				list_length(pathkeyExprs) / list_length(pathkeys));
+			nGroups = ceil(2.0 + sqrt(tuples) * (i + 1) / list_length(pathkeys));
 		else
 			nGroups = tuples;
 
@@ -2033,7 +2030,7 @@ compute_cpu_sort_cost(PlannerInfo *root, List *pathkeys, int nPresortedKeys,
 
 		/*
 		 * We could skip all following columns for cost estimation, because we
-		 * believe that tuples are unique by set ot previous columns
+		 * believe that tuples are unique by the set of previous columns
 		 */
 		if (tuplesPerPrevGroup <= 1.0)
 			break;
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index 457018c2a4..f1919823da 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -544,9 +544,10 @@ PathkeyMutatorNext(PathkeyMutatorState *state)
 	return state->elemsList;
 }
 
-typedef struct {
-	Cost cost;
-	PathKey *pathkey;
+typedef struct PathkeySortCost
+{
+	Cost		cost;
+	PathKey	   *pathkey;
 } PathkeySortCost;
 
 static int
@@ -793,7 +794,7 @@ get_useful_group_keys_orderings(PlannerInfo *root, double nrows,
 	 * more complex logic to decide the ordering.
 	 *
 	 * XXX Isn't this somewhat redundant with presorted_keys? Actually, it's
-	 * more a complement, because it allows benefinting from incremental sort
+	 * more a complement, because it allows benefiting from incremental sort
 	 * as much as possible.
 	 *
 	 * XXX This does nothing if (n_preordered == 0). We shouldn't create the
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index fac5822e5b..a7e36ed76e 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -6158,7 +6158,6 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 		{
 			ListCell   *lc2;
 			Path	   *path = (Path *) lfirst(lc);
-			Path	   *path_save = path;
 			Path	   *path_original = path;
 
 			List	   *pathkey_orderings = NIL;
@@ -6182,9 +6181,6 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 				int			presorted_keys = 0;
 				PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
 
-				/* restore the path (we replace it in the loop) */
-				path = path_save;
-
 				is_sorted = pathkeys_count_contained_in(info->pathkeys,
 														path->pathkeys,
 														&presorted_keys);
@@ -6653,7 +6649,6 @@ create_partial_grouping_paths(PlannerInfo *root,
 		{
 			ListCell   *lc2;
 			Path	   *path = (Path *) lfirst(lc);
-			Path	   *path_save = path;
 
 			List	   *pathkey_orderings = NIL;
 
@@ -6676,9 +6671,6 @@ create_partial_grouping_paths(PlannerInfo *root,
 				int			presorted_keys = 0;
 				PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
 
-				/* restore the path (we replace it in the loop) */
-				path = path_save;
-
 				is_sorted = pathkeys_count_contained_in(info->pathkeys,
 														path->pathkeys,
 														&presorted_keys);
-- 
2.25.1