From d0819177ef1c6f86a588e3d2700ecff638f83b4a Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryzbyj@telsasoft.com>
Date: Wed, 8 Jan 2020 19:23:51 -0600
Subject: [PATCH v4 1/2] Clarify the computation of min/max IO cost..

..and specifically the double use and effect of correlation.

Avoid re-use of the "pages_fetched" variable by using separate min_pages_fetched and max_pages_fetched variables.
---
 src/backend/optimizer/path/costsize.c | 47 +++++++++++++++++++----------------
 1 file changed, 25 insertions(+), 22 deletions(-)

diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index b5a0033..bdc23a0 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -491,12 +491,13 @@ cost_index(IndexPath *path, PlannerInfo *root, double loop_count,
 				csquared;
 	double		spc_seq_page_cost,
 				spc_random_page_cost;
-	Cost		min_IO_cost,
+	double		min_pages_fetched,	/* Page counts for the perfectly correlated (min) and uncorrelated (max) cases */
+				max_pages_fetched;
+	Cost		min_IO_cost,	/* I/O costs for the perfectly correlated (min) and uncorrelated (max) cases */
 				max_IO_cost;
 	QualCost	qpqual_cost;
 	Cost		cpu_per_tuple;
 	double		tuples_fetched;
-	double		pages_fetched;
 	double		rand_heap_pages;
 	double		index_pages;
 
@@ -579,7 +580,8 @@ cost_index(IndexPath *path, PlannerInfo *root, double loop_count,
 	 * (just after a CLUSTER, for example), the number of pages fetched should
 	 * be exactly selectivity * table_size.  What's more, all but the first
 	 * will be sequential fetches, not the random fetches that occur in the
-	 * uncorrelated case.  So if the number of pages is more than 1, we
+	 * uncorrelated case (a correlated index scan reads fewer pages, *and* each
+	 * page read is cheaper).  So if the number of pages is more than 1, we
 	 * ought to charge
 	 *		spc_random_page_cost + (pages_fetched - 1) * spc_seq_page_cost
 	 * For partially-correlated indexes, we ought to charge somewhere between
@@ -604,17 +606,17 @@ cost_index(IndexPath *path, PlannerInfo *root, double loop_count,
 		 * pro-rate the costs for one scan.  In this case we assume all the
 		 * fetches are random accesses.
 		 */
-		pages_fetched = index_pages_fetched(tuples_fetched * loop_count,
+		max_pages_fetched = index_pages_fetched(tuples_fetched * loop_count,
 											baserel->pages,
 											(double) index->pages,
 											root);
 
 		if (indexonly)
-			pages_fetched = ceil(pages_fetched * (1.0 - baserel->allvisfrac));
+			max_pages_fetched = ceil(max_pages_fetched * (1.0 - baserel->allvisfrac));
 
-		rand_heap_pages = pages_fetched;
+		rand_heap_pages = max_pages_fetched;
 
-		max_IO_cost = (pages_fetched * spc_random_page_cost) / loop_count;
+		max_IO_cost = (max_pages_fetched * spc_random_page_cost) / loop_count;
 
 		/*
 		 * In the perfectly correlated case, the number of pages touched by
@@ -626,17 +628,17 @@ cost_index(IndexPath *path, PlannerInfo *root, double loop_count,
 		 * where such a plan is actually interesting, only one page would get
 		 * fetched per scan anyway, so it shouldn't matter much.)
 		 */
-		pages_fetched = ceil(indexSelectivity * (double) baserel->pages);
+		min_pages_fetched = ceil(indexSelectivity * (double) baserel->pages);
 
-		pages_fetched = index_pages_fetched(pages_fetched * loop_count,
+		min_pages_fetched = index_pages_fetched(min_pages_fetched * loop_count,
 											baserel->pages,
 											(double) index->pages,
 											root);
 
 		if (indexonly)
-			pages_fetched = ceil(pages_fetched * (1.0 - baserel->allvisfrac));
+			min_pages_fetched = ceil(min_pages_fetched * (1.0 - baserel->allvisfrac));
 
-		min_IO_cost = (pages_fetched * spc_random_page_cost) / loop_count;
+		min_IO_cost = (min_pages_fetched * spc_random_page_cost) / loop_count;
 	}
 	else
 	{
@@ -644,30 +646,31 @@ cost_index(IndexPath *path, PlannerInfo *root, double loop_count,
 		 * Normal case: apply the Mackert and Lohman formula, and then
 		 * interpolate between that and the correlation-derived result.
 		 */
-		pages_fetched = index_pages_fetched(tuples_fetched,
+
+		/* For the perfectly uncorrelated case (csquared=0) */
+		max_pages_fetched = index_pages_fetched(tuples_fetched,
 											baserel->pages,
 											(double) index->pages,
 											root);
 
 		if (indexonly)
-			pages_fetched = ceil(pages_fetched * (1.0 - baserel->allvisfrac));
+			max_pages_fetched = ceil(max_pages_fetched * (1.0 - baserel->allvisfrac));
 
-		rand_heap_pages = pages_fetched;
+		rand_heap_pages = max_pages_fetched;
 
-		/* max_IO_cost is for the perfectly uncorrelated case (csquared=0) */
-		max_IO_cost = pages_fetched * spc_random_page_cost;
+		max_IO_cost = max_pages_fetched * spc_random_page_cost;
 
-		/* min_IO_cost is for the perfectly correlated case (csquared=1) */
-		pages_fetched = ceil(indexSelectivity * (double) baserel->pages);
+		/* For the perfectly correlated case (csquared=1) */
+		min_pages_fetched = ceil(indexSelectivity * (double) baserel->pages);
 
 		if (indexonly)
-			pages_fetched = ceil(pages_fetched * (1.0 - baserel->allvisfrac));
+			min_pages_fetched = ceil(min_pages_fetched * (1.0 - baserel->allvisfrac));
 
-		if (pages_fetched > 0)
+		if (min_pages_fetched > 0)
 		{
 			min_IO_cost = spc_random_page_cost;
-			if (pages_fetched > 1)
-				min_IO_cost += (pages_fetched - 1) * spc_seq_page_cost;
+			if (min_pages_fetched > 1)
+				min_IO_cost += (min_pages_fetched - 1) * spc_seq_page_cost;
 		}
 		else
 			min_IO_cost = 0;
-- 
2.7.4