Re: On disable_cost

From: Jim Finnerty <jfinnert(at)amazon(dot)com>
To: pgsql-hackers(at)postgresql(dot)org
Subject: Re: On disable_cost
Date: 2019-12-10 22:50:29
Message-ID: 1576018229539-0.post@n3.nabble.com
Views: Raw Message | Whole Thread | Download mbox | Resend email
Thread:
Lists: pgsql-hackers

As a proof of concept, I hacked around a bit today to re-purpose one of the
bits of the Cost structure to mean "is_disabled" so that we can distinguish
'disabled' from 'non-disabled' paths without making the Cost structure any
bigger. In fact, it's still a valid double. The obvious choice would have
been to re-purpose the sign bit, but I've had occasion to exploit negative
costs before, so for this POC I used the high-order bit of the fractional
bits of the double (see the Wikipedia article on double-precision floating
point for the layout). Note, though, that the code sets this bit after
casting the cost to uint64 — that is a value conversion, not a
reinterpretation of the IEEE-754 representation — so in effect a disabled
cost is simply offset by 2^51 in value space, and costs must stay below
2^51 for the flag to be unambiguous.

The idea is to set a special bit when disable_cost is added to a cost.
Dedicating multiple bits instead of just 1 would be easily done, but as it
is we can accumulate many disable_costs without overflowing, so just
comparing the cost suffices.

The patch is not fully debugged and fails on a couple of tests in the serial
test suite. It seems to fail on Cartesian products, and maybe in one other
non-CP case. I wasn't able to debug it before the day came to an end.

In one place the core code subtracts off the disable_cost. I left the
"disabled" bit set in this case, which might be wrong.

I don't see an option to attach the patch as an attachment, so here is the
patch inline (it is based on PG11). The more interesting part is in a small
number of lines in costsize.c. Other changes just add functions that assign
a disable_cost and set the bit, or that compare costs such that a
non-disabled cost always compares less than a disabled cost.

------------------

diff --git a/src/backend/optimizer/path/costsize.c
b/src/backend/optimizer/path/costsize.c
index 4e86458672..3718639330 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -123,6 +123,8 @@ double parallel_setup_cost =
DEFAULT_PARALLEL_SETUP_COST;
int effective_cache_size = DEFAULT_EFFECTIVE_CACHE_SIZE;

Cost disable_cost = 1.0e10;
+uint64 disabled_mask = 0x8000000000000;
+#define IS_DISABLED(cost) (((uint64) cost) & disabled_mask)

int max_parallel_workers_per_gather = 2;

@@ -205,6 +207,53 @@ clamp_row_est(double nrows)
return nrows;
}

+/*
+ * add_cost
+ *		Add delta_cost to cost, maintaining the "disabled" flag.
+ *
+ * The flag is bit 51 of the value's integer part; i.e. a disabled cost is
+ * offset by disabled_mask (2^51) in value space.  Any flag already present
+ * on 'cost' is preserved, and the flag is set when the increment is
+ * disable_cost itself.  The un-flagged portion is clamped below
+ * (disabled_mask - disable_cost) so that repeated additions can never
+ * carry into the flag bit.
+ */
+Cost
+add_cost(Cost cost, Cost delta_cost)
+{
+	uint64	mask = ((uint64) cost) & disabled_mask;
+	Cost	base = cost - (Cost) mask;	/* cost with any flag stripped */
+	Cost	max_cost = (Cost) disabled_mask - disable_cost;
+	Cost	sum;
+
+	if (delta_cost == disable_cost)
+		mask |= disabled_mask;
+
+	sum = base + delta_cost;
+	if (sum >= max_cost)
+		sum = max_cost;
+
+	/*
+	 * Re-attach the flag in value space.  (The previous coding cast the
+	 * sum to uint64 before OR-ing the mask, which silently truncated the
+	 * fractional part of every disabled cost; it also dropped a flag
+	 * already set on 'cost' whenever delta_cost was not disable_cost.)
+	 */
+	return sum + (Cost) mask;
+}
+
+/*
+ * is_lower_cost
+ *		Compare two costs, treating any non-disabled cost as lower than
+ *		any disabled cost regardless of magnitude.
+ */
+bool
+is_lower_cost(Cost cost1, Cost cost2)
+{
+	bool	dis1 = (((uint64) cost1) & disabled_mask) != 0;
+	bool	dis2 = (((uint64) cost2) & disabled_mask) != 0;
+
+	if (dis1 != dis2)
+		return dis2;			/* only the non-disabled side can be lower */
+
+	return (cost1 < cost2);
+}
+
+/*
+ * is_greater_cost
+ *		Compare two costs, treating any disabled cost as greater than
+ *		any non-disabled cost regardless of magnitude.
+ */
+bool
+is_greater_cost(Cost cost1, Cost cost2)
+{
+	bool	dis1 = (((uint64) cost1) & disabled_mask) != 0;
+	bool	dis2 = (((uint64) cost2) & disabled_mask) != 0;
+
+	if (dis1 != dis2)
+		return dis1;			/* only the disabled side can be greater */
+
+	return (cost1 > cost2);
+}
+
+/*
+ * is_geq_cost
+ *		As is_greater_cost, but weak ordering: equal costs (with equal
+ *		disabled status) compare true.
+ */
+bool
+is_geq_cost(Cost cost1, Cost cost2)
+{
+	bool	dis1 = (((uint64) cost1) & disabled_mask) != 0;
+	bool	dis2 = (((uint64) cost2) & disabled_mask) != 0;
+
+	if (dis1 != dis2)
+		return dis1;			/* disabled dominates non-disabled */
+
+	return (cost1 >= cost2);
+}

/*
* cost_seqscan
@@ -235,7 +284,7 @@ cost_seqscan(Path *path, PlannerInfo *root,
path->rows = baserel->rows;

if (!enable_seqscan)
- startup_cost += disable_cost;
+ startup_cost = add_cost(startup_cost, disable_cost);

/* fetch estimated page cost for tablespace containing table */
get_tablespace_page_costs(baserel->reltablespace,
@@ -424,7 +473,7 @@ cost_gather_merge(GatherMergePath *path, PlannerInfo
*root,
path->path.rows = rel->rows;

if (!enable_gathermerge)
- startup_cost += disable_cost;
+ startup_cost = add_cost(startup_cost, disable_cost);

/*
* Add one to the number of workers to account for the leader. This might
@@ -538,7 +587,7 @@ cost_index(IndexPath *path, PlannerInfo *root, double
loop_count,
}

if (!enable_indexscan)
- startup_cost += disable_cost;
+ startup_cost = add_cost(startup_cost, disable_cost);
/* we don't need to check enable_indexonlyscan; indxpath.c does that */

/*
@@ -976,7 +1025,7 @@ cost_bitmap_heap_scan(Path *path, PlannerInfo *root,
RelOptInfo *baserel,
path->rows = baserel->rows;

if (!enable_bitmapscan)
- startup_cost += disable_cost;
+ startup_cost = add_cost(startup_cost, disable_cost);

pages_fetched = compute_bitmap_pages(root, baserel, bitmapqual,
loop_count, &indexTotalCost,
@@ -1242,10 +1291,10 @@ cost_tidscan(Path *path, PlannerInfo *root,
if (isCurrentOf)
{
Assert(baserel->baserestrictcost.startup >= disable_cost);
- startup_cost -= disable_cost;
+ startup_cost -= disable_cost; /* but do not un-set the disabled mark */
}
else if (!enable_tidscan)
- startup_cost += disable_cost;
+ startup_cost = add_cost(startup_cost, disable_cost);

/*
* The TID qual expressions will be computed once, any other baserestrict
@@ -1676,7 +1725,7 @@ cost_sort(Path *path, PlannerInfo *root,
long sort_mem_bytes = sort_mem * 1024L;

if (!enable_sort)
- startup_cost += disable_cost;
+ startup_cost = add_cost(startup_cost, disable_cost);

path->rows = tuples;

@@ -2121,8 +2170,8 @@ cost_agg(Path *path, PlannerInfo *root,
total_cost = input_total_cost;
if (aggstrategy == AGG_MIXED && !enable_hashagg)
{
- startup_cost += disable_cost;
- total_cost += disable_cost;
+ startup_cost = add_cost(startup_cost, disable_cost);
+ total_cost = add_cost(total_cost, disable_cost);
}
/* calcs phrased this way to match HASHED case, see note above */
total_cost += aggcosts->transCost.startup;
@@ -2137,7 +2186,7 @@ cost_agg(Path *path, PlannerInfo *root,
/* must be AGG_HASHED */
startup_cost = input_total_cost;
if (!enable_hashagg)
- startup_cost += disable_cost;
+ startup_cost = add_cost(startup_cost, disable_cost);
startup_cost += aggcosts->transCost.startup;
startup_cost += aggcosts->transCost.per_tuple * input_tuples;
startup_cost += (cpu_operator_cost * numGroupCols) * input_tuples;
@@ -2436,7 +2485,7 @@ final_cost_nestloop(PlannerInfo *root, NestPath *path,
* disabled, which doesn't seem like the way to bet.
*/
if (!enable_nestloop)
- startup_cost += disable_cost;
+ startup_cost = add_cost(startup_cost, disable_cost);

/* cost of inner-relation source data (we already dealt with outer rel) */

@@ -2882,7 +2931,7 @@ final_cost_mergejoin(PlannerInfo *root, MergePath
*path,
* disabled, which doesn't seem like the way to bet.
*/
if (!enable_mergejoin)
- startup_cost += disable_cost;
+ startup_cost = add_cost(startup_cost, disable_cost);

/*
* Compute cost of the mergequals and qpquals (other restriction clauses)
@@ -3312,7 +3361,7 @@ final_cost_hashjoin(PlannerInfo *root, HashPath *path,
* disabled, which doesn't seem like the way to bet.
*/
if (!enable_hashjoin)
- startup_cost += disable_cost;
+ startup_cost = add_cost(startup_cost, disable_cost);

/* mark the path with estimated # of batches */
path->num_batches = numbatches;
@@ -3410,7 +3459,7 @@ final_cost_hashjoin(PlannerInfo *root, HashPath *path,
if (relation_byte_size(clamp_row_est(inner_path_rows * innermcvfreq),
inner_path->pathtarget->width) >
(work_mem * 1024L))
- startup_cost += disable_cost;
+ startup_cost = add_cost(startup_cost, disable_cost);

/*
* Compute cost of the hashquals and qpquals (other restriction clauses)
@@ -3930,7 +3979,7 @@ cost_qual_eval_walker(Node *node,
cost_qual_eval_context *context)
else if (IsA(node, CurrentOfExpr))
{
/* Report high cost to prevent selection of anything but TID scan */
- context->total.startup += disable_cost;
+ context->total.startup = add_cost(context->total.startup, disable_cost);
}
else if (IsA(node, SubLink))
{
diff --git a/src/backend/optimizer/util/pathnode.c
b/src/backend/optimizer/util/pathnode.c
index 4736d84a83..fd746a06bc 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -72,33 +72,33 @@ compare_path_costs(Path *path1, Path *path2,
CostSelector criterion)
{
if (criterion == STARTUP_COST)
{
- if (path1->startup_cost < path2->startup_cost)
+ if (is_lower_cost(path1->startup_cost, path2->startup_cost))
return -1;
- if (path1->startup_cost > path2->startup_cost)
+ if (is_greater_cost(path1->startup_cost, path2->startup_cost))
return +1;

/*
* If paths have the same startup cost (not at all unlikely), order
* them by total cost.
*/
- if (path1->total_cost < path2->total_cost)
+ if (is_lower_cost(path1->total_cost, path2->total_cost))
return -1;
- if (path1->total_cost > path2->total_cost)
+ if (is_greater_cost(path1->total_cost, path2->total_cost))
return +1;
}
else
{
- if (path1->total_cost < path2->total_cost)
+ if (is_lower_cost(path1->total_cost, path2->total_cost))
return -1;
- if (path1->total_cost > path2->total_cost)
+ if (is_greater_cost(path1->total_cost, path2->total_cost))
return +1;

/*
* If paths have the same total cost, order them by startup cost.
*/
- if (path1->startup_cost < path2->startup_cost)
+ if (is_lower_cost(path1->startup_cost, path2->startup_cost))
return -1;
- if (path1->startup_cost > path2->startup_cost)
+ if (is_greater_cost(path1->startup_cost, path2->startup_cost))
return +1;
}
return 0;
@@ -126,9 +126,9 @@ compare_fractional_path_costs(Path *path1, Path *path2,
fraction * (path1->total_cost - path1->startup_cost);
cost2 = path2->startup_cost +
fraction * (path2->total_cost - path2->startup_cost);
- if (cost1 < cost2)
+ if (is_lower_cost(cost1, cost2))
return -1;
- if (cost1 > cost2)
+ if (is_greater_cost(cost1, cost2))
return +1;
return 0;
}
@@ -172,11 +172,11 @@ compare_path_costs_fuzzily(Path *path1, Path *path2,
double fuzz_factor)
* Check total cost first since it's more likely to be different; many
* paths have zero startup cost.
*/
- if (path1->total_cost > path2->total_cost * fuzz_factor)
+ if (is_greater_cost(path1->total_cost, path2->total_cost * fuzz_factor))
{
/* path1 fuzzily worse on total cost */
if (CONSIDER_PATH_STARTUP_COST(path1) &&
- path2->startup_cost > path1->startup_cost * fuzz_factor)
+ is_greater_cost(path2->startup_cost, path1->startup_cost * fuzz_factor))
{
/* ... but path2 fuzzily worse on startup, so DIFFERENT */
return COSTS_DIFFERENT;
@@ -184,11 +184,11 @@ compare_path_costs_fuzzily(Path *path1, Path *path2,
double fuzz_factor)
/* else path2 dominates */
return COSTS_BETTER2;
}
- if (path2->total_cost > path1->total_cost * fuzz_factor)
+ if (is_greater_cost(path2->total_cost, path1->total_cost * fuzz_factor))
{
/* path2 fuzzily worse on total cost */
if (CONSIDER_PATH_STARTUP_COST(path2) &&
- path1->startup_cost > path2->startup_cost * fuzz_factor)
+ is_greater_cost(path1->startup_cost, path2->startup_cost * fuzz_factor))
{
/* ... but path1 fuzzily worse on startup, so DIFFERENT */
return COSTS_DIFFERENT;
@@ -197,12 +197,12 @@ compare_path_costs_fuzzily(Path *path1, Path *path2,
double fuzz_factor)
return COSTS_BETTER1;
}
/* fuzzily the same on total cost ... */
- if (path1->startup_cost > path2->startup_cost * fuzz_factor)
+ if (is_greater_cost(path1->startup_cost, path2->startup_cost *
fuzz_factor))
{
/* ... but path1 fuzzily worse on startup, so path2 wins */
return COSTS_BETTER2;
}
- if (path2->startup_cost > path1->startup_cost * fuzz_factor)
+ if (is_greater_cost(path2->startup_cost, path1->startup_cost *
fuzz_factor))
{
/* ... but path2 fuzzily worse on startup, so path1 wins */
return COSTS_BETTER1;
@@ -605,7 +605,7 @@ add_path(RelOptInfo *parent_rel, Path *new_path)
else
{
/* new belongs after this old path if it has cost >= old's */
- if (new_path->total_cost >= old_path->total_cost)
+ if (is_geq_cost(new_path->total_cost, old_path->total_cost))
insert_after = p1;
/* p1_prev advances */
p1_prev = p1;
@@ -681,7 +681,7 @@ add_path_precheck(RelOptInfo *parent_rel,
*
* Cost comparisons here should match compare_path_costs_fuzzily.
*/
- if (total_cost > old_path->total_cost * STD_FUZZ_FACTOR)
+ if (is_greater_cost(total_cost, old_path->total_cost * STD_FUZZ_FACTOR))
{
/* new path can win on startup cost only if consider_startup */
if (startup_cost > old_path->startup_cost * STD_FUZZ_FACTOR ||
@@ -796,14 +796,14 @@ add_partial_path(RelOptInfo *parent_rel, Path
*new_path)
/* Unless pathkeys are incompable, keep just one of the two paths. */
if (keyscmp != PATHKEYS_DIFFERENT)
{
- if (new_path->total_cost > old_path->total_cost * STD_FUZZ_FACTOR)
+ if (is_greater_cost(new_path->total_cost, old_path->total_cost *
STD_FUZZ_FACTOR))
{
/* New path costs more; keep it only if pathkeys are better. */
if (keyscmp != PATHKEYS_BETTER1)
accept_new = false;
}
- else if (old_path->total_cost > new_path->total_cost
- * STD_FUZZ_FACTOR)
+ else if (is_greater_cost(old_path->total_cost, new_path->total_cost
+ * STD_FUZZ_FACTOR))
{
/* Old path costs more; keep it only if pathkeys are better. */
if (keyscmp != PATHKEYS_BETTER2)
@@ -819,7 +819,7 @@ add_partial_path(RelOptInfo *parent_rel, Path *new_path)
/* Costs are about the same, old path has better pathkeys. */
accept_new = false;
}
- else if (old_path->total_cost > new_path->total_cost * 1.0000000001)
+ else if (is_greater_cost(old_path->total_cost, new_path->total_cost *
1.0000000001))
{
/* Pathkeys are the same, and the old path costs more. */
remove_old = true;
@@ -847,7 +847,7 @@ add_partial_path(RelOptInfo *parent_rel, Path *new_path)
else
{
/* new belongs after this old path if it has cost >= old's */
- if (new_path->total_cost >= old_path->total_cost)
+ if (is_geq_cost(new_path->total_cost, old_path->total_cost))
insert_after = p1;
/* p1_prev advances */
p1_prev = p1;
@@ -913,10 +913,10 @@ add_partial_path_precheck(RelOptInfo *parent_rel, Cost
total_cost,
keyscmp = compare_pathkeys(pathkeys, old_path->pathkeys);
if (keyscmp != PATHKEYS_DIFFERENT)
{
- if (total_cost > old_path->total_cost * STD_FUZZ_FACTOR &&
+ if (is_greater_cost(total_cost, old_path->total_cost * STD_FUZZ_FACTOR)
&&
keyscmp != PATHKEYS_BETTER1)
return false;
- if (old_path->total_cost > total_cost * STD_FUZZ_FACTOR &&
+ if (is_greater_cost(old_path->total_cost, total_cost * STD_FUZZ_FACTOR)
&&
keyscmp != PATHKEYS_BETTER2)
return true;
}
@@ -1697,7 +1697,7 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel,
Path *subpath,

if (sjinfo->semi_can_btree && sjinfo->semi_can_hash)
{
- if (agg_path.total_cost < sort_path.total_cost)
+ if (is_lower_cost(agg_path.total_cost, sort_path.total_cost))
pathnode->umethod = UNIQUE_PATH_HASH;
else
pathnode->umethod = UNIQUE_PATH_SORT;
diff --git a/src/backend/utils/cache/relcache.c
b/src/backend/utils/cache/relcache.c
index 78f3b99a76..c261a9d790 100644
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@@ -5076,8 +5076,8 @@ IsProjectionFunctionalIndex(Relation index)
* when values differ because the expression is recalculated when
* inserting a new index entry for the changed value.
*/
- if ((index_expr_cost.startup + index_expr_cost.per_tuple) >
- HEURISTIC_MAX_HOT_RECHECK_EXPR_COST)
+ if (is_greater_cost((index_expr_cost.startup +
index_expr_cost.per_tuple),
+ HEURISTIC_MAX_HOT_RECHECK_EXPR_COST))
is_projection = false;

tuple = SearchSysCache1(RELOID,
ObjectIdGetDatum(RelationGetRelid(index)));
diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h
index 9159f2bab1..c01d08eae5 100644
--- a/src/include/optimizer/cost.h
+++ b/src/include/optimizer/cost.h
@@ -251,6 +251,12 @@ extern PathTarget
*set_pathtarget_cost_width(PlannerInfo *root, PathTarget *targ
extern double compute_bitmap_pages(PlannerInfo *root, RelOptInfo *baserel,
Path *bitmapqual, int loop_count, Cost *cost, double *tuple);

+extern Cost add_cost(Cost cost, Cost delta_cost);
+extern bool is_lower_cost(Cost cost1, Cost cost2);
+extern bool is_greater_cost(Cost cost1, Cost cost2);
+extern bool is_geq_cost(Cost cost1, Cost cost2);
+
+
/*
* prototypes for clausesel.c
* routines to compute clause selectivities

-----
Jim Finnerty, AWS, Amazon Aurora PostgreSQL
--
Sent from: https://www.postgresql-archive.org/PostgreSQL-hackers-f1928748.html

In response to

Responses

Browse pgsql-hackers by date

  From Date Subject
Next Message Thomas Munro 2019-12-11 00:54:47 Re: Windows UTF-8, non-ICU collation trouble
Previous Message Tom Lane 2019-12-10 22:46:48 Re: pg_control_init() bug