From cfbfb8187f4e8303fe3358b5c909533ee6629efe Mon Sep 17 00:00:00 2001 From: "dgrowley@gmail.com" Date: Thu, 2 Jul 2020 16:06:36 +1200 Subject: [PATCH v12 1/5] Allow estimate_num_groups() to pass back further details about the estimation Here we add a new output parameter, an EstimationInfo pointer, to estimate_num_groups(). The function sets bits in the struct's "flags" field to pass back additional details about the estimation which callers may find useful for decision making. For now, the only flag is SELFLAG_USED_DEFAULT, which indicates that the estimation fell back on the hard-coded default constants in some part of the estimation. Callers may like to change their behavior if this is set, and this gives them the ability to do so. Callers may pass the EstimationInfo pointer as NULL if they have no interest in any of the flags. We're not adding any actual usages of these flags here. Some follow-up commits will make use of this feature. --- contrib/postgres_fdw/postgres_fdw.c | 2 +- src/backend/optimizer/path/costsize.c | 3 ++- src/backend/optimizer/path/indxpath.c | 1 + src/backend/optimizer/plan/planner.c | 10 ++++++---- src/backend/optimizer/prep/prepunion.c | 1 + src/backend/optimizer/util/pathnode.c | 1 + src/backend/utils/adt/selfuncs.c | 22 +++++++++++++++++++++- src/include/utils/selfuncs.h | 17 ++++++++++++++++- 8 files changed, 49 insertions(+), 8 deletions(-) diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c index b6c72e1d1e..796ece6b3b 100644 --- a/contrib/postgres_fdw/postgres_fdw.c +++ b/contrib/postgres_fdw/postgres_fdw.c @@ -2953,7 +2953,7 @@ estimate_path_cost_size(PlannerInfo *root, numGroups = estimate_num_groups(root, get_sortgrouplist_exprs(root->parse->groupClause, fpinfo->grouped_tlist), - input_rows, NULL); + input_rows, NULL, NULL); /* * Get the retrieved_rows and rows estimates. 
If there are HAVING diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index 22d6935824..d2bf9912e9 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -1874,7 +1874,8 @@ cost_incremental_sort(Path *path, /* Estimate number of groups with equal presorted keys. */ if (!unknown_varno) - input_groups = estimate_num_groups(root, presortedExprs, input_tuples, NULL); + input_groups = estimate_num_groups(root, presortedExprs, input_tuples, + NULL, NULL); group_tuples = input_tuples / input_groups; group_input_run_cost = input_run_cost / input_groups; diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c index bcb1bc6097..4f6ab5d635 100644 --- a/src/backend/optimizer/path/indxpath.c +++ b/src/backend/optimizer/path/indxpath.c @@ -1986,6 +1986,7 @@ adjust_rowcount_for_semijoins(PlannerInfo *root, nunique = estimate_num_groups(root, sjinfo->semi_rhs_exprs, nraw, + NULL, NULL); if (rowcount > nunique) rowcount = nunique; diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 1a94b58f8b..ea7b0dd601 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -3702,7 +3702,8 @@ get_number_of_groups(PlannerInfo *root, double numGroups = estimate_num_groups(root, groupExprs, path_rows, - &gset); + &gset, + NULL); gs->numGroups = numGroups; rollup->numGroups += numGroups; @@ -3727,7 +3728,8 @@ get_number_of_groups(PlannerInfo *root, double numGroups = estimate_num_groups(root, groupExprs, path_rows, - &gset); + &gset, + NULL); gs->numGroups = numGroups; gd->dNumHashGroups += numGroups; @@ -3743,7 +3745,7 @@ get_number_of_groups(PlannerInfo *root, target_list); dNumGroups = estimate_num_groups(root, groupExprs, path_rows, - NULL); + NULL, NULL); } } else if (parse->groupingSets) @@ -4793,7 +4795,7 @@ create_distinct_paths(PlannerInfo *root, parse->targetList); numDistinctRows = 
estimate_num_groups(root, distinctExprs, cheapest_input_path->rows, - NULL); + NULL, NULL); } /* diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c index 745f443e5c..f33033bc27 100644 --- a/src/backend/optimizer/prep/prepunion.c +++ b/src/backend/optimizer/prep/prepunion.c @@ -338,6 +338,7 @@ recurse_set_operations(Node *setOp, PlannerInfo *root, *pNumGroups = estimate_num_groups(subroot, get_tlist_exprs(subquery->targetList, false), subpath->rows, + NULL, NULL); } } diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index 51478957fb..e1aaeecc8a 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -1688,6 +1688,7 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, pathnode->path.rows = estimate_num_groups(root, sjinfo->semi_rhs_exprs, rel->rows, + NULL, NULL); numCols = list_length(sjinfo->semi_rhs_exprs); diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index 80bd60f876..910515ffb2 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -3241,6 +3241,7 @@ typedef struct Node *var; /* might be an expression, not just a Var */ RelOptInfo *rel; /* relation it belongs to */ double ndistinct; /* # distinct values */ + bool isdefault; /* true if DEFAULT_NUM_DISTINCT was used */ } GroupVarInfo; static List * @@ -3287,6 +3288,7 @@ add_unique_group_var(PlannerInfo *root, List *varinfos, varinfo->var = var; varinfo->rel = vardata->rel; varinfo->ndistinct = ndistinct; + varinfo->isdefault = isdefault; varinfos = lappend(varinfos, varinfo); return varinfos; } @@ -3311,6 +3313,12 @@ add_unique_group_var(PlannerInfo *root, List *varinfos, * pgset - NULL, or a List** pointing to a grouping set to filter the * groupExprs against * + * Outputs: + * estinfo - When passed as non-NULL, the function will set bits in the + * "flags" field in order to provide callers with 
additional information + * about the estimation. Currently, we only set the SELFLAG_USED_DEFAULT + * bit if we used any default values in the estimation. + * * Given the lack of any cross-correlation statistics in the system, it's * impossible to do anything really trustworthy with GROUP BY conditions * involving multiple Vars. We should however avoid assuming the worst @@ -3358,7 +3366,7 @@ add_unique_group_var(PlannerInfo *root, List *varinfos, */ double estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows, - List **pgset) + List **pgset, EstimationInfo *estinfo) { List *varinfos = NIL; double srf_multiplier = 1.0; @@ -3366,6 +3374,10 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows, ListCell *l; int i; + /* Zero the estinfo output parameter, if non-NULL */ + if (estinfo != NULL) + memset(estinfo, 0, sizeof(EstimationInfo)); + /* * We don't ever want to return an estimate of zero groups, as that tends * to lead to division-by-zero and other unpleasantness. The input_rows @@ -3569,6 +3581,14 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows, if (relmaxndistinct < varinfo2->ndistinct) relmaxndistinct = varinfo2->ndistinct; relvarcount++; + + /* + * When varinfo2's isdefault is set then we'd better set + * the SELFLAG_USED_DEFAULT bit in the EstimationInfo. + */ + if (estinfo != NULL && varinfo2->isdefault) + estinfo->flags |= SELFLAG_USED_DEFAULT; + } /* we're done with this relation */ diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h index 3a2cfb7efa..a50e9ad5f4 100644 --- a/src/include/utils/selfuncs.h +++ b/src/include/utils/selfuncs.h @@ -65,6 +65,20 @@ p = 1.0; \ } while (0) +/* + * A set of flags which some selectivity estimation functions can pass back to + * callers to provide further details about some assumptions which were made + * during the estimation. 
+ */ +#define SELFLAG_USED_DEFAULT (1 << 0) /* Estimation fell back on one + * of the DEFAULTs as defined + * above. */ + +typedef struct EstimationInfo +{ + int flags; /* Flags, as defined above to mark special + * properties of the estimation. */ +} EstimationInfo; /* Return data from examine_variable and friends */ typedef struct VariableStatData @@ -194,7 +208,8 @@ extern void mergejoinscansel(PlannerInfo *root, Node *clause, Selectivity *rightstart, Selectivity *rightend); extern double estimate_num_groups(PlannerInfo *root, List *groupExprs, - double input_rows, List **pgset); + double input_rows, List **pgset, + EstimationInfo *estinfo); extern void estimate_hash_bucket_stats(PlannerInfo *root, Node *hashkey, double nbuckets, -- 2.27.0