From e8714d7edbfbafd3203623680e290d00ec3f1f8c Mon Sep 17 00:00:00 2001
From: Tomas Vondra
Date: Mon, 2 Dec 2019 23:02:17 +0100
Subject: [PATCH 1/3] Support using extended stats for parts of OR clauses

---
Reviewer notes (text in this section is ignored by git am):
 - clauselist_selectivity_or() now advances the clause index on every
   iteration, so exactly the clauses recorded in 'estimatedclauses' are
   skipped (previously the index stayed at 0 for the whole loop).
 - The extended-statistics result seeds the OR estimate only when at
   least one clause was actually estimated.
 - Hunk line counts are adjusted for the above; the blob ids on the
   clausesel.c "index" line were not regenerated and are stale.

 src/backend/optimizer/path/clausesel.c        | 110 ++++++++++++++----
 src/backend/statistics/extended_stats.c       |  56 +++++++++---
 src/backend/statistics/mcv.c                  |   5 +-
 .../statistics/extended_stats_internal.h      |   3 +-
 src/include/statistics/statistics.h           |   3 +-
 5 files changed, 142 insertions(+), 35 deletions(-)

diff --git a/src/backend/optimizer/path/clausesel.c b/src/backend/optimizer/path/clausesel.c
index a3ebe10592..8ff756bb31 100644
--- a/src/backend/optimizer/path/clausesel.c
+++ b/src/backend/optimizer/path/clausesel.c
@@ -92,7 +92,7 @@ clauselist_selectivity(PlannerInfo *root,
 		 */
 		s1 *= statext_clauselist_selectivity(root, clauses, varRelid,
 											 jointype, sjinfo, rel,
-											 &estimatedclauses);
+											 &estimatedclauses, false);
 	}
 
 	/*
@@ -104,6 +104,90 @@ clauselist_selectivity(PlannerInfo *root,
 										  estimatedclauses);
 }
 
+/*
+ * clauselist_selectivity_or -
+ *	  Compute the selectivity of an implicitly-ORed list of boolean
+ *	  expression clauses.
+ *
+ * If the clauses reference a single base relation that has extended
+ * statistics, those are applied first.  Clauses not estimated that way
+ * are estimated one at a time with clause_selectivity() and folded in
+ * as s1+s2 - s1*s2.
+ */
+static Selectivity
+clauselist_selectivity_or(PlannerInfo *root,
+						  List *clauses,
+						  int varRelid,
+						  JoinType jointype,
+						  SpecialJoinInfo *sjinfo)
+{
+	ListCell   *lc;
+	Selectivity s1 = 0.0;
+	RelOptInfo *rel;
+	Bitmapset  *estimatedclauses = NULL;
+	int			idx;
+
+	/*
+	 * Determine if these clauses reference a single relation. If so, and if
+	 * it has extended statistics, try to apply those.
+	 */
+	rel = find_single_rel_for_clauses(root, clauses);
+	if (rel && rel->rtekind == RTE_RELATION && rel->statlist != NIL)
+	{
+		Selectivity stat_sel;
+
+		/*
+		 * Estimate as many clauses as possible using extended statistics.
+		 *
+		 * 'estimatedclauses' tracks the 0-based list position index of
+		 * clauses that we've estimated using extended statistics, and that
+		 * should be ignored.
+		 */
+		stat_sel = statext_clauselist_selectivity(root, clauses, varRelid,
+												  jointype, sjinfo, rel,
+												  &estimatedclauses, true);
+
+		/*
+		 * We can't multiply with the current value, because for OR clauses
+		 * we start with 0.0, so assign to s1 directly.  Do that only if at
+		 * least one clause was actually estimated; otherwise the result
+		 * says nothing about this OR list and must not seed the estimate.
+		 */
+		if (!bms_is_empty(estimatedclauses))
+			s1 = stat_sel;
+	}
+
+	/*
+	 * Selectivities of the remaining clauses for an OR clause are computed
+	 * as s1+s2 - s1*s2 to account for the probable overlap of selected tuple
+	 * sets.
+	 *
+	 * XXX is this too conservative?
+	 */
+	idx = -1;
+	foreach(lc, clauses)
+	{
+		Selectivity s2;
+
+		/* Advance first, so that the "continue" below can't skip it. */
+		idx++;
+
+		/* Skip clauses already estimated using extended statistics. */
+		if (bms_is_member(idx, estimatedclauses))
+			continue;
+
+		s2 = clause_selectivity(root,
+								(Node *) lfirst(lc),
+								varRelid,
+								jointype,
+								sjinfo);
+
+		s1 = s1 + s2 - s1 * s2;
+	}
+
+	return s1;
+}
+
 /*
  * clauselist_selectivity_simple -
  *	  Compute the selectivity of an implicitly-ANDed list of boolean
@@ -735,24 +819,14 @@ clause_selectivity(PlannerInfo *root,
 	else if (is_orclause(clause))
 	{
 		/*
-		 * Selectivities for an OR clause are computed as s1+s2 - s1*s2 to
-		 * account for the probable overlap of selected tuple sets.
-		 *
-		 * XXX is this too conservative?
+		 * Almost the same thing as clauselist_selectivity, but with
+		 * the clauses connected by OR.
 		 */
-		ListCell   *arg;
-
-		s1 = 0.0;
-		foreach(arg, ((BoolExpr *) clause)->args)
-		{
-			Selectivity s2 = clause_selectivity(root,
-												(Node *) lfirst(arg),
-												varRelid,
-												jointype,
-												sjinfo);
-
-			s1 = s1 + s2 - s1 * s2;
-		}
+		s1 = clauselist_selectivity_or(root,
+									   ((BoolExpr *) clause)->args,
+									   varRelid,
+									   jointype,
+									   sjinfo);
 	}
 	else if (is_opclause(clause) || IsA(clause, DistinctExpr))
 	{
diff --git a/src/backend/statistics/extended_stats.c b/src/backend/statistics/extended_stats.c
index d17b8d9b1f..ccf9565c75 100644
--- a/src/backend/statistics/extended_stats.c
+++ b/src/backend/statistics/extended_stats.c
@@ -1202,7 +1202,8 @@ statext_is_compatible_clause(PlannerInfo *root, Node *clause, Index relid,
 static Selectivity
 statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varRelid,
 								   JoinType jointype, SpecialJoinInfo *sjinfo,
-								   RelOptInfo *rel, Bitmapset **estimatedclauses)
+								   RelOptInfo *rel, Bitmapset **estimatedclauses,
+								   bool is_or)
 {
 	ListCell   *l;
 	Bitmapset **list_attnums;
@@ -1289,13 +1290,36 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli
 	}
 
 	/*
-	 * First compute "simple" selectivity, i.e. without the extended
-	 * statistics, and essentially assuming independence of the
-	 * columns/clauses. We'll then use the various selectivities computed from
-	 * MCV list to improve it.
+	 * First compute "simple" selectivity, i.e. without the extended stats,
+	 * and essentially assuming independence of the columns/clauses. We'll
+	 * then use the selectivities computed from MCV list to improve it.
 	 */
-	simple_sel = clauselist_selectivity_simple(root, stat_clauses, varRelid,
-											   jointype, sjinfo, NULL);
+	if (is_or)
+	{
+		ListCell   *lc;
+		Selectivity s1 = 0.0,
+					s2;
+
+		/*
+		 * Selectivities of OR clauses are computed s1+s2 - s1*s2 to account
+		 * for the probable overlap of selected tuple sets.
+		 */
+		foreach(lc, stat_clauses)
+		{
+			s2 = clause_selectivity(root,
+									(Node *) lfirst(lc),
+									varRelid,
+									jointype,
+									sjinfo);
+
+			s1 = s1 + s2 - s1 * s2;
+		}
+
+		simple_sel = s1;
+	}
+	else
+		simple_sel = clauselist_selectivity_simple(root, stat_clauses, varRelid,
+												   jointype, sjinfo, NULL);
 
 	/*
 	 * Now compute the multi-column estimate from the MCV list, along with the
@@ -1303,7 +1327,8 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli
 	 */
 	mcv_sel = mcv_clauselist_selectivity(root, stat, stat_clauses, varRelid,
 										 jointype, sjinfo, rel,
-										 &mcv_basesel, &mcv_totalsel);
+										 &mcv_basesel, &mcv_totalsel,
+										 is_or);
 
 	/* Estimated selectivity of values not covered by MCV matches */
 	other_sel = simple_sel - mcv_basesel;
@@ -1331,13 +1356,14 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli
 Selectivity
 statext_clauselist_selectivity(PlannerInfo *root, List *clauses, int varRelid,
 							   JoinType jointype, SpecialJoinInfo *sjinfo,
-							   RelOptInfo *rel, Bitmapset **estimatedclauses)
+							   RelOptInfo *rel, Bitmapset **estimatedclauses,
+							   bool is_or)
 {
 	Selectivity sel;
 
 	/* First, try estimating clauses using a multivariate MCV list. */
 	sel = statext_mcv_clauselist_selectivity(root, clauses, varRelid, jointype,
-											 sjinfo, rel, estimatedclauses);
+											 sjinfo, rel, estimatedclauses, is_or);
 
 	/*
 	 * Then, apply functional dependencies on the remaining clauses by calling
@@ -1351,10 +1377,14 @@ statext_clauselist_selectivity(PlannerInfo *root, List *clauses, int varRelid,
 	 * For example, MCV list can give us an exact selectivity for values in
 	 * two columns, while functional dependencies can only provide information
 	 * about the overall strength of the dependency.
+	 *
+	 * Functional dependencies only work for clauses connected by AND, so skip
+	 * this for OR clauses.
 	 */
-	sel *= dependencies_clauselist_selectivity(root, clauses, varRelid,
-											   jointype, sjinfo, rel,
-											   estimatedclauses);
+	if (!is_or)
+		sel *= dependencies_clauselist_selectivity(root, clauses, varRelid,
+												   jointype, sjinfo, rel,
+												   estimatedclauses);
 
 	return sel;
 }
diff --git a/src/backend/statistics/mcv.c b/src/backend/statistics/mcv.c
index 87e232fdd4..3f42713aa2 100644
--- a/src/backend/statistics/mcv.c
+++ b/src/backend/statistics/mcv.c
@@ -1795,7 +1795,8 @@ mcv_clauselist_selectivity(PlannerInfo *root, StatisticExtInfo *stat,
 						   List *clauses, int varRelid,
 						   JoinType jointype, SpecialJoinInfo *sjinfo,
 						   RelOptInfo *rel,
-						   Selectivity *basesel, Selectivity *totalsel)
+						   Selectivity *basesel, Selectivity *totalsel,
+						   bool is_or)
 {
 	int			i;
 	MCVList    *mcv;
@@ -1808,7 +1809,7 @@ mcv_clauselist_selectivity(PlannerInfo *root, StatisticExtInfo *stat,
 	mcv = statext_mcv_load(stat->statOid);
 
 	/* build a match bitmap for the clauses */
-	matches = mcv_get_match_bitmap(root, clauses, stat->keys, mcv, false);
+	matches = mcv_get_match_bitmap(root, clauses, stat->keys, mcv, is_or);
 
 	/* sum frequencies for all the matching MCV items */
 	*basesel = 0.0;
diff --git a/src/include/statistics/extended_stats_internal.h b/src/include/statistics/extended_stats_internal.h
index b512ee908a..5171895bba 100644
--- a/src/include/statistics/extended_stats_internal.h
+++ b/src/include/statistics/extended_stats_internal.h
@@ -107,6 +107,7 @@ extern Selectivity mcv_clauselist_selectivity(PlannerInfo *root,
 											  SpecialJoinInfo *sjinfo,
 											  RelOptInfo *rel,
 											  Selectivity *basesel,
-											  Selectivity *totalsel);
+											  Selectivity *totalsel,
+											  bool is_or);
 
 #endif							/* EXTENDED_STATS_INTERNAL_H */
diff --git a/src/include/statistics/statistics.h b/src/include/statistics/statistics.h
index f5d9b6c73a..e18c9a6539 100644
--- a/src/include/statistics/statistics.h
+++ b/src/include/statistics/statistics.h
@@ -116,7 +116,8 @@ extern Selectivity statext_clauselist_selectivity(PlannerInfo *root,
 												  JoinType jointype,
 												  SpecialJoinInfo *sjinfo,
 												  RelOptInfo *rel,
-												  Bitmapset **estimatedclauses);
+												  Bitmapset **estimatedclauses,
+												  bool is_or);
 extern bool has_stats_of_kind(List *stats, char requiredkind);
 extern StatisticExtInfo *choose_best_statistics(List *stats, char requiredkind,
 												Bitmapset **clause_attnums,
-- 
2.21.0