From 03e75cf5db6b607cd62d2fdcd1b44e56fccaf3cf Mon Sep 17 00:00:00 2001
From: "Andrey V. Lepikhov"
Date: Wed, 6 Jul 2022 15:52:36 +0300
Subject: [PATCH 5/6] CorrelativeQuals is as simple as a List of RestrictInfo:
 a) only one restrictinfo in this group should be counted for any joinrel
 estimation; b) at least 1 restrictinfo in this group should be executed
 during execution.  In this commit, only the rows estimation issue is
 addressed.

PlannerInfo.correlative_quals is added to manage all the CorrelativeQuals at
subquery level.  RelOptInfo.cqual_indexes is a List * that indicates which
CorrelativeQuals this relation is related to.  This makes it easy to check
whether both sides of a joinrel are related to the same CorrelativeQuals.
Why the type isn't a Bitmapset * will be explained later.

The overall design for handling the joinrel size estimation is:

a) At the base relation level, we just count everything, including the
   correlative quals.
b) During joinrel size estimation at any level, we keep just one side's
   cqual (short for correlative qual) selectivity by eliminating the other
   one.  So the size estimation for a mergeable join becomes:

       rows = R1.rows * R2.rows
              * 1 / Max(ndistinct_of_colA, ndistinct_of_colB)
              * 1 / Selectivity(R1's CorrelativeQual)

   R1.rows * 1 / Selectivity(R1's CorrelativeQual) eliminates the impact of
   the CorrelativeQual on R1.  After this, the joinrel of (R1, R2) is still
   impacted by this CorrelativeQual, but only once at this level.  Later, if
   JoinRel(R1, R2) needs to join with R3, and R3 is impacted by this
   CorrelativeQuals as well, we again need to keep one and eliminate the
   other one as above.

The algorithm for choosing which selectivity is eliminated and which one is
kept is: when we join inner_rel and outer_rel with a mergeable join
restrictinfo, and both sides are impacted by the same CorrelativeQual, we
first choose which "side" to eliminate based on which side of the
restrictinfo has the higher distinct value.  The reason for this is more or
less that we used Max(ndistinctValT1, ndistinctValT2).  After deciding which
"side" to eliminate, the selectivity actually eliminated is that side's
RelOptInfo->cqual_selectivity[n].

Selectivity *RelOptInfo->cqual_selectivity:

The number of elements in cqual_selectivity equals the length of
cqual_indexes.  The semantics is which selectivity in the corresponding
CorrelativeQuals's qual list is taking effect.  At any time, only 1 qual
selectivity is counted for any level of joinrel, and the other side's
RelOptInfo->cqual_selectivity is used to set the upper
joinrel->cqual_selectivity.

In reality, it is possible to have many CorrelativeQuals, but for design
discussion the current implementation only takes care of a single
CorrelativeQuals.  This should be helpful for PoC/review/discussion.

Some flow for the key data:

1. root->correlative_quals is initialized at the
   generate_base_implied_equalities_no_const stage.  We create a
   CorrelativeQuals in this list for each ec_filter and fill in the
   RestrictInfo part for this cqual.  At the same time, we note which
   RelOptInfo (cqual_indexes) is related to this cqual.

2. RelOptInfo->cqual_selectivity for a baserel is set at the end of
   set_rel_size.  At this time the selectivity for every RestrictInfo has
   been calculated, so we can just fetch the cached value.  For a joinrel, it
   is maintained in calc_join_cqual_selectivity; this function returns the
   selectivity to eliminate and sets the above value.

Limitations in this PoC:

1. Only supports 1 CorrelativeQuals in root->correlative_quals.
2. Only tested with INNER_JOIN.
3. Inherited tables are not supported.
---
 src/backend/nodes/outfuncs.c              |   1 +
 src/backend/optimizer/path/allpaths.c     |  27 ++++
 src/backend/optimizer/path/costsize.c     | 182 ++++++++++++++++++++++
 src/backend/optimizer/path/equivclass.c   |  48 ++++--
 src/backend/optimizer/plan/planner.c      |   1 +
 src/backend/optimizer/prep/prepjointree.c |   1 +
 src/include/nodes/nodes.h                 |   1 +
 src/include/nodes/pathnodes.h             |  36 ++++-
 8 files changed, 280 insertions(+), 17 deletions(-)

diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index f31f1de983..5e0434df1e 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -2676,6 +2676,7 @@ _outEquivalenceFilter(StringInfo str, const EquivalenceFilter *node)
 	WRITE_UINT_FIELD(ef_source_rel);
 	WRITE_OID_FIELD(opfamily);
 	WRITE_INT_FIELD(amstrategy);
+	WRITE_NODE_FIELD(rinfo);
 }
 
 static void
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index e9342097e5..2ee28a94fc 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -463,6 +463,33 @@ set_rel_size(PlannerInfo *root, RelOptInfo *rel,
 	 * We insist that all non-dummy rels have a nonzero rowcount estimate.
 	 */
 	Assert(rel->rows > 0 || IS_DUMMY_REL(rel));
+
+	/* Record the selectivity of each CorrelativeQuals clause applied to this rel */
+	if (!rte->inh) /* not supported in this PoC */
+	{
+		ListCell *l;
+
+		rel->cqual_selectivity = palloc(sizeof(Selectivity) * list_length(rel->cqual_indexes));
+
+		foreach(l, rel->cqual_indexes)
+		{
+			int i = foreach_current_index(l);	/* slot parallel to cqual_indexes */
+			CorrelativeQuals *cquals = list_nth_node(CorrelativeQuals, root->correlative_quals, lfirst_int(l));
+			ListCell *l2;
+			bool found PG_USED_FOR_ASSERTS_ONLY = false;
+			foreach(l2, cquals->corr_restrictinfo)
+			{
+				RestrictInfo *rinfo = lfirst_node(RestrictInfo, l2);
+				if (bms_equal(rinfo->clause_relids, rel->relids))
+				{
+					found = true;
+					rel->cqual_selectivity[i] = rinfo->norm_selec > 0 ? rinfo->norm_selec : rinfo->outer_selec;
+					Assert(rel->cqual_selectivity[i] > 0);
+				}
+			}
+			Assert(found);
+		}
+	}
 }
 
 /*
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index fcc26b01a4..03b92a2a88 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -5428,6 +5428,138 @@ get_parameterized_joinrel_size(PlannerInfo *root, RelOptInfo *rel,
 	return nrows;
 }
 
+
+/*
+ * Given a mergeable RestrictInfo whose two sides are each a single base rel,
+ * find out which relid should be used for eliminating CorrelativeQuals selectivity.
+ */
+static int
+find_relid_to_eliminate(PlannerInfo *root, RestrictInfo *rinfo)
+{
+	int left_relid, right_relid;
+	RelOptInfo *lrel, *rrel;
+	bool res PG_USED_FOR_ASSERTS_ONLY;
+
+	res = bms_get_singleton_member(rinfo->left_relids, &left_relid);
+	Assert(res);
+	res = bms_get_singleton_member(rinfo->right_relids, &right_relid);
+	Assert(res);
+
+	lrel = root->simple_rel_array[left_relid];
+	rrel = root->simple_rel_array[right_relid];
+
+	/* XXX: Assumes only one CorrelativeQuals exists, hence slot [0] on both rels. */
+
+	if (lrel->cqual_selectivity[0] > rrel->cqual_selectivity[0])
+		return left_relid;
+
+	return right_relid;
+}
+
+/*
+ * calc_join_cqual_selectivity
+ *
+ * When joining two relations, if both sides are impacted by the same CorrelativeQuals,
+ * we need to eliminate one of them and note the other one for future elimination when joining
+ * another correlated relation.  Otherwise we just note that the joinrel is still impacted by
+ * the single side's CorrelativeQuals.
+ *
+ * Return value is the Selectivity we need to eliminate for estimating the current
+ * joinrel.  As a side effect, joinrel->cqual_indexes and joinrel->cqual_selectivity are rebuilt.
+ */
+static double
+calc_join_cqual_selectivity(PlannerInfo *root,
+							RelOptInfo *joinrel,
+							RelOptInfo *outer_rel,
+							RelOptInfo *inner_rel,
+							RestrictInfo *rinfo)
+{
+	double res = 1;
+	ListCell *lc1, *lc2;
+	Selectivity left_sel; /* The cqual selectivity still in effect on this joinrel. */
+
+	/*
+	 * Find how many distinct CorrelativeQuals apply to this joinrel and allocate one
+	 * remaining-selectivity slot for each of them.
+	 */
+	List *final_cq_list = list_union_int(outer_rel->cqual_indexes, inner_rel->cqual_indexes);
+
+	joinrel->cqual_selectivity = palloc(sizeof(Selectivity) * list_length(final_cq_list));
+	list_free(joinrel->cqual_indexes);	/* we may be re-entered for the same joinrel */
+	joinrel->cqual_indexes = NIL;		/* rebuilt below, parallel to cqual_selectivity */
+
+	foreach(lc1, outer_rel->cqual_indexes)
+	{
+		int outer_cq_index = lfirst_int(lc1);
+		int inner_cq_pos = -1;
+		int outer_idx = foreach_current_index(lc1);
+		int curr_sel_len;
+
+		/*
+		 * Check if the same CorrelativeQuals is applied on both sides.
+		 * If yes, we need to decide which one to eliminate and which one
+		 * to keep.  Otherwise we just keep the selectivity for future use.
+		 */
+		foreach(lc2, inner_rel->cqual_indexes)
+		{
+			if (outer_cq_index == lfirst_int(lc2))
+				inner_cq_pos = foreach_current_index(lc2);
+		}
+
+		if (inner_cq_pos >= 0)
+		{
+			/* This CorrelativeQuals impacts both sides: eliminate one, keep the other. */
+			int relid = find_relid_to_eliminate(root, rinfo);
+			if (bms_is_member(relid, outer_rel->relids))
+			{
+				/* Eliminate the outer side's selectivity; the inner one stays in effect. */
+				res *= outer_rel->cqual_selectivity[outer_idx];
+				left_sel = inner_rel->cqual_selectivity[inner_cq_pos];
+			}
+			else
+			{
+				/* Eliminate the inner side's selectivity; the outer one stays in effect. */
+				res *= inner_rel->cqual_selectivity[inner_cq_pos];
+				left_sel = outer_rel->cqual_selectivity[outer_idx];
+			}
+		}
+		else
+		{
+			/* Only shown in outer side. */
+			left_sel = outer_rel->cqual_selectivity[outer_idx];
+		}
+
+		/*
+		 * If any side of the join is impacted by a cqual, the joinrel is impacted by it
+		 * for sure.  Record the CorrelativeQuals index itself, not its list position.
+		 */
+		curr_sel_len = list_length(joinrel->cqual_indexes);
+		joinrel->cqual_indexes = lappend_int(joinrel->cqual_indexes, outer_cq_index);
+
+		joinrel->cqual_selectivity[curr_sel_len] = left_sel;
+	}
+
+	/* Push any cqual information which exists in inner_rel only to the joinrel. */
+	foreach(lc1, inner_rel->cqual_indexes)
+	{
+		int inner_cq_index = lfirst_int(lc1);
+		int curr_sel_len;
+
+		if (list_member_int(outer_rel->cqual_indexes, inner_cq_index))
+			/* have been handled in the previous loop */
+			continue;
+
+		curr_sel_len = list_length(joinrel->cqual_indexes);
+		joinrel->cqual_indexes = lappend_int(joinrel->cqual_indexes, inner_cq_index);
+		joinrel->cqual_selectivity[curr_sel_len] = inner_rel->cqual_selectivity[foreach_current_index(lc1)];
+	}
+
+	list_free(final_cq_list);
+
+	return res;
+}
+
+
 /*
  * calc_joinrel_size_estimate
  *		Workhorse for set_joinrel_size_estimates and
@@ -5571,6 +5703,56 @@ calc_joinrel_size_estimate(PlannerInfo *root,
 			break;
 	}
 
+	{
+		Selectivity m1 = 1;
+		bool should_eliminate = false;
+		RestrictInfo *rinfo = NULL;
+
+		/*
+		 * XXX: PoC hack.  Only adjust when the "only one" restrictinfo is the one
+		 * impacted by "the only one" CorrelativeQuals, for example:
+		 *
+		 * SELECT * FROM t1, t2, t3 WHERE t1.a = t2.a and t2.a = t3.a and t3.a > 2;
+		 */
+		if (list_length(root->correlative_quals) == 1 &&
+			list_length(restrictlist) == 1 &&
+			jointype == JOIN_INNER)
+		{
+			int left_relid, right_relid;
+			rinfo = linitial_node(RestrictInfo, restrictlist);
+			if (rinfo->mergeopfamilies != NIL &&
+				bms_get_singleton_member(rinfo->left_relids, &left_relid) &&
+				bms_get_singleton_member(rinfo->right_relids, &right_relid))
+			{
+				List *intersect_cq_indexes = list_intersection_int(
+					root->simple_rel_array[left_relid]->cqual_indexes,
+					root->simple_rel_array[right_relid]->cqual_indexes);
+
+				if (intersect_cq_indexes != NIL &&
+					!root->simple_rte_array[left_relid]->inh &&
+					!root->simple_rte_array[right_relid]->inh)
+					should_eliminate = true;
+			}
+		}
+
+		if (should_eliminate)
+			m1 = calc_join_cqual_selectivity(root, joinrel, outer_rel, inner_rel, rinfo);
+
+		/* elog(INFO, */
+		/* 	 "joinrelids: %s, outer_rel: %s, inner_rel: %s, join_clauselist: %s outer rows: %f, inner_rows: %f, join rows: %f, jselec: %f, m1 = %f, m2 = %f", */
+		/* 	 bmsToString(joinrel->relids), */
+		/* 	 bmsToString(outer_rel->relids), */
+		/* 	 bmsToString(inner_rel->relids), */
+		/* 	 bmsToString(join_list_relids), */
+		/* 	 outer_rel->rows, */
+		/* 	 inner_rel->rows, */
+		/* 	 nrows, */
+		/* 	 jselec, */
+		/* 	 m1, */
+		/* 	 m2); */
+		nrows /= m1;
+	}
+
 	return clamp_row_est(nrows);
 }
 
diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c
index b3e5ebfbb1..3efeb1f333 100644
--- a/src/backend/optimizer/path/equivclass.c
+++ b/src/backend/optimizer/path/equivclass.c
@@ -1272,6 +1272,8 @@ generate_base_implied_equalities_no_const(PlannerInfo *root,
 	EquivalenceMember **prev_ems;
 	ListCell   *lc;
 	ListCell   *lc2;
+	int			start_cq_index = list_length(root->correlative_quals);
+	int			ef_index = 0;
 
 	/*
 	 * We scan the EC members once and track the last-seen member for each
@@ -1338,9 +1340,11 @@ generate_base_implied_equalities_no_const(PlannerInfo *root,
 
 	pfree(prev_ems);
 
+	if (ec->ec_broken)
+		goto ec_filter_done;
 
 	/*
-	 * Also push any EquivalenceFilter clauses down into all relations
+	 * Push any EquivalenceFilter clauses down into all relations
 	 * other than the one which the filter actually originated from.
 	 */
 	foreach(lc2, ec->ec_filters)
@@ -1350,19 +1354,25 @@ generate_base_implied_equalities_no_const(PlannerInfo *root,
 		Expr	   *rightexpr;
 		Oid			opno;
 		int			relid;
-
-		if (ec->ec_broken)
-			break;
+		CorrelativeQuals *cquals = makeNode(CorrelativeQuals);
 
 		foreach(lc, ec->ec_members)
 		{
 			EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc);
+			RelOptInfo *rel;
+			RestrictInfo *rinfo;
 
 			if (!bms_get_singleton_member(cur_em->em_relids, &relid))
 				continue;
 
+			rel = root->simple_rel_array[relid];
+
 			if (ef->ef_source_rel == relid)
+			{
+				rel->cqual_indexes = list_append_unique_int(rel->cqual_indexes, start_cq_index + ef_index);
+				cquals->corr_restrictinfo = lappend(cquals->corr_restrictinfo, ef->rinfo);
 				continue;
+			}
 
 			if (ef->ef_const_is_left)
 			{
@@ -1383,19 +1393,28 @@ generate_base_implied_equalities_no_const(PlannerInfo *root,
 
 			if (opno == InvalidOid)
 				continue;
-
-			process_implied_equality(root, opno,
-									 ec->ec_collation,
-									 leftexpr,
-									 rightexpr,
-									 bms_copy(ec->ec_relids),
-									 bms_copy(cur_em->em_nullable_relids),
-									 ec->ec_min_security,
-									 ec->ec_below_outer_join,
-									 false);
+			rinfo = process_implied_equality(root, opno,
+											 ec->ec_collation,
+											 leftexpr,
+											 rightexpr,
+											 bms_copy(ec->ec_relids),
+											 bms_copy(cur_em->em_nullable_relids),
+											 ec->ec_min_security,
+											 ec->ec_below_outer_join,
+											 false);
+			cquals->corr_restrictinfo = lappend(cquals->corr_restrictinfo, rinfo);
+			rel->cqual_indexes = list_append_unique_int(rel->cqual_indexes, start_cq_index + ef_index);
 		}
+
+		ef_index += 1;
+
+		root->correlative_quals = lappend(root->correlative_quals, cquals);
 	}
 
+ec_filter_done:
+	/*
+	 * XXX this label can be removed after moving ec_filter to the end of this function.
+	 */
 	/*
 	 * We also have to make sure that all the Vars used in the member clauses
 	 * will be available at any join node we might try to reference them at.
@@ -2073,6 +2092,7 @@ distribute_filter_quals_to_eclass(PlannerInfo *root, List *quallist)
 						efilter->ef_source_rel = relid;
 						efilter->opfamily = opfamily;
 						efilter->amstrategy = amstrategy;
+						efilter->rinfo = rinfo;
 
 						ec->ec_filters = lappend(ec->ec_filters, efilter);
 						break;		/* Onto the next eclass */
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 06ad856eac..2be2429454 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -619,6 +619,7 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
 	root->multiexpr_params = NIL;
 	root->eq_classes = NIL;
 	root->ec_merging_done = false;
+	root->correlative_quals = NIL;
 	root->all_result_relids =
 		parse->resultRelation ? bms_make_singleton(parse->resultRelation) : NULL;
 	root->leaf_result_relids = NULL;	/* we'll find out leaf-ness later */
diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c
index 0bd99acf83..d427de6f85 100644
--- a/src/backend/optimizer/prep/prepjointree.c
+++ b/src/backend/optimizer/prep/prepjointree.c
@@ -999,6 +999,7 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte,
 	subroot->multiexpr_params = NIL;
 	subroot->eq_classes = NIL;
 	subroot->ec_merging_done = false;
+	subroot->correlative_quals = NIL;
 	subroot->all_result_relids = NULL;
 	subroot->leaf_result_relids = NULL;
 	subroot->append_rel_list = NIL;
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h
index ba879ab3e9..8800a05252 100644
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -278,6 +278,7 @@ typedef enum NodeTag
 	/* these aren't subclasses of Path: */
 	T_EquivalenceClass,
 	T_EquivalenceFilter,
+	T_CorrelativeQuals,
 	T_EquivalenceMember,
 	T_PathKey,
 	T_PathKeyInfo,
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index 942a52fcac..1e9bb39277 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -251,6 +251,8 @@ struct PlannerInfo
 
 	bool		ec_merging_done;	/* set true once ECs are canonical */
 
+	List	   *correlative_quals;	/* list of CorrelativeQuals for this subquery */
+
 	List	   *canon_pathkeys; /* list of "canonical" PathKeys */
 
 	List	   *left_join_clauses;	/* list of RestrictInfos for mergejoinable
@@ -767,6 +769,18 @@ typedef struct RelOptInfo
 	 * Indexes in PlannerInfo's eq_classes list of ECs that mention this rel
 	 */
 	Bitmapset  *eclass_indexes;
+	List	   *cqual_indexes;	/* Indexes in PlannerInfo's correlative_quals list of
+								 * the CorrelativeQuals applied to this rel.  Valid on
+								 * both baserels and joinrels.  Used to quickly check
+								 * whether both sides share the same CorrelativeQuals.
+								 */
+	Selectivity *cqual_selectivity;	/*
+								 * The number of elements in cqual_selectivity equals
+								 * the length of cqual_indexes, in the same order.  Each
+								 * element is the selectivity of the qual in the matching
+								 * CorrelativeQuals that is taking effect.  At any time,
+								 * only 1 qual selectivity is counted per joinrel.
+								 */
 	PlannerInfo *subroot;		/* if subquery */
 	List	   *subplan_params; /* if subquery */
 	/* wanted number of parallel workers */
@@ -1181,8 +1195,24 @@ typedef struct EquivalenceFilter
 	Index		ef_source_rel;	/* relid of originating relation. */
 	Oid			opfamily;
 	int			amstrategy;
+	struct RestrictInfo *rinfo; /* source restrictInfo for this EquivalenceFilter */
 } EquivalenceFilter;
 
+
+/*
+ * Currently this is as simple as a List of RestrictInfo.  It means: a) for any joinrel size
+ * estimation, only one restrictinfo in this group should be counted; b) during execution,
+ * at least 1 restrictinfo in this group should be executed.
+ *
+ * It is defined as a Node just for better extensibility; we can strip it down to a List *
+ * if we are sure nothing else is needed.
+ */
+typedef struct CorrelativeQuals
+{
+	NodeTag	type;
+	List	*corr_restrictinfo;
+} CorrelativeQuals;
+
 /*
  * If an EC contains a const and isn't below-outer-join, any PathKey depending
  * on it must be redundant, since there's only one possible value of the key.
@@ -2872,7 +2902,7 @@ typedef enum
  *
  * flags indicating what kinds of grouping are possible.
  * partial_costs_set is true if the agg_partial_costs and agg_final_costs
- * 		have been initialized.
+ *		have been initialized.
  * agg_partial_costs gives partial aggregation costs.
  * agg_final_costs gives finalization costs.
  * target_parallel_safe is true if target is parallel safe.
@@ -2902,8 +2932,8 @@ typedef struct
  * limit_tuples is an estimated bound on the number of output tuples,
  *		or -1 if no LIMIT or couldn't estimate.
  * count_est and offset_est are the estimated values of the LIMIT and OFFSET
- * 		expressions computed by preprocess_limit() (see comments for
- * 		preprocess_limit() for more information).
+ *		expressions computed by preprocess_limit() (see comments for
+ *		preprocess_limit() for more information).
  */
 typedef struct
 {
-- 
2.37.0