From 902c005fb3c064da67550fea6d29a8bb21a8fb28 Mon Sep 17 00:00:00 2001 From: amit Date: Wed, 13 Sep 2017 18:24:55 +0900 Subject: [PATCH 3/7] Planner-side changes for partition-pruning This adds all the necessary planner code and representations viz. 0. Code to teach set_append_rel_size/pathlist to look at only the *live* partitions of partitioned tables. 1. Add a field partcollation to PartitionScheme, which will be needed to verify that an operator clause's input collation indeed matches what is used for partitioning, to be able to use the clause for partition-pruning (using parttypcoll won't be correct, because that's not what's used by partitioning) 2. Code to match the clauses to the table's partition key and generate a list of such matching clauses. 3. Add a field to RelOptInfo to store an array of pointers to the AppendRelInfos of *all* partitions (stored in the same order as their RelOptInfos in part_rels) 4. Add a field to RelOptInfo to store a list of AppendRelInfos of *live* partitions that survived partition-pruning (although as of this commit this contains *all* appinfos as mentioned below). 5. Some code in try_partition_wise_join to handle the possibility that a partition RelOptInfo may not have the basic information set (note that as noted in 0, set_append_rel_size now sets such information for only the *live* partitions) If the clauses identified in 2 above do not contain values necessary to perform partition pruning, get_partitions_from_clauses returns without pruning any partitions. In most cases, it's obvious in the planner that a set of clauses identified as matching the partition key don't contain the constant values right away, in which case, there is no need to call get_partitions_from_clauses right away. Instead, it should be deferred to another piece of code which can receive the above list of clauses and run at a time when the constant values become available. 
In addition, a stub function get_partitions_from_clauses is added in partition.c, which currently simply returns all partitions from the partition descriptor. Authors: Amit Langote, Dilip Kumar --- src/backend/catalog/partition.c | 18 ++ src/backend/optimizer/path/allpaths.c | 587 +++++++++++++++++++++++++++------- src/backend/optimizer/path/indxpath.c | 3 - src/backend/optimizer/path/joinrels.c | 24 ++ src/backend/optimizer/plan/planner.c | 20 +- src/backend/optimizer/util/plancat.c | 4 + src/backend/optimizer/util/relnode.c | 101 ++++++ src/include/catalog/partition.h | 2 + src/include/catalog/pg_opfamily.h | 3 + src/include/nodes/relation.h | 29 +- src/include/optimizer/pathnode.h | 4 + 11 files changed, 662 insertions(+), 133 deletions(-) diff --git a/src/backend/catalog/partition.c b/src/backend/catalog/partition.c index 5daa8a1c19..5e601dd0a4 100644 --- a/src/backend/catalog/partition.c +++ b/src/backend/catalog/partition.c @@ -1421,6 +1421,24 @@ get_partition_dispatch_recurse(Relation rel, Relation parent, } } +/* + * get_partitions_from_clauses + * Determine the set of partitions of relation that will satisfy all + * the clauses contained in partclauses + * + * Outputs: + * A Bitmapset containing indexes of all selected partitions. 
+ */ +Bitmapset * +get_partitions_from_clauses(Relation relation, List *partclauses) +{ + PartitionDesc partdesc = RelationGetPartitionDesc(relation); + Bitmapset *result = NULL; + + result = bms_add_range(result, 0, partdesc->nparts - 1); + return result; +} + /* Module-local functions */ /* diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index a6efb4e1d3..77b13ad397 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -20,9 +20,12 @@ #include "access/sysattr.h" #include "access/tsmapi.h" +#include "catalog/partition.h" #include "catalog/pg_class.h" #include "catalog/pg_operator.h" +#include "catalog/pg_opfamily.h" #include "catalog/pg_proc.h" +#include "catalog/pg_type.h" #include "foreign/fdwapi.h" #include "miscadmin.h" #include "nodes/makefuncs.h" @@ -135,6 +138,13 @@ static void recurse_push_qual(Node *setOp, Query *topquery, static void remove_unused_subquery_outputs(Query *subquery, RelOptInfo *rel); static void add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel, List *live_childrels); +static List *get_append_rel_partitions(PlannerInfo *root, + RelOptInfo *rel, + RangeTblEntry *rte); +static List *match_clauses_to_partkey(RelOptInfo *rel, + List *clauses, + bool *contains_const, + bool *constfalse); /* @@ -846,6 +856,381 @@ set_foreign_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) } /* + * get_append_rel_partitions + * Return the list of partitions of rel that pass the clauses mentioned + * rel->baserestrictinfo + * + * Returned list contains the AppendRelInfos of chosen partitions. 
+ */ +static List * +get_append_rel_partitions(PlannerInfo *root, + RelOptInfo *rel, + RangeTblEntry *rte) +{ + Relation parent = heap_open(rte->relid, NoLock); + PartitionDesc partdesc = RelationGetPartitionDesc(parent); + List *partclauses; + List *result = NIL; + int i; + Bitmapset *partindexes = NULL; + bool contains_const, + constfalse; + + /* + * Get the clauses that match the partition key, including any nullness + * tests against partition keys. The callee also reports whether they + * contain constants and whether a constant-false qual was found. + */ + partclauses = match_clauses_to_partkey(rel, + list_copy(rel->baserestrictinfo), + &contains_const, + &constfalse); + + /* + * If the matched clauses contain at least some constant operands, use + * the same to prune partitions right away. + */ + if (partclauses != NIL && contains_const && !constfalse) + partindexes = get_partitions_from_clauses(parent, partclauses); + else if (!constfalse) + /* No clauses to prune partitions, so scan all partitions. */ + partindexes = bms_add_range(partindexes, 0, partdesc->nparts - 1); + + /* Fetch the partition appinfos. */ + while ((i = bms_first_member(partindexes)) >= 0) + { + AppendRelInfo *appinfo = rel->part_appinfos[i]; +#ifdef USE_ASSERT_CHECKING + RangeTblEntry *rte = planner_rt_fetch(appinfo->child_relid, root); + + /* + * Must be the intended child's RTE here, because appinfos are ordered + * the same way as partitions in the partition descriptor. + */ + Assert(partdesc->oids[i] == rte->relid); +#endif + result = lappend(result, appinfo); + } + + /* Remember for future users such as set_append_rel_pathlist(). 
*/ + rel->live_part_appinfos = result; + + heap_close(parent, NoLock); + + return result; +} + +#define PartCollMatchesExprColl(partcoll, exprcoll) \ + ((partcoll) == InvalidOid || (partcoll) == (exprcoll)) + +/* + * match_clauses_to_partkey + * Match clauses with rel's partition key + * + * For an individual clause to match with a partition key column, the clause: + * + * 1. must be in the form (partkey op const) or (const op partkey); + * 2. must contain an operator which is in the same operator family as the + * partitioning operator for the partition key column + * 3. its input collation must match the partitioning collation + * + * The "const" mentioned in 1 means any expression that doesn't involve a + * volatile function or a Var of this relation. We allow Vars belonging to + * other relations (for example, if the clause is a join clause), but they + * are treated as parameters whose values are not known now, so cannot be + * used for partition pruning right within the planner. It's the + * responsibility of higher code levels to manage restriction and join + * clauses appropriately. + * + * If a NullTest against a partition key is encountered, it's added to the + * result as well. + * + * If clauses contains at least one constant operand or a Nullness test, + * *contains_const is set so that the caller can pass the clauses to the + * partitioning module right away. + * + * If the list contains a pseudo-constant RestrictInfo with constant false + * value, *constfalse is set. 
+ */ +static List * +match_clauses_to_partkey(RelOptInfo *rel, + List *clauses, + bool *contains_const, + bool *constfalse) +{ + PartitionScheme partscheme = rel->part_scheme; + List *result = NIL; + ListCell *lc; + + *contains_const = false; + *constfalse = false; + + Assert (partscheme != NULL); + + foreach(lc, clauses) + { + Expr *clause; + int i; + + if (IsA(lfirst(lc), RestrictInfo)) + { + RestrictInfo *rinfo = lfirst(lc); + + clause = rinfo->clause; + if (rinfo->pseudoconstant && + (IsA(clause, Const) && + ((((Const *) clause)->constisnull) || + !DatumGetBool(((Const *) clause)->constvalue)))) + { + *constfalse = true; + continue; + } + } + else + clause = (Expr *) lfirst(lc); + + /* Get the BoolExpr's out of the way. */ + if (IsA(clause, BoolExpr)) + { + bool contains_const1, + constfalse1; + + /* + * If the OR's args contain clauses that match, add the clause + * to the result. + */ + if (or_clause((Node *) clause) && + match_clauses_to_partkey(rel, + list_copy(((BoolExpr *) clause)->args), + &contains_const1, + &constfalse1) != NIL) + { + result = lappend(result, clause); + *contains_const |= contains_const1; + continue; + } + else if (and_clause((Node *) clause)) + { + /* + * These clauses are ANDed with the clauses in the + * original list, so queue them after the latter. Note + * that it also means that a queued clause will be added to + * the result if it happens to match. + */ + clauses = list_concat(clauses, + list_copy(((BoolExpr *) clause)->args)); + continue; + } + /* Fall-through for a NOT clause, which is handled below. */ + } + + for (i = 0; i < partscheme->partnatts; i++) + { + Node *partkey = linitial(rel->partexprs[i]); + Oid partopfamily = partscheme->partopfamily[i], + partcoll = partscheme->partcollation[i]; + + /* + * Check if the operator is compatible with partitioning and if + * so, add it to the list of opclauses matched with this partition + * key. 
+ */ + if (is_opclause(clause)) + { + Expr *constexpr, + *leftop, + *rightop; + Relids constrelids, + left_relids, + right_relids; + Oid expr_op, + expr_coll; + + leftop = (Expr *) get_leftop(clause); + rightop = (Expr *) get_rightop(clause); + expr_op = ((OpExpr *) clause)->opno; + expr_coll = ((OpExpr *) clause)->inputcollid; + left_relids = pull_varnos((Node *) leftop); + right_relids = pull_varnos((Node *) rightop); + + if (IsA(leftop, RelabelType)) + leftop = ((RelabelType *) leftop)->arg; + if (IsA(rightop, RelabelType)) + rightop = ((RelabelType *) rightop)->arg; + + if (equal(leftop, partkey)) + { + constexpr = rightop; + constrelids = right_relids; + } + else if (equal(rightop, partkey)) + { + constexpr = leftop; + constrelids = left_relids; + expr_op = get_commutator(expr_op); + /* + * If no commutator exists, cannot flip the qual's args, + * so give up. + */ + if (!OidIsValid(expr_op)) + continue; + } + else + /* Neither argument matches the partition key. */ + continue; + + /* + * Only allow strict operators to think sanely about the + * behavior with null arguments. + */ + if (!op_strict(expr_op)) + continue; + + /* + * Check if the operator is in the partition operator family. + * It the operator happens to be '<>', which is never listed + * as part of the operator family, check if its negator + * exists and that the latter is compatible with partitioning. + */ + if (!op_in_opfamily(expr_op, partopfamily)) + { + Oid negator = get_negator(expr_op); + + if (!OidIsValid(negator) || + !op_in_opfamily(negator, partopfamily)) + continue; + } + + /* + * Useless if what we're thinking of as a constant is actually + * a Var coming from this relation. + */ + if (bms_is_member(rel->relid, constrelids)) + continue; + + /* Useless if the "constant" can change its value. */ + if (contain_volatile_functions((Node *) constexpr)) + continue; + + /* + * Also, useless, if the clause's collation is different from + * the partitioning collation. 
+ */ + if (!PartCollMatchesExprColl(partcoll, expr_coll)) + continue; + + /* + * Everything seems to be fine, so add it to the list of + * clauses we will use for pruning. Flip the left and right + * args if we have to, because the code that extracts the + * constant value to use for partition-pruning expects to find + * it as the rightop of the clause. + */ + if (constexpr == rightop) + result = lappend(result, clause); + else + { + OpExpr *commuted; + + commuted = (OpExpr *) copyObject(clause); + commuted->opno = expr_op; + commuted->opfuncid = get_opcode(expr_op); + commuted->args = list_make2(rightop, leftop); + result = lappend(result, commuted); + } + + *contains_const |= IsA(constexpr, Const); + } + else if (IsA(clause, ScalarArrayOpExpr)) + { + ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) clause; + Oid saop_op = saop->opno; + Oid saop_coll = saop->inputcollid; + Node *leftop = (Node *) linitial(saop->args), + *rightop = (Node *) lsecond(saop->args); + + if (IsA(leftop, RelabelType)) + leftop = (Node *) ((RelabelType *) leftop)->arg; + if (!equal(leftop, partkey)) + continue; + + /* Check if saop_op is compatible with partitioning. */ + if (!op_strict(saop_op)) + continue; + + /* + * In case of NOT IN (..), we get a '<>', which while not + * listed as part of any operator family, we are able to + * handle the same if its negator is indeed a part of the + * partitioning operator family. + */ + if (!op_in_opfamily(saop_op, partopfamily)) + { + Oid negator = get_negator(saop_op); + + if (!OidIsValid(negator) || + !op_in_opfamily(negator, partopfamily)) + continue; + } + + /* Useless if the "constant" can change its value. */ + if (contain_volatile_functions((Node *) rightop)) + continue; + + /* + * Also, useless, if the clause's collation is different from + * the partitioning collation. 
+ */ + if (!PartCollMatchesExprColl(partcoll, saop_coll)) + continue; + } + else if (IsA(clause, NullTest)) + { + NullTest *nulltest = (NullTest *) clause; + Node *arg = (Node *) nulltest->arg; + + if (equal(arg, partkey)) + { + result = lappend(result, nulltest); + /* Nullness test can be used right away. */ + *contains_const = true; + } + } + /* + * Boolean conditions have a special shape, which accept if the + * partitioning opfamily accepts Boolean conditions. + */ + else if (IsBooleanOpfamily(partopfamily) && + (IsA(clause, BooleanTest) || + IsA(clause, Var) || not_clause((Node *) clause))) + { + if (IsA(clause, BooleanTest)) + { + BooleanTest *btest = (BooleanTest *) clause; + + if (btest->booltesttype != IS_UNKNOWN && + btest->booltesttype != IS_NOT_UNKNOWN) + if (equal((Node *) btest->arg, partkey)) + result = lappend(result, clause); + } + else if (IsA(clause, Var) && equal((Node *) clause, partkey)) + result = lappend(result, clause); + else + { + Node *arg = (Node *) get_notclausearg((Expr *) clause); + + if (equal(arg, partkey)) + result = lappend(result, clause); + } + *contains_const = true; + } + } + } + + return result; +} + +/* * set_append_rel_size * Set size estimates for a simple "append relation" * @@ -860,6 +1245,7 @@ static void set_append_rel_size(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTblEntry *rte) { + List *rel_appinfos = NIL; int parentRTindex = rti; bool has_live_children; double parent_rows; @@ -873,6 +1259,23 @@ set_append_rel_size(PlannerInfo *root, RelOptInfo *rel, Assert(IS_SIMPLE_REL(rel)); + if (rte->relkind != RELKIND_PARTITIONED_TABLE) + { + foreach (l, root->append_rel_list) + { + AppendRelInfo *appinfo = lfirst(l); + + /* append_rel_list contains all append rels; ignore others */ + if (appinfo->parent_relid == parentRTindex) + rel_appinfos = lappend(rel_appinfos, appinfo); + } + } + else + { + rel_appinfos = get_append_rel_partitions(root, rel, rte); + rel->live_partitioned_rels = list_make1_int(rti); + } + /* * 
Initialize to compute size estimates for whole append relation. * @@ -893,7 +1296,7 @@ set_append_rel_size(PlannerInfo *root, RelOptInfo *rel, nattrs = rel->max_attr - rel->min_attr + 1; parent_attrsizes = (double *) palloc0(nattrs * sizeof(double)); - foreach(l, root->append_rel_list) + foreach(l, rel_appinfos) { AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l); int childRTindex; @@ -906,10 +1309,6 @@ set_append_rel_size(PlannerInfo *root, RelOptInfo *rel, ListCell *childvars; ListCell *lc; - /* append_rel_list contains all append rels; ignore others */ - if (appinfo->parent_relid != parentRTindex) - continue; - childRTindex = appinfo->child_relid; childRTE = root->simple_rte_array[childRTindex]; @@ -920,85 +1319,11 @@ set_append_rel_size(PlannerInfo *root, RelOptInfo *rel, childrel = find_base_rel(root, childRTindex); Assert(childrel->reloptkind == RELOPT_OTHER_MEMBER_REL); - if (rel->part_scheme) - { - AttrNumber attno; - - /* - * We need attr_needed data for building targetlist of a join - * relation representing join between matching partitions for - * partition-wise join. A given attribute of a child will be - * needed in the same highest joinrel where the corresponding - * attribute of parent is needed. Hence it suffices to use the - * same Relids set for parent and child. - */ - for (attno = rel->min_attr; attno <= rel->max_attr; attno++) - { - int index = attno - rel->min_attr; - Relids attr_needed = rel->attr_needed[index]; - - /* System attributes do not need translation. */ - if (attno <= 0) - { - Assert(rel->min_attr == childrel->min_attr); - childrel->attr_needed[index] = attr_needed; - } - else - { - Var *var = list_nth_node(Var, - appinfo->translated_vars, - attno - 1); - int child_index; - - /* - * Ignore any column dropped from the parent. - * Corresponding Var won't have any translation. It won't - * have attr_needed information, since it can not be - * referenced in the query. 
- */ - if (var == NULL) - { - Assert(attr_needed == NULL); - continue; - } - - child_index = var->varattno - childrel->min_attr; - childrel->attr_needed[child_index] = attr_needed; - } - } - } - /* - * Copy/Modify targetlist. Even if this child is deemed empty, we need - * its targetlist in case it falls on nullable side in a child-join - * because of partition-wise join. - * - * NB: the resulting childrel->reltarget->exprs may contain arbitrary - * expressions, which otherwise would not occur in a rel's targetlist. - * Code that might be looking at an appendrel child must cope with - * such. (Normally, a rel's targetlist would only include Vars and - * PlaceHolderVars.) XXX we do not bother to update the cost or width - * fields of childrel->reltarget; not clear if that would be useful. + * Initialize some properties of child rel from the parent rel, such + * target list, equivalence class members, etc. */ - childrel->reltarget->exprs = (List *) - adjust_appendrel_attrs(root, - (Node *) rel->reltarget->exprs, - 1, &appinfo); - - /* - * We have to make child entries in the EquivalenceClass data - * structures as well. This is needed either if the parent - * participates in some eclass joins (because we will want to consider - * inner-indexscan joins on the individual children) or if the parent - * has useful pathkeys (because we should try to build MergeAppend - * paths that produce those sort orderings). Even if this child is - * deemed dummy, it may fall on nullable side in a child-join, which - * in turn may participate in a MergeAppend, where we will need the - * EquivalenceClass data structures. 
- */ - if (rel->has_eclass_joins || has_useful_pathkeys(root, rel)) - add_child_rel_equivalences(root, appinfo, rel, childrel); - childrel->has_eclass_joins = rel->has_eclass_joins; + set_basic_child_rel_properties(root, rel, childrel, appinfo); /* * We have to copy the parent's quals to the child, with appropriate @@ -1164,6 +1489,17 @@ set_append_rel_size(PlannerInfo *root, RelOptInfo *rel, has_live_children = true; /* + * If childrel is itself partitioned, add it and its partitioned + * children to the list being propagated up to the root rel. + */ + if (childrel->part_scheme && rel->part_scheme) + { + rel->live_partitioned_rels = + list_concat(rel->live_partitioned_rels, + list_copy(childrel->live_partitioned_rels)); + } + + /* * If any live child is not parallel-safe, treat the whole appendrel * as not parallel-safe. In future we might be able to generate plans * in which some children are farmed out to workers while others are @@ -1259,14 +1595,29 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTblEntry *rte) { int parentRTindex = rti; - List *live_childrels = NIL; + List *rel_appinfos = NIL, + *live_childrels = NIL; ListCell *l; + if (rte->relkind != RELKIND_PARTITIONED_TABLE) + { + foreach (l, root->append_rel_list) + { + AppendRelInfo *appinfo = lfirst(l); + + /* append_rel_list contains all append rels; ignore others */ + if (appinfo->parent_relid == parentRTindex) + rel_appinfos = lappend(rel_appinfos, appinfo); + } + } + else + rel_appinfos = rel->live_part_appinfos; + /* * Generate access paths for each member relation, and remember the * non-dummy children. 
*/ - foreach(l, root->append_rel_list) + foreach(l, rel_appinfos) { AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l); int childRTindex; @@ -1337,43 +1688,40 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel, ListCell *l; List *partitioned_rels = NIL; RangeTblEntry *rte; - bool build_partitioned_rels = false; + /* + * AppendPath we are about to generate must record the RT indexes of + * partitioned tables that are direct or indirect children of this Append + * rel. For partitioned tables, we collect its live partitioned children + * from rel->painfo. However, it will contain only its immediate children, + * so collect live partitioned children from all children that are + * themselves partitioned and concatenate to our list before finally + * passing the list to create_append_path() and/or + * generate_mergeappend_paths(). + * + * If this is a sub-query RTE, its RelOptInfo doesn't itself contain the + * list of live partitioned children, so we must assemble the same in the + * loop below from the children that are known to correspond to + * partitioned rels. (This assumes that we don't need to look through + * multiple levels of subquery RTEs; if we ever do, we could consider + * stuffing the list we generate here into sub-query RTE's RelOptInfo, just + * like we do for partitioned rels, which would be used when populating our + * parent rel with paths. For the present, that appears to be + * unnecessary.) + */ if (IS_SIMPLE_REL(rel)) { - /* - * A root partition will already have a PartitionedChildRelInfo, and a - * non-root partitioned table doesn't need one, because its Append - * paths will get flattened into the parent anyway. For a subquery - * RTE, no PartitionedChildRelInfo exists; we collect all - * partitioned_rels associated with any child. 
(This assumes that we - * don't need to look through multiple levels of subquery RTEs; if we - * ever do, we could create a PartitionedChildRelInfo with the - * accumulated list of partitioned_rels which would then be found when - * populated our parent rel with paths. For the present, that appears - * to be unnecessary.) - */ rte = planner_rt_fetch(rel->relid, root); - switch (rte->rtekind) - { - case RTE_RELATION: - if (rte->relkind == RELKIND_PARTITIONED_TABLE) - partitioned_rels = - get_partitioned_child_rels(root, rel->relid); - break; - case RTE_SUBQUERY: - build_partitioned_rels = true; - break; - default: - elog(ERROR, "unexpected rtekind: %d", (int) rte->rtekind); - } + if (rte->rtekind == RTE_RELATION && + rte->relkind == RELKIND_PARTITIONED_TABLE) + partitioned_rels = list_copy(rel->live_partitioned_rels); } else if (rel->reloptkind == RELOPT_JOINREL && rel->part_scheme) { /* - * Associate PartitionedChildRelInfo of the root partitioned tables - * being joined with the root partitioned join (indicated by - * RELOPT_JOINREL). + * For joinrel consisting of root partitioned tables, get + * partitioned_rels list by combining live_partitioned_rels of the + * component partitioned tables. */ partitioned_rels = get_partitioned_child_rels_for_join(root, rel->relids); @@ -1390,17 +1738,12 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel, ListCell *lcp; /* - * If we need to build partitioned_rels, accumulate the partitioned - * rels for this child. + * Accumulate the live partitioned children of this child, if it's + * itself partitioned rel. 
+ */ - if (build_partitioned_rels) - { - List *cprels; - - cprels = get_partitioned_child_rels(root, childrel->relid); + if (childrel->part_scheme) partitioned_rels = list_concat(partitioned_rels, - list_copy(cprels)); - } + list_copy(childrel->live_partitioned_rels)); /* * If child has an unparameterized cheapest-total path, add that to diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c index f35380391a..f4203ce200 100644 --- a/src/backend/optimizer/path/indxpath.c +++ b/src/backend/optimizer/path/indxpath.c @@ -40,9 +40,6 @@ #include "utils/selfuncs.h" -#define IsBooleanOpfamily(opfamily) \ - ((opfamily) == BOOL_BTREE_FAM_OID || (opfamily) == BOOL_HASH_FAM_OID) - #define IndexCollMatchesExprColl(idxcollation, exprcollation) \ ((idxcollation) == InvalidOid || (idxcollation) == (exprcollation)) diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c index 2b868c52de..3e943391b1 100644 --- a/src/backend/optimizer/path/joinrels.c +++ b/src/backend/optimizer/path/joinrels.c @@ -1396,6 +1396,30 @@ try_partition_wise_join(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2, child_rel2->relids); /* + * If either child_rel1 or child_rel2 is not a live partition, they'd + * not have been touched by set_append_rel_size. So, its RelOptInfo + * would be missing some information that set_append_rel_size sets for + * live partitions, such as the target list, child EQ members, etc. + * We need to make the RelOptInfo of even the dead partitions look + * minimally valid and as having a valid dummy path attached to it. 
+ */ + if (IS_SIMPLE_REL(child_rel1) && child_rel1->pathlist == NIL) + { + AppendRelInfo *appinfo = rel1->part_appinfos[cnt_parts]; + + set_basic_child_rel_properties(root, rel1, child_rel1, appinfo); + mark_dummy_rel(child_rel1); + } + + if (IS_SIMPLE_REL(child_rel2) && child_rel2->pathlist == NIL) + { + AppendRelInfo *appinfo = rel2->part_appinfos[cnt_parts]; + + set_basic_child_rel_properties(root, rel2, child_rel2, appinfo); + mark_dummy_rel(child_rel2); + } + + /* * Construct restrictions applicable to the child join from those * applicable to the parent join. */ diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index d58635c887..24d800d8b7 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -6182,14 +6182,24 @@ List * get_partitioned_child_rels_for_join(PlannerInfo *root, Relids join_relids) { List *result = NIL; - ListCell *l; + int relid; - foreach(l, root->pcinfo_list) + relid = -1; + while ((relid = bms_next_member(join_relids, relid)) >= 0) { - PartitionedChildRelInfo *pc = lfirst(l); + RelOptInfo *rel; - if (bms_is_member(pc->parent_relid, join_relids)) - result = list_concat(result, list_copy(pc->child_rels)); + /* Paranoia: ignore bogus relid indexes */ + if (relid >= root->simple_rel_array_size) + continue; + rel = root->simple_rel_array[relid]; + if (rel == NULL) + continue; + Assert(rel->relid == relid); /* sanity check on array */ + Assert(rel->part_scheme != NULL); + Assert(rel->live_partitioned_rels != NIL && + list_length(rel->live_partitioned_rels) > 0); + result = list_concat(result, list_copy(rel->live_partitioned_rels)); } return result; diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index 9d35a41e22..e1ef936e68 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -1918,6 +1918,10 @@ find_partition_scheme(PlannerInfo *root, Relation relation) memcpy(part_scheme->parttypbyval, 
partkey->parttypbyval, sizeof(bool) * partnatts); + part_scheme->partcollation = (Oid *) palloc(sizeof(Oid) * partnatts); + memcpy(part_scheme->partcollation, partkey->partcollation, + sizeof(Oid) * partnatts); + /* Add the partitioning scheme to PlannerInfo. */ root->part_schemes = lappend(root->part_schemes, part_scheme); diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c index 3bd1063aa8..b06696b7f0 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -18,6 +18,7 @@ #include "miscadmin.h" #include "catalog/partition.h" +#include "catalog/pg_class.h" #include "optimizer/clauses.h" #include "optimizer/cost.h" #include "optimizer/pathnode.h" @@ -154,9 +155,12 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent) rel->part_scheme = NULL; rel->nparts = 0; rel->boundinfo = NULL; + rel->part_appinfos = NULL; rel->part_rels = NULL; rel->partexprs = NULL; rel->nullable_partexprs = NULL; + rel->live_part_appinfos = NIL; + rel->live_partitioned_rels = NIL; /* * Pass top parent's relids down the inheritance hierarchy. If the parent @@ -233,8 +237,12 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent) int cnt_parts = 0; if (nparts > 0) + { + rel->part_appinfos = (AppendRelInfo **) + palloc(sizeof(AppendRelInfo *) * nparts); rel->part_rels = (RelOptInfo **) palloc(sizeof(RelOptInfo *) * nparts); + } foreach(l, root->append_rel_list) { @@ -258,6 +266,7 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent) * also match the PartitionDesc. See expand_partitioned_rtentry. 
*/ Assert(cnt_parts < nparts); + rel->part_appinfos[cnt_parts] = appinfo; rel->part_rels[cnt_parts] = childrel; cnt_parts++; } @@ -567,6 +576,7 @@ build_join_rel(PlannerInfo *root, joinrel->part_scheme = NULL; joinrel->nparts = 0; joinrel->boundinfo = NULL; + joinrel->part_appinfos = NULL; joinrel->part_rels = NULL; joinrel->partexprs = NULL; joinrel->nullable_partexprs = NULL; @@ -735,6 +745,7 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel, joinrel->has_eclass_joins = false; joinrel->top_parent_relids = NULL; joinrel->part_scheme = NULL; + joinrel->part_appinfos = NULL; joinrel->part_rels = NULL; joinrel->partexprs = NULL; joinrel->nullable_partexprs = NULL; @@ -1747,3 +1758,93 @@ build_joinrel_partition_info(RelOptInfo *joinrel, RelOptInfo *outer_rel, joinrel->nullable_partexprs[cnt] = nullable_partexpr; } } + +/* + * Initialize some basic properties of child rel from the parent rel, such + * as the target list, equivalence class members, etc. + */ +void +set_basic_child_rel_properties(PlannerInfo *root, + RelOptInfo *rel, + RelOptInfo *childrel, + AppendRelInfo *appinfo) +{ + AttrNumber attno; + + if (rel->part_scheme) + { + /* + * We need attr_needed data for building targetlist of a join relation + * representing join between matching partitions for partition-wise + * join. A given attribute of a child will be needed in the same + * highest joinrel where the corresponding attribute of parent is + * needed. Hence it suffices to use the same Relids set for parent and + * child. + */ + for (attno = rel->min_attr; attno <= rel->max_attr; attno++) + { + int index = attno - rel->min_attr; + Relids attr_needed = rel->attr_needed[index]; + + /* System attributes do not need translation.
*/ + if (attno <= 0) + { + Assert(rel->min_attr == childrel->min_attr); + childrel->attr_needed[index] = attr_needed; + } + else + { + Var *var = list_nth_node(Var, + appinfo->translated_vars, + attno - 1); + int child_index; + + /* + * Ignore any column dropped from the parent. Corresponding + * Var won't have any translation. It won't have attr_needed + * information, since it can not be referenced in the query. + */ + if (var == NULL) + { + Assert(attr_needed == NULL); + continue; + } + + child_index = var->varattno - childrel->min_attr; + childrel->attr_needed[child_index] = attr_needed; + } + } + } + + /* + * Copy/Modify targetlist. Even if this child is deemed empty, we need + * its targetlist in case it falls on nullable side in a child-join + * because of partition-wise join. + * + * NB: the resulting childrel->reltarget->exprs may contain arbitrary + * expressions, which otherwise would not occur in a rel's targetlist. + * Code that might be looking at an appendrel child must cope with + * such. (Normally, a rel's targetlist would only include Vars and + * PlaceHolderVars.) XXX we do not bother to update the cost or width + * fields of childrel->reltarget; not clear if that would be useful. + */ + childrel->reltarget->exprs = (List *) + adjust_appendrel_attrs(root, + (Node *) rel->reltarget->exprs, + 1, &appinfo); + + /* + * We have to make child entries in the EquivalenceClass data + * structures as well. This is needed either if the parent + * participates in some eclass joins (because we will want to consider + * inner-indexscan joins on the individual children) or if the parent + * has useful pathkeys (because we should try to build MergeAppend + * paths that produce those sort orderings). Even if this child is + * deemed dummy, it may fall on nullable side in a child-join, which + * in turn may participate in a MergeAppend, where we will need the + * EquivalenceClass data structures.
+ */ + if (rel->has_eclass_joins || has_useful_pathkeys(root, rel)) + add_child_rel_equivalences(root, appinfo, rel, childrel); + childrel->has_eclass_joins = rel->has_eclass_joins; +} diff --git a/src/include/catalog/partition.h b/src/include/catalog/partition.h index 945ac0239d..4a1ce92569 100644 --- a/src/include/catalog/partition.h +++ b/src/include/catalog/partition.h @@ -108,4 +108,6 @@ extern void check_default_allows_bound(Relation parent, Relation defaultRel, PartitionBoundSpec *new_spec); extern List *get_proposed_default_constraint(List *new_part_constaints); +/* For partition-pruning */ +extern Bitmapset *get_partitions_from_clauses(Relation relation, List *partclauses); #endif /* PARTITION_H */ diff --git a/src/include/catalog/pg_opfamily.h b/src/include/catalog/pg_opfamily.h index 0d0ba7c66a..f2fddeceb8 100644 --- a/src/include/catalog/pg_opfamily.h +++ b/src/include/catalog/pg_opfamily.h @@ -187,4 +187,7 @@ DATA(insert OID = 4082 ( 3580 pg_lsn_minmax_ops PGNSP PGUID )); DATA(insert OID = 4104 ( 3580 box_inclusion_ops PGNSP PGUID )); DATA(insert OID = 5000 ( 4000 box_ops PGNSP PGUID )); +#define IsBooleanOpfamily(opfamily) \ + ((opfamily) == BOOL_BTREE_FAM_OID || (opfamily) == BOOL_HASH_FAM_OID) + #endif /* PG_OPFAMILY_H */ diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index e085cefb7b..ecf70a66c4 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -342,6 +342,10 @@ typedef struct PlannerInfo * partition bounds. Since partition key data types and the opclass declared * input data types are expected to be binary compatible (per ResolveOpClass), * both of those should have same byval and length properties. + * + * Since partitioning might be using a collation for a given partition key + * column that is not same as the collation implied by column's type, store + * the same separately.
*/ typedef struct PartitionSchemeData { @@ -349,7 +353,8 @@ typedef struct PartitionSchemeData int16 partnatts; /* number of partition attributes */ Oid *partopfamily; /* OIDs of operator families */ Oid *partopcintype; /* OIDs of opclass declared input data types */ - Oid *parttypcoll; /* OIDs of collations of partition keys. */ + Oid *parttypcoll; /* OIDs of collations of partition key types. */ + Oid *partcollation; /* OIDs of collations used for partitioning */ /* Cached information about partition key data types. */ int16 *parttyplen; @@ -529,6 +534,7 @@ typedef struct PartitionSchemeData *PartitionScheme; * part_scheme - Partitioning scheme of the relation * boundinfo - Partition bounds * nparts - Number of partitions + * part_appinfos - AppendRelInfo of each partition * part_rels - RelOptInfos for each partition * partexprs, nullable_partexprs - Partition key expressions * @@ -657,10 +663,27 @@ typedef struct RelOptInfo PartitionScheme part_scheme; /* Partitioning scheme. */ int nparts; /* number of partitions */ struct PartitionBoundInfoData *boundinfo; /* Partition bounds */ - struct RelOptInfo **part_rels; /* Array of RelOptInfos of partitions, - * stored in the same order of bounds */ + struct AppendRelInfo **part_appinfos; /* Array of AppendRelInfos of + * *all* partitions, stored in the + * same order as the bounds */ + struct RelOptInfo **part_rels; /* Array of RelOptInfos of *all* + * partitions, stored in the same order as + * the bounds */ List **partexprs; /* Non-nullable partition key expressions. */ List **nullable_partexprs; /* Nullable partition key expressions. */ + + + /* + * List of AppendRelInfo's of the table's partitions that survive a + * query's clauses. + */ + List *live_part_appinfos; + + /* + * RT indexes of live partitions that are partitioned tables themselves. + * This includes the RT index of the table itself.
+ */ + List *live_partitioned_rels; } RelOptInfo; /* diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h index e9ed16ad32..c1f2fc93cd 100644 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -296,5 +296,9 @@ extern RelOptInfo *build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel, RelOptInfo *inner_rel, RelOptInfo *parent_joinrel, List *restrictlist, SpecialJoinInfo *sjinfo, JoinType jointype); +extern void set_basic_child_rel_properties(PlannerInfo *root, + RelOptInfo *rel, + RelOptInfo *childrel, + AppendRelInfo *appinfo); #endif /* PATHNODE_H */ -- 2.11.0