From a4c506ce64f0a78381ebcc317e1343a22f148323 Mon Sep 17 00:00:00 2001 From: amit Date: Wed, 13 Sep 2017 18:24:55 +0900 Subject: [PATCH 2/5] Planner-side changes for partition-pruning This adds all the necessary planner code and representations viz. 0. Code to teach set_append_rel_size/pathlist to look at only the *live* partitions of partitioned tables. 1. Add a field partcollation to PartitionScheme, which will be needed to verify that an operator clause's input collation indeed matches what is used for partitioning, to be able to use the clause for partition-pruning (using parttypcoll won't be correct, because that's not what's used by partitioning) 2. Code to match the clauses to the table's partition key and generate a list of such matching clauses. 3. Add a field to RelOptInfo to store an array of pointers to AppendRelInfos of *all* partitions (stored in the same order as their RelOptInfos in part_rels) 4. Add a field to RelOptInfo to store a list of AppendRelInfos of *live* partitions that survived partition-pruning (although as of this commit this contains *all* appinfos as mentioned below). 5. Some code in try_partition_wise_join to handle the possibility that a partition RelOptInfo may not have the basic information set (note that as noted in 0, set_append_rel_size now sets such information for only the *live* partitions) If the clauses identified in 2 above do not contain values necessary to perform partition pruning, get_partitions_from_clauses would return without pruning any partitions. In most cases, it's obvious in the planner that a set of clauses identified as matching the partition key doesn't contain the constant values right away, in which case, there is no need to call get_partitions_from_clauses right away. Instead, it should be deferred to another piece of code which can receive the above list of clauses and run at a time when the constant values become available. 
In addition, a stub function get_partitions_from_clauses is added in partition.c, which currently simply returns all partitions from the partition descriptor. Authors: Amit Langote, Dilip Kumar --- src/backend/catalog/partition.c | 24 ++ src/backend/optimizer/path/allpaths.c | 566 +++++++++++++++++++++++++++------- src/backend/optimizer/path/joinrels.c | 24 ++ src/backend/optimizer/plan/planner.c | 20 +- src/backend/optimizer/util/plancat.c | 4 + src/backend/optimizer/util/relnode.c | 90 ++++++ src/include/catalog/partition.h | 5 + src/include/nodes/relation.h | 29 +- src/include/optimizer/pathnode.h | 4 + 9 files changed, 648 insertions(+), 118 deletions(-) diff --git a/src/backend/catalog/partition.c b/src/backend/catalog/partition.c index 66ec214e02..31c47d23e1 100644 --- a/src/backend/catalog/partition.c +++ b/src/backend/catalog/partition.c @@ -1421,6 +1421,30 @@ get_partition_dispatch_recurse(Relation rel, Relation parent, } } +/* + * get_partitions_from_clauses + * Determine the set of partitions of relation that will satisfy all + * the clauses contained in partclauses (currently a stub: no pruning is done and every partition in the partition descriptor is reported) + * + * Outputs: + * *min_part_idx and *max_part_idx constitute a range of contiguous + * indexes of partitions satisfying the query, while *other_parts + * contains indexes of partitions that satisfy the query but are + * not included in the aforementioned range + */ +void +get_partitions_from_clauses(Relation relation, int rt_index, + List *partclauses, + int *min_part_idx, int *max_part_idx, + Bitmapset **other_parts) +{ + PartitionDesc partdesc = RelationGetPartitionDesc(relation); + + *min_part_idx = 0; + *max_part_idx = partdesc->nparts - 1; + *other_parts = NULL; +} + /* Module-local functions */ /* diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index 4e565b3c00..aca372a0d2 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -20,9 +20,11 @@ #include "access/sysattr.h" #include "access/tsmapi.h" 
+#include "catalog/partition.h" #include "catalog/pg_class.h" #include "catalog/pg_operator.h" #include "catalog/pg_proc.h" +#include "catalog/pg_type.h" #include "foreign/fdwapi.h" #include "miscadmin.h" #include "nodes/makefuncs.h" @@ -135,6 +137,12 @@ static void recurse_push_qual(Node *setOp, Query *topquery, static void remove_unused_subquery_outputs(Query *subquery, RelOptInfo *rel); static void add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel, List *live_childrels); +static List *get_append_rel_partitions(PlannerInfo *root, + RelOptInfo *rel, + RangeTblEntry *rte); +static List *match_clauses_to_partkey(RelOptInfo *rel, + List *clauses, + bool *constfalse); /* @@ -834,6 +842,17 @@ set_foreign_size(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) rel->rows = clamp_row_est(rel->rows); } +static int +intcmp(const void *va, const void *vb) +{ + int a = *((const int *) va); + int b = *((const int *) vb); + + if (a == b) + return 0; + return (a > b) ? 1 : -1; +} + /* * set_foreign_pathlist * Build access paths for a foreign table RTE @@ -846,6 +865,363 @@ set_foreign_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) } /* + * get_append_rel_partitions + * Return the list of partitions of rel that pass the clauses mentioned in + * rel->baserestrictinfo + * + * Returned list contains the AppendRelInfos of chosen partitions. + */ +static List * +get_append_rel_partitions(PlannerInfo *root, + RelOptInfo *rel, + RangeTblEntry *rte) +{ + Relation parent = heap_open(rte->relid, NoLock); + PartitionDesc partdesc = RelationGetPartitionDesc(parent); + List *partclauses; + List *result = NIL; + int i, + num_parts = 0, + min_part_idx = -1, + max_part_idx = -1, + *all_indexes = NULL; + Bitmapset *other_parts = NULL; + bool constfalse; + + /* + * Get the clauses that match the partition key, including information + * about any nullness tests against partition keys. constfalse is set to + * true if a pseudoconstant clause that evaluates to false is seen. 
+ */ + partclauses = match_clauses_to_partkey(rel, + list_copy(rel->baserestrictinfo), + &constfalse); + + /* + * Since the clauses in rel->baserestrictinfo should all contain Const + * operands, it should be possible to prune partitions right away. + */ + if (partclauses != NIL && !constfalse) + { + get_partitions_from_clauses(parent, rel->relid, partclauses, + &min_part_idx, &max_part_idx, + &other_parts); + /* Get *all* indexes in one place and sort. */ + if (min_part_idx >= 0 && max_part_idx >= 0) + num_parts += (max_part_idx - min_part_idx + 1); + if (!bms_is_empty(other_parts)) + num_parts += bms_num_members(other_parts); + + if (num_parts > 0) + { + int j; + + all_indexes = (int *) palloc(num_parts * sizeof(int)); + j = 0; + if (min_part_idx >= 0 && max_part_idx >= 0) + { + for (i = min_part_idx; i <= max_part_idx; i++) + all_indexes[j++] = i; + } + if (!bms_is_empty(other_parts)) + while ((i = bms_first_member(other_parts)) >= 0) + all_indexes[j++] = i; + if (j > 1) + qsort((void *) all_indexes, j, sizeof(int), intcmp); + } + } + else if (!constfalse) + { + /* No clauses to prune partitions, so scan all partitions. */ + num_parts = partdesc->nparts; + all_indexes = (int *) palloc(num_parts * sizeof(int)); + for (i = 0; i < partdesc->nparts; i++) + all_indexes[i] = i; + } + + /* Fetch the partition appinfos. */ + for (i = 0; i < num_parts; i++) + { + AppendRelInfo *appinfo = rel->part_appinfos[all_indexes[i]]; +#ifdef USE_ASSERT_CHECKING + RangeTblEntry *rte = planner_rt_fetch(appinfo->child_relid, root); + + /* + * Must be the intended child's RTE here, because appinfos are ordered + * the same way as partitions in the partition descriptor. + */ + Assert(partdesc->oids[all_indexes[i]] == rte->relid); +#endif + result = lappend(result, appinfo); + } + if (all_indexes) + pfree(all_indexes); + + /* Remember for future users such as set_append_rel_pathlist(). 
*/ + rel->live_part_appinfos = result; + + heap_close(parent, NoLock); + + return result; +} + +#define PartCollMatchesExprColl(partcoll, exprcoll) \ + ((partcoll) == InvalidOid || (partcoll) == (exprcoll)) + +/* + * match_clauses_to_partkey + * Match clauses with rel's partition key + * + * For an individual clause to match with a partition key column, the clause: + * + * 1. must be in the form (partkey op const) or (const op partkey); + * 2. must contain an operator which is in the same operator family as the + * partitioning operator for the partition key column + * 3. its input collation must match the partitioning collation + * + * The "const" mentioned in 1 means any expression that doesn't involve a + * volatile function or a Var of this relation. We allow Vars belonging to + * other relations (for example, if the clause is a join clause), but they + * are treated as parameters whose values are not known now, so cannot be + * used for partition pruning right within the planner. It's the + * responsibility of higher code levels to manage restriction and join + * clauses appropriately. + * + * If a NullTest against a partition key is encountered, it's added to the + * result as well. + */ +static List * +match_clauses_to_partkey(RelOptInfo *rel, + List *clauses, + bool *constfalse) +{ + PartitionScheme partscheme = rel->part_scheme; + List *result = NIL; + ListCell *lc; + + *constfalse = false; + + Assert (partscheme != NULL); + + foreach(lc, clauses) + { + Expr *clause; + int i; + + if (IsA(lfirst(lc), RestrictInfo)) + { + RestrictInfo *rinfo = lfirst(lc); + + clause = rinfo->clause; + if (rinfo->pseudoconstant && + !DatumGetBool(((Const *) clause)->constvalue)) + { + *constfalse = true; + continue; + } + } + else + clause = (Expr *) lfirst(lc); + + /* Get the BoolExpr's out of the way. */ + if (IsA(clause, BoolExpr)) + { + bool constfalse1; + + /* + * If the OR's args contain clauses that match, add the clause + * to the result. 
+ */ + if (or_clause((Node *) clause) && + match_clauses_to_partkey(rel, + list_copy(((BoolExpr *) clause)->args), + &constfalse1) != NIL) + result = lappend(result, clause); + else if (and_clause((Node *) clause)) + /* + * These clauses are ANDed with the clauses in the + * original list, so queue them after the latter. Note + * that it also means that a queued clause will be added to + * the result if it happens to match. + */ + clauses = list_concat(clauses, + list_copy(((BoolExpr *) clause)->args)); + continue; + } + + for (i = 0; i < partscheme->partnatts; i++) + { + Node *partkey = linitial(rel->partexprs[i]); + Oid partopfamily = partscheme->partopfamily[i], + partcoll = partscheme->partcollation[i]; + + /* + * Check if the operator is compatible with partitioning and if + * so, add it to the list of opclauses matched with this partition + * key. + */ + if (is_opclause(clause)) + { + Expr *constexpr, + *leftop, + *rightop; + Relids constrelids, + left_relids, + right_relids; + Oid expr_op, + expr_coll; + + leftop = (Expr *) get_leftop(clause); + rightop = (Expr *) get_rightop(clause); + expr_op = ((OpExpr *) clause)->opno; + expr_coll = ((OpExpr *) clause)->inputcollid; + left_relids = pull_varnos((Node *) leftop); + right_relids = pull_varnos((Node *) rightop); + + if (IsA(leftop, RelabelType)) + leftop = ((RelabelType *) leftop)->arg; + if (IsA(rightop, RelabelType)) + rightop = ((RelabelType *) rightop)->arg; + + if (equal(leftop, partkey)) + { + constexpr = rightop; + constrelids = right_relids; + } + else if (equal(rightop, partkey)) + { + constexpr = leftop; + constrelids = left_relids; + expr_op = get_commutator(expr_op); + /* + * If no commutator exists, cannot flip the qual's args, + * so give up. + */ + if (!OidIsValid(expr_op)) + continue; + } + else + /* Neither argument matches the partition key. */ + continue; + + /* + * Only allow strict operators to think sanely about the + * behavior with null arguments. 
+ */ + if (!op_strict(expr_op)) + continue; + + /* + * Check if the operator is in the partition operator family. + * If the operator happens to be '<>', which is never listed + * as part of the operator family, check if its negator + * exists and that the latter is compatible with partitioning. + */ + if (!op_in_opfamily(expr_op, partopfamily)) + { + Oid negator = get_negator(expr_op); + + if (!OidIsValid(negator) || + !op_in_opfamily(negator, partopfamily)) + continue; + } + + /* + * Useless if what we're thinking of as a constant is actually + * a Var coming from this relation. + */ + if (bms_is_member(rel->relid, constrelids)) + continue; + + /* Useless if the "constant" can change its value. */ + if (contain_volatile_functions((Node *) constexpr)) + continue; + + /* + * Also, useless, if the clause's collation is different from + * the partitioning collation. + */ + if (!PartCollMatchesExprColl(partcoll, expr_coll)) + continue; + + /* + * Everything seems to be fine, so add it to the list of + * clauses we will use for pruning. Flip the left and right + * args if we have to, because the code that extracts the + * constant value to use for partition-pruning expects to find + * it as the rightop of the clause. + */ + if (constexpr == rightop) + result = lappend(result, clause); + else + { + OpExpr *commuted; + + commuted = (OpExpr *) copyObject(clause); + commuted->opno = expr_op; + commuted->opfuncid = get_opcode(expr_op); + commuted->args = list_make2(rightop, leftop); + result = lappend(result, commuted); + } + } + else if (IsA(clause, ScalarArrayOpExpr)) + { + ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) clause; + Oid saop_op = saop->opno; + Oid saop_coll = saop->inputcollid; + Node *leftop = (Node *) linitial(saop->args), + *rightop = (Node *) lsecond(saop->args); + + if (IsA(leftop, RelabelType)) + leftop = (Node *) ((RelabelType *) leftop)->arg; + if (!equal(leftop, partkey)) + continue; + + /* Check if saop_op is compatible with partitioning. 
*/ + if (!op_strict(saop_op)) + continue; + + /* + * In case of NOT IN (..), we get a '<>', which while not + * listed as part of any operator family, we are able to + * handle the same if its negator is indeed a part of the + * partitioning operator family. + */ + if (!op_in_opfamily(saop_op, partopfamily)) + { + Oid negator = get_negator(saop_op); + + if (!OidIsValid(negator) || + !op_in_opfamily(negator, partopfamily)) + continue; + } + + /* Useless if the "constant" can change its value. */ + if (contain_volatile_functions((Node *) rightop)) + continue; + + /* + * Also, useless, if the clause's collation is different from + * the partitioning collation. NOTE(review): a ScalarArrayOpExpr that passes all of these checks is not appended to result -- confirm whether that is intended. + */ + if (!PartCollMatchesExprColl(partcoll, saop_coll)) + continue; + } + else if (IsA(clause, NullTest)) + { + NullTest *nulltest = (NullTest *) clause; + Node *arg = (Node *) nulltest->arg; + + if (equal(arg, partkey)) + result = lappend(result, nulltest); + } + } + } + + return result; +} + +/* + * set_append_rel_size * Set size estimates for a simple "append relation" * @@ -860,6 +1236,7 @@ static void set_append_rel_size(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTblEntry *rte) { + List *rel_appinfos = NIL; int parentRTindex = rti; bool has_live_children; double parent_rows; @@ -873,6 +1250,23 @@ set_append_rel_size(PlannerInfo *root, RelOptInfo *rel, Assert(IS_SIMPLE_REL(rel)); + if (rte->relkind != RELKIND_PARTITIONED_TABLE) + { + foreach (l, root->append_rel_list) + { + AppendRelInfo *appinfo = lfirst(l); + + /* append_rel_list contains all append rels; ignore others */ + if (appinfo->parent_relid == parentRTindex) + rel_appinfos = lappend(rel_appinfos, appinfo); + } + } + else + { + rel_appinfos = get_append_rel_partitions(root, rel, rte); + rel->live_partitioned_rels = list_make1_int(rti); + } + /* * Initialize to compute size estimates for whole append relation. 
* @@ -893,7 +1287,7 @@ set_append_rel_size(PlannerInfo *root, RelOptInfo *rel, nattrs = rel->max_attr - rel->min_attr + 1; parent_attrsizes = (double *) palloc0(nattrs * sizeof(double)); - foreach(l, root->append_rel_list) + foreach(l, rel_appinfos) { AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l); int childRTindex; @@ -906,10 +1300,6 @@ set_append_rel_size(PlannerInfo *root, RelOptInfo *rel, ListCell *childvars; ListCell *lc; - /* append_rel_list contains all append rels; ignore others */ - if (appinfo->parent_relid != parentRTindex) - continue; - childRTindex = appinfo->child_relid; childRTE = root->simple_rte_array[childRTindex]; @@ -920,73 +1310,11 @@ set_append_rel_size(PlannerInfo *root, RelOptInfo *rel, childrel = find_base_rel(root, childRTindex); Assert(childrel->reloptkind == RELOPT_OTHER_MEMBER_REL); - if (rel->part_scheme) - { - AttrNumber attno; - - /* - * We need attr_needed data for building targetlist of a join - * relation representing join between matching partitions for - * partition-wise join. A given attribute of a child will be - * needed in the same highest joinrel where the corresponding - * attribute of parent is needed. Hence it suffices to use the - * same Relids set for parent and child. - */ - for (attno = rel->min_attr; attno <= rel->max_attr; attno++) - { - int index = attno - rel->min_attr; - Relids attr_needed = rel->attr_needed[index]; - - /* System attributes do not need translation. */ - if (attno <= 0) - { - Assert(rel->min_attr == childrel->min_attr); - childrel->attr_needed[index] = attr_needed; - } - else - { - Var *var = list_nth_node(Var, - appinfo->translated_vars, - attno - 1); - int child_index; - - child_index = var->varattno - childrel->min_attr; - childrel->attr_needed[child_index] = attr_needed; - } - } - } - /* - * Copy/Modify targetlist. Even if this child is deemed empty, we need - * its targetlist in case it falls on nullable side in a child-join - * because of partition-wise join. 
- * - * NB: the resulting childrel->reltarget->exprs may contain arbitrary - * expressions, which otherwise would not occur in a rel's targetlist. - * Code that might be looking at an appendrel child must cope with - * such. (Normally, a rel's targetlist would only include Vars and - * PlaceHolderVars.) XXX we do not bother to update the cost or width - * fields of childrel->reltarget; not clear if that would be useful. - */ - childrel->reltarget->exprs = (List *) - adjust_appendrel_attrs(root, - (Node *) rel->reltarget->exprs, - 1, &appinfo); - - /* - * We have to make child entries in the EquivalenceClass data - * structures as well. This is needed either if the parent - * participates in some eclass joins (because we will want to consider - * inner-indexscan joins on the individual children) or if the parent - * has useful pathkeys (because we should try to build MergeAppend - * paths that produce those sort orderings). Even if this child is - * deemed dummy, it may fall on nullable side in a child-join, which - * in turn may participate in a MergeAppend, where we will need the - * EquivalenceClass data structures. + * Initialize some properties of child rel from the parent rel, such as + * target list, equivalence class members, etc. */ - if (rel->has_eclass_joins || has_useful_pathkeys(root, rel)) - add_child_rel_equivalences(root, appinfo, rel, childrel); - childrel->has_eclass_joins = rel->has_eclass_joins; + set_basic_child_rel_properties(root, rel, childrel, appinfo); /* * We have to copy the parent's quals to the child, with appropriate @@ -1152,6 +1480,17 @@ set_append_rel_size(PlannerInfo *root, RelOptInfo *rel, has_live_children = true; /* + * If childrel is itself partitioned, add it and its partitioned + * children to the list being propagated up to the root rel. 
+ */ + if (childrel->part_scheme && rel->part_scheme) + { + rel->live_partitioned_rels = + list_concat(rel->live_partitioned_rels, + list_copy(childrel->live_partitioned_rels)); + } + + /* * If any live child is not parallel-safe, treat the whole appendrel * as not parallel-safe. In future we might be able to generate plans * in which some children are farmed out to workers while others are @@ -1247,14 +1586,29 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTblEntry *rte) { int parentRTindex = rti; - List *live_childrels = NIL; + List *rel_appinfos = NIL, + *live_childrels = NIL; ListCell *l; + if (rte->relkind != RELKIND_PARTITIONED_TABLE) + { + foreach (l, root->append_rel_list) + { + AppendRelInfo *appinfo = lfirst(l); + + /* append_rel_list contains all append rels; ignore others */ + if (appinfo->parent_relid == parentRTindex) + rel_appinfos = lappend(rel_appinfos, appinfo); + } + } + else + rel_appinfos = rel->live_part_appinfos; + /* * Generate access paths for each member relation, and remember the * non-dummy children. */ - foreach(l, root->append_rel_list) + foreach(l, rel_appinfos) { AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l); int childRTindex; @@ -1325,43 +1679,40 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel, ListCell *l; List *partitioned_rels = NIL; RangeTblEntry *rte; - bool build_partitioned_rels = false; + /* + * AppendPath we are about to generate must record the RT indexes of + * partitioned tables that are direct or indirect children of this Append + * rel. For partitioned tables, we collect its live partitioned children + * from rel->live_partitioned_rels. However, it will contain only its immediate children, + * so collect live partitioned children from all children that are + * themselves partitioned and concatenate to our list before finally + * passing the list to create_append_path() and/or + * generate_mergeappend_paths(). 
+ * + * If this is a sub-query RTE, its RelOptInfo doesn't itself contain the + * list of live partitioned children, so we must assemble the same in the + * loop below from the children that are known to correspond to + * partitioned rels. (This assumes that we don't need to look through + * multiple levels of subquery RTEs; if we ever do, we could consider + * stuffing the list we generate here into sub-query RTE's RelOptInfo, just + * like we do for partitioned rels, which would be used when populating our + * parent rel with paths. For the present, that appears to be + * unnecessary.) + */ if (IS_SIMPLE_REL(rel)) { - /* - * A root partition will already have a PartitionedChildRelInfo, and a - * non-root partitioned table doesn't need one, because its Append - * paths will get flattened into the parent anyway. For a subquery - * RTE, no PartitionedChildRelInfo exists; we collect all - * partitioned_rels associated with any child. (This assumes that we - * don't need to look through multiple levels of subquery RTEs; if we - * ever do, we could create a PartitionedChildRelInfo with the - * accumulated list of partitioned_rels which would then be found when - * populated our parent rel with paths. For the present, that appears - * to be unnecessary.) - */ rte = planner_rt_fetch(rel->relid, root); - switch (rte->rtekind) - { - case RTE_RELATION: - if (rte->relkind == RELKIND_PARTITIONED_TABLE) - partitioned_rels = - get_partitioned_child_rels(root, rel->relid); - break; - case RTE_SUBQUERY: - build_partitioned_rels = true; - break; - default: - elog(ERROR, "unexpected rtekind: %d", (int) rte->rtekind); - } + if (rte->rtekind == RTE_RELATION && + rte->relkind == RELKIND_PARTITIONED_TABLE) + partitioned_rels = rel->live_partitioned_rels; } else if (rel->reloptkind == RELOPT_JOINREL && rel->part_scheme) { /* - * Associate PartitionedChildRelInfo of the root partitioned tables - * being joined with the root partitioned join (indicated by - * RELOPT_JOINREL). 
+ * For joinrel consisting of root partitioned tables, get + * partitioned_rels list by combining live_partitioned_rels of the + * component partitioned tables. */ partitioned_rels = get_partitioned_child_rels_for_join(root, rel->relids); @@ -1378,17 +1729,12 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel, ListCell *lcp; /* - * If we need to build partitioned_rels, accumulate the partitioned - * rels for this child. + * Accumulate the live partitioned children of this child, if it's + * itself a partitioned rel. NOTE(review): unlike in set_append_rel_size, the child's list is passed to list_concat without list_copy here -- confirm whether a copy is needed. */ - if (build_partitioned_rels) - { - List *cprels; - - cprels = get_partitioned_child_rels(root, childrel->relid); + if (childrel->part_scheme) partitioned_rels = list_concat(partitioned_rels, - list_copy(cprels)); - } + childrel->live_partitioned_rels); /* * If child has an unparameterized cheapest-total path, add that to diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c index 2b868c52de..3e943391b1 100644 --- a/src/backend/optimizer/path/joinrels.c +++ b/src/backend/optimizer/path/joinrels.c @@ -1396,6 +1396,30 @@ try_partition_wise_join(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2, child_rel2->relids); /* + * If either child_rel1 or child_rel2 is not a live partition, they'd + * not have been touched by set_append_rel_size. So, its RelOptInfo + * would be missing some information that set_append_rel_size sets for + * live partitions, such as the target list, child EQ members, etc. + * We need to make the RelOptInfo of even the dead partitions look + * minimally valid and as having a valid dummy path attached to it. 
+ */ + if (IS_SIMPLE_REL(child_rel1) && child_rel1->pathlist == NIL) + { + AppendRelInfo *appinfo = rel1->part_appinfos[cnt_parts]; + + set_basic_child_rel_properties(root, rel1, child_rel1, appinfo); + mark_dummy_rel(child_rel1); + } + + if (IS_SIMPLE_REL(child_rel2) && child_rel2->pathlist == NIL) + { + AppendRelInfo *appinfo = rel2->part_appinfos[cnt_parts]; + + set_basic_child_rel_properties(root, rel2, child_rel2, appinfo); + mark_dummy_rel(child_rel2); + } + + /* * Construct restrictions applicable to the child join from those * applicable to the parent join. */ diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index d58635c887..24d800d8b7 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -6182,14 +6182,24 @@ List * get_partitioned_child_rels_for_join(PlannerInfo *root, Relids join_relids) { List *result = NIL; - ListCell *l; + int relid; - foreach(l, root->pcinfo_list) + relid = -1; + while ((relid = bms_next_member(join_relids, relid)) >= 0) { - PartitionedChildRelInfo *pc = lfirst(l); + RelOptInfo *rel; - if (bms_is_member(pc->parent_relid, join_relids)) - result = list_concat(result, list_copy(pc->child_rels)); + /* Paranoia: ignore bogus relid indexes */ + if (relid >= root->simple_rel_array_size) + continue; + rel = root->simple_rel_array[relid]; + if (rel == NULL) + continue; + Assert(rel->relid == relid); /* sanity check on array */ + Assert(rel->part_scheme != NULL); + Assert(rel->live_partitioned_rels != NIL && + list_length(rel->live_partitioned_rels) > 0); + result = list_concat(result, list_copy(rel->live_partitioned_rels)); } return result; diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index 9d35a41e22..e1ef936e68 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -1918,6 +1918,10 @@ find_partition_scheme(PlannerInfo *root, Relation relation) memcpy(part_scheme->parttypbyval, 
partkey->parttypbyval, sizeof(bool) * partnatts); + part_scheme->partcollation = (Oid *) palloc(sizeof(Oid) * partnatts); + memcpy(part_scheme->partcollation, partkey->partcollation, + sizeof(Oid) * partnatts); + /* Add the partitioning scheme to PlannerInfo. */ root->part_schemes = lappend(root->part_schemes, part_scheme); diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c index 3bd1063aa8..8e290e19b0 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -18,6 +18,7 @@ #include "miscadmin.h" #include "catalog/partition.h" +#include "catalog/pg_class.h" #include "optimizer/clauses.h" #include "optimizer/cost.h" #include "optimizer/pathnode.h" @@ -154,9 +155,12 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent) rel->part_scheme = NULL; rel->nparts = 0; rel->boundinfo = NULL; + rel->part_appinfos = NULL; rel->part_rels = NULL; rel->partexprs = NULL; rel->nullable_partexprs = NULL; + rel->live_part_appinfos = NIL; + rel->live_partitioned_rels = NIL; /* * Pass top parent's relids down the inheritance hierarchy. If the parent @@ -233,8 +237,12 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent) int cnt_parts = 0; if (nparts > 0) + { + rel->part_appinfos = (AppendRelInfo **) + palloc(sizeof(AppendRelInfo *) * nparts); rel->part_rels = (RelOptInfo **) palloc(sizeof(RelOptInfo *) * nparts); + } foreach(l, root->append_rel_list) { @@ -258,6 +266,7 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent) * also match the PartitionDesc. See expand_partitioned_rtentry. 
 */ Assert(cnt_parts < nparts); + rel->part_appinfos[cnt_parts] = appinfo; rel->part_rels[cnt_parts] = childrel; cnt_parts++; } @@ -567,6 +576,7 @@ build_join_rel(PlannerInfo *root, joinrel->part_scheme = NULL; joinrel->nparts = 0; joinrel->boundinfo = NULL; + joinrel->part_appinfos = NULL; joinrel->part_rels = NULL; joinrel->partexprs = NULL; joinrel->nullable_partexprs = NULL; @@ -735,6 +745,7 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel, joinrel->has_eclass_joins = false; joinrel->top_parent_relids = NULL; joinrel->part_scheme = NULL; + joinrel->part_appinfos = NULL; joinrel->part_rels = NULL; joinrel->partexprs = NULL; joinrel->nullable_partexprs = NULL; @@ -1747,3 +1758,82 @@ build_joinrel_partition_info(RelOptInfo *joinrel, RelOptInfo *outer_rel, joinrel->nullable_partexprs[cnt] = nullable_partexpr; } } + +/* + * Initialize some basic properties of child rel from the parent rel, such as + * target list, equivalence class members, etc. + */ +void +set_basic_child_rel_properties(PlannerInfo *root, + RelOptInfo *rel, + RelOptInfo *childrel, + AppendRelInfo *appinfo) +{ + AttrNumber attno; + + if (rel->part_scheme) + { + /* + * We need attr_needed data for building targetlist of a join relation + * representing join between matching partitions for partition-wise + * join. A given attribute of a child will be needed in the same + * highest joinrel where the corresponding attribute of parent is + * needed. Hence it suffices to use the same Relids set for parent and + * child. + */ + for (attno = rel->min_attr; attno <= rel->max_attr; attno++) + { + int index = attno - rel->min_attr; + Relids attr_needed = rel->attr_needed[index]; + + /* System attributes do not need translation. 
*/ + if (attno <= 0) + { + Assert(rel->min_attr == childrel->min_attr); + childrel->attr_needed[index] = attr_needed; + } + else + { + Var *var = list_nth_node(Var, + appinfo->translated_vars, + attno - 1); + int child_index; + + child_index = var->varattno - childrel->min_attr; + childrel->attr_needed[child_index] = attr_needed; + } + } + } + + /* + * Copy/Modify targetlist. Even if this child is deemed empty, we need + * its targetlist in case it falls on nullable side in a child-join + * because of partition-wise join. + * + * NB: the resulting childrel->reltarget->exprs may contain arbitrary + * expressions, which otherwise would not occur in a rel's targetlist. + * Code that might be looking at an appendrel child must cope with + * such. (Normally, a rel's targetlist would only include Vars and + * PlaceHolderVars.) XXX we do not bother to update the cost or width + * fields of childrel->reltarget; not clear if that would be useful. + */ + childrel->reltarget->exprs = (List *) + adjust_appendrel_attrs(root, + (Node *) rel->reltarget->exprs, + 1, &appinfo); + + /* + * We have to make child entries in the EquivalenceClass data + * structures as well. This is needed either if the parent + * participates in some eclass joins (because we will want to consider + * inner-indexscan joins on the individual children) or if the parent + * has useful pathkeys (because we should try to build MergeAppend + * paths that produce those sort orderings). Even if this child is + * deemed dummy, it may fall on nullable side in a child-join, which + * in turn may participate in a MergeAppend, where we will need the + * EquivalenceClass data structures. 
+ */ + if (rel->has_eclass_joins || has_useful_pathkeys(root, rel)) + add_child_rel_equivalences(root, appinfo, rel, childrel); + childrel->has_eclass_joins = rel->has_eclass_joins; +} diff --git a/src/include/catalog/partition.h b/src/include/catalog/partition.h index 945ac0239d..5f55550952 100644 --- a/src/include/catalog/partition.h +++ b/src/include/catalog/partition.h @@ -108,4 +108,9 @@ extern void check_default_allows_bound(Relation parent, Relation defaultRel, PartitionBoundSpec *new_spec); extern List *get_proposed_default_constraint(List *new_part_constaints); +/* For partition-pruning */ +void get_partitions_from_clauses(Relation relation, int rt_index, + List *partclauses, + int *min_part_idx, int *max_part_idx, + Bitmapset **other_parts); #endif /* PARTITION_H */ diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index e085cefb7b..ecf70a66c4 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -342,6 +342,10 @@ typedef struct PlannerInfo * partition bounds. Since partition key data types and the opclass declared * input data types are expected to be binary compatible (per ResolveOpClass), * both of those should have same byval and length properties. + * + * Since partitioning might be using a collation for a given partition key + * column that is not same as the collation implied by column's type, store + * the same separately. */ typedef struct PartitionSchemeData { @@ -349,7 +353,8 @@ typedef struct PartitionSchemeData int16 partnatts; /* number of partition attributes */ Oid *partopfamily; /* OIDs of operator families */ Oid *partopcintype; /* OIDs of opclass declared input data types */ - Oid *parttypcoll; /* OIDs of collations of partition keys. */ + Oid *parttypcoll; /* OIDs of partition key type collation. */ + Oid *partcollation; /* OIDs of partitioning collation */ /* Cached information about partition key data types. 
 */ int16 *parttyplen; @@ -529,6 +534,7 @@ typedef struct PartitionSchemeData *PartitionScheme; * part_scheme - Partitioning scheme of the relation * boundinfo - Partition bounds * nparts - Number of partitions + * part_appinfos - AppendRelInfo of each partition * part_rels - RelOptInfos for each partition * partexprs, nullable_partexprs - Partition key expressions * @@ -657,10 +663,27 @@ typedef struct RelOptInfo PartitionScheme part_scheme; /* Partitioning scheme. */ int nparts; /* number of partitions */ struct PartitionBoundInfoData *boundinfo; /* Partition bounds */ - struct RelOptInfo **part_rels; /* Array of RelOptInfos of partitions, - * stored in the same order of bounds */ + struct AppendRelInfo **part_appinfos; /* Array of AppendRelInfos of + * partitions, stored in the + * same order as of bounds */ + struct RelOptInfo **part_rels; /* Array of RelOptInfos of *all* + * partitions, stored in the same order as + * of bounds */ List **partexprs; /* Non-nullable partition key expressions. */ List **nullable_partexprs; /* Nullable partition key expressions. */ + + + /* + * List of AppendRelInfo's of the table's partitions that survive a + * query's clauses. + */ + List *live_part_appinfos; + + /* + * RT indexes of live partitions that are partitioned tables themselves. + * This includes the RT index of the table itself. + */ + List *live_partitioned_rels; } RelOptInfo; /* diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h index e9ed16ad32..c1f2fc93cd 100644 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -296,5 +296,9 @@ extern RelOptInfo *build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel, RelOptInfo *inner_rel, RelOptInfo *parent_joinrel, List *restrictlist, SpecialJoinInfo *sjinfo, JoinType jointype); +extern void set_basic_child_rel_properties(PlannerInfo *root, + RelOptInfo *rel, + RelOptInfo *childrel, + AppendRelInfo *appinfo); #endif /* PATHNODE_H */ -- 2.11.0