From ff9ccd8df6555cfca31e54e22293ac1613db327c Mon Sep 17 00:00:00 2001 From: amit Date: Wed, 13 Sep 2017 18:24:55 +0900 Subject: [PATCH 1/5] Some optimizer data structures for partitioned rels Nobody uses it though. --- src/backend/optimizer/util/plancat.c | 120 +++++++++++++++++++++++++++++++ src/backend/optimizer/util/relnode.c | 20 ++++++ src/include/nodes/nodes.h | 1 + src/include/nodes/relation.h | 135 +++++++++++++++++++++++++++++++++++ src/include/optimizer/plancat.h | 2 + 5 files changed, 278 insertions(+) diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index a1ebd4acc8..f7e3a1df5f 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -68,6 +68,8 @@ static List *get_relation_constraints(PlannerInfo *root, static List *build_index_tlist(PlannerInfo *root, IndexOptInfo *index, Relation heapRelation); static List *get_relation_statistics(RelOptInfo *rel, Relation relation); +static void get_relation_partition_info(PlannerInfo *root, RelOptInfo *rel, + Relation relation); /* * get_relation_info - @@ -420,6 +422,10 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, /* Collect info about relation's foreign keys, if relevant */ get_relation_foreign_keys(root, rel, relation, inhparent); + /* Collect partitioning info, if relevant. 
*/ + if (relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + get_relation_partition_info(root, rel, relation); + heap_close(relation, NoLock); /* @@ -1802,3 +1808,117 @@ has_row_triggers(PlannerInfo *root, Index rti, CmdType event) heap_close(relation, NoLock); return result; } + +static void +get_relation_partition_info(PlannerInfo *root, RelOptInfo *rel, + Relation relation) +{ + int i; + ListCell *l; + PartitionKey key = RelationGetPartitionKey(relation); + PartitionDesc partdesc = RelationGetPartitionDesc(relation); + + rel->part_scheme = find_partition_scheme(root, relation); + rel->partexprs = (List **) palloc0(key->partnatts * sizeof(List *)); + + l = list_head(key->partexprs); + for (i = 0; i < key->partnatts; i++) + { + Expr *keyCol; + + if (key->partattrs[i] != 0) + { + keyCol = (Expr *) makeVar(rel->relid, + key->partattrs[i], + key->parttypid[i], + key->parttypmod[i], + key->parttypcoll[i], + 0); + } + else + { + if (l == NULL) + elog(ERROR, "wrong number of partition key expressions"); + keyCol = (Expr *) copyObject(lfirst(l)); + l = lnext(l); + } + + rel->partexprs[i] = list_make1(keyCol); + } + + /* Values are filled in build_simple_rel(). */ + rel->child_appinfos = (AppendRelInfo **) palloc0(partdesc->nparts * + sizeof(AppendRelInfo *)); + + /* + * A PartitionAppendInfo to map this table to its immediate partitions + * that will be scanned by this query. At the same time, it records the + * table's partitioning properties reflecting any partition-pruning that + * might occur to satisfy the query. Rest of the fields are set in + * get_rel_partitions() and set_append_rel_size(). + */ + rel->painfo = makeNode(PartitionAppendInfo); + rel->painfo->boundinfo = partdesc->boundinfo; +} + +/* + * find_partition_scheme + * + * The function returns a canonical partition scheme which exactly matches the + * partitioning scheme of the given relation if one exists in the list of + * canonical partitioning schemes maintained in PlannerInfo. 
If none of the + * existing partitioning schemes match, the function creates a canonical + * partition scheme and adds it to the list. + * + * The relation's partition key is dereferenced unconditionally and the result + * is never NULL, so this must only be called for a partitioned table. + */ +PartitionScheme +find_partition_scheme(PlannerInfo *root, Relation relation) +{ + ListCell *lc; + PartitionKey key = RelationGetPartitionKey(relation); + char strategy = key->strategy; + int partnatts = key->partnatts; + PartitionScheme part_scheme = NULL; + + /* Search for a matching partition scheme and return it if one is found. */ + foreach(lc, root->partition_schemes) + { + part_scheme = lfirst(lc); + + /* Match various partitioning attributes. */ + if (strategy != part_scheme->strategy || + partnatts != part_scheme->partnatts || + memcmp(key->parttypid, part_scheme->parttypid, + sizeof(Oid) * partnatts) != 0 || + memcmp(key->parttypmod, part_scheme->parttypmod, + sizeof(int32) * partnatts) != 0 || + memcmp(key->partcollation, part_scheme->partcollation, + sizeof(Oid) * partnatts) != 0 || + memcmp(key->partopfamily, part_scheme->partopfamily, + sizeof(Oid) * partnatts) != 0 || + memcmp(key->partopcintype, part_scheme->partopcintype, + sizeof(Oid) * partnatts) != 0) + continue; + + /* Found a matching partition scheme. */ + return part_scheme; + } + + /* Did not find matching partition scheme. Create one. */ + part_scheme = (PartitionScheme) palloc0(sizeof(PartitionSchemeData)); + + part_scheme->strategy = strategy; + part_scheme->partnatts = partnatts; + part_scheme->parttypid = key->parttypid; + part_scheme->parttypmod = key->parttypmod; + part_scheme->partcollation = key->partcollation; + part_scheme->partopfamily = key->partopfamily; + part_scheme->partopcintype = key->partopcintype; + + /* Add the partitioning scheme to PlannerInfo. 
*/ + root->partition_schemes = lappend(root->partition_schemes, part_scheme); + + return part_scheme; +} diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c index c7b2695ebb..f0973b83b9 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -17,6 +17,7 @@ #include #include "miscadmin.h" +#include "catalog/pg_class.h" #include "optimizer/clauses.h" #include "optimizer/cost.h" #include "optimizer/pathnode.h" @@ -163,6 +164,11 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent) else rel->top_parent_relids = NULL; + rel->child_appinfos = NULL; + rel->part_scheme = NULL; + rel->partexprs = NULL; + rel->painfo = NULL; + /* Check type of rtable entry */ switch (rte->rtekind) { @@ -218,7 +224,18 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent) if (rte->inh) { ListCell *l; + AppendRelInfo **child_appinfos = NULL; + int i; + if (rte->relkind == RELKIND_PARTITIONED_TABLE) + { + Assert(rel->part_scheme != NULL); + Assert(rel->child_appinfos != NULL); + Assert(rel->painfo != NULL); + child_appinfos = rel->child_appinfos; + } + + i = 0; foreach(l, root->append_rel_list) { AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l); @@ -229,6 +246,9 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent) (void) build_simple_rel(root, appinfo->child_relid, rel); + + if (child_appinfos) + child_appinfos[i++] = appinfo; } } diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index 27bd4f3363..63196a1211 100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -261,6 +261,7 @@ typedef enum NodeTag T_SpecialJoinInfo, T_AppendRelInfo, T_PartitionedChildRelInfo, + T_PartitionAppendInfo, T_PlaceHolderInfo, T_MinMaxAggInfo, T_PlannerParamItem, diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index d50ff55681..0f4996b424 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -266,6 +266,8 
@@ typedef struct PlannerInfo List *distinct_pathkeys; /* distinctClause pathkeys, if any */ List *sort_pathkeys; /* sortClause pathkeys, if any */ + List *partition_schemes; /* List of PartitionScheme objects. */ + List *initial_rels; /* RelOptInfos we are now trying to join */ /* Use fetch_upper_rel() to get any particular upper rel */ @@ -326,6 +328,48 @@ typedef struct PlannerInfo ((root)->simple_rte_array ? (root)->simple_rte_array[rti] : \ rt_fetch(rti, (root)->parse->rtable)) +/* + * Partitioning scheme + * Structure to hold partitioning scheme for a given relation. + * + * Multiple relations may be partitioned in the same way. The relations + * resulting from joining such relations may be partitioned in the same way as + * the joining relations. Similarly, relations derived from such relations by + * grouping, sorting may be partitioned in the same way as the underlying scan + * relations. All such relations partitioned in the same way share the + * partitioning scheme. + * + * PlannerInfo stores a list of distinct "canonical" partitioning schemes. + * RelOptInfo of a partitioned relation holds the pointer to "canonical" + * partitioning scheme. + * + * We store opclass declared input data types instead of partition key + * datatypes since those are the ones used to compare partition bounds instead + * of actual partition key data types. Since partition key data types and the + * opclass declared input data types are expected to be binary compatible (per + * ResolveOpClass()), both of those should have same byval and length + * properties. + * + * The structure caches information about partition key data type to be used + * while matching partition bounds. While comparing partition schemes we don't + * need to compare this information as it should be same when opclass declared + * input data types are same for two partitioned relations. 
+ */ +typedef struct PartitionSchemeData +{ + char strategy; /* Partitioning strategy */ + int16 partnatts; /* Number of partitioning attributes */ + + /* The following arrays each have partnatts members. */ + Oid *parttypid; /* Type OIDs */ + int32 *parttypmod; /* Typemod values */ + Oid *partcollation; /* Partitioning collation */ + Oid *partopfamily; /* Operator family OIDs */ + Oid *partopcintype; /* Operator class-declared input type OIDs */ +} PartitionSchemeData; + +typedef struct PartitionSchemeData *PartitionScheme; + /*---------- * RelOptInfo @@ -515,6 +559,9 @@ typedef enum RelOptKind /* Is the given relation an "other" relation? */ #define IS_OTHER_REL(rel) ((rel)->reloptkind == RELOPT_OTHER_MEMBER_REL) +typedef struct AppendRelInfo AppendRelInfo; +typedef struct PartitionAppendInfo PartitionAppendInfo; + typedef struct RelOptInfo { NodeTag type; @@ -592,6 +639,42 @@ typedef struct RelOptInfo /* used by "other" relations */ Relids top_parent_relids; /* Relids of topmost parents */ + + /* Fields set for partitioned relations */ + + /* + * Information about the partitioning attributes, such as the number of + * attributes, arrays containing per-attribute type/typmod, partitioning + * collation, operator family OIDs, etc. + */ + PartitionScheme part_scheme; + + /* + * Following contains the exact identities of the individual partitioning + * attributes. For example, if the attribute is a table's column, then + * it will be represented herein by a Var node for the same. This is + * structured as an array of Lists with part_scheme->partnatts members, + * with each list containing the expression(s) corresponding to the ith + * partitioning attribute (0 <= i < part_scheme->partnatts) of this rel. + * For baserels, there is just a single expression in each slot (the ith + * list) of the array, because it corresponds to just one table. But for + * a joinrel, there will be as many expressions as there are tables + * involved in that joinrel. 
We have to do it that way, because in the + * joinrel case, the same corresponding partitioning attribute may have + * different identities in different tables involved in the join; for + * example, a Var node's varno will differ and so might varattnos. + */ + List **partexprs; + + /* AppendRelInfos of *all* partitions of the table. */ + AppendRelInfo **child_appinfos; + + /* + * For a partitioned relation, the following represents the identities + * of its live partitions (their RT indexes) and some information about + * the bounds that the live partitions satisfy. + */ + PartitionAppendInfo *painfo; } RelOptInfo; /* @@ -2031,6 +2114,58 @@ typedef struct PartitionedChildRelInfo List *child_rels; } PartitionedChildRelInfo; +/* Forward declarations, to avoid including other headers */ +typedef struct PartitionDispatchData *PartitionDispatch; +typedef struct PartitionBoundInfoData *PartitionBoundInfo; +typedef struct PartitionKeyData *PartitionKey; + +/* + * PartitionAppendInfo - Properties of partitions contained in the Append path + * of a given partitioned table + */ +typedef struct PartitionAppendInfo +{ + NodeTag type; + + /* + * List of AppendRelInfos of the table's partitions that satisfy a given + * query. + */ + List *live_partition_appinfos; + + /* + * RT indexes of live partitions that are partitioned tables themselves. + * This includes the RT index of the table itself. + */ + List *live_partitioned_rels; + + /* + * The following simply copies the pointer to boundinfo in the table's + * PartitionDesc. + */ + PartitionBoundInfo boundinfo; + + /* + * Indexes in the boundinfo->datums array of the smallest and the largest + * value of the partition key that the query allows. They are set by + * calling get_partitions_for_keys(). + */ + int min_datum_idx; + int max_datum_idx; + + /* + * Does this Append contain the null-accepting partition, if one exists + * and is allowed by the query's quals. 
+ */ + bool contains_null_partition; + + /* + * Does this Append contain the default partition, if one exists and is + * allowed by the query's quals. + */ + bool contains_default_partition; +} PartitionAppendInfo; + /* * For each distinct placeholder expression generated during planning, we * store a PlaceHolderInfo node in the PlannerInfo node's placeholder_list. diff --git a/src/include/optimizer/plancat.h b/src/include/optimizer/plancat.h index 71f0faf938..c45db074c6 100644 --- a/src/include/optimizer/plancat.h +++ b/src/include/optimizer/plancat.h @@ -56,5 +56,7 @@ extern Selectivity join_selectivity(PlannerInfo *root, SpecialJoinInfo *sjinfo); extern bool has_row_triggers(PlannerInfo *root, Index rti, CmdType event); +extern PartitionScheme find_partition_scheme(PlannerInfo *root, + Relation relation); #endif /* PLANCAT_H */ -- 2.11.0