From 0e9903d90e4be2a3f76622d70129a62a3684f640 Mon Sep 17 00:00:00 2001 From: amit Date: Tue, 22 Aug 2017 13:48:13 +0900 Subject: [PATCH v36 3/4] Faster partition pruning This adds a new module partprune.c in the optimizer, which is meant as a replacement for using constraint exclusion to prune individual partitions. The new module performs partition pruning using the information contained in parent/partitioned table's boundinfo, after extracting clauses that involve partition keys. With the new module's functionality in place, set_append_rel_size() calls prune_append_rel_partitions() to get a Bitmapset of partitions that need to be scanned and processes only the partitions contained in the set. Authors: Amit Langote, David Rowley (david.rowley@2ndquadrant.com), Dilip Kumar (dilipbalaut@gmail.com) --- src/backend/catalog/partition.c | 869 +++++++++++++++++ src/backend/nodes/copyfuncs.c | 68 ++ src/backend/nodes/nodeFuncs.c | 51 + src/backend/optimizer/path/allpaths.c | 16 + src/backend/optimizer/util/Makefile | 2 +- src/backend/optimizer/util/partprune.c | 1252 +++++++++++++++++++++++++ src/backend/optimizer/util/plancat.c | 44 +- src/backend/optimizer/util/relnode.c | 8 + src/include/catalog/partition.h | 35 + src/include/catalog/pg_opfamily.h | 3 + src/include/nodes/nodes.h | 5 + src/include/nodes/primnodes.h | 48 + src/include/nodes/relation.h | 4 + src/include/optimizer/partprune.h | 25 + src/test/regress/expected/inherit.out | 10 +- src/test/regress/expected/partition_prune.out | 157 ++-- src/test/regress/sql/partition_prune.sql | 2 +- 17 files changed, 2487 insertions(+), 112 deletions(-) create mode 100644 src/backend/optimizer/util/partprune.c create mode 100644 src/include/optimizer/partprune.h diff --git a/src/backend/catalog/partition.c b/src/backend/catalog/partition.c index 786c05df73..251355c62f 100644 --- a/src/backend/catalog/partition.c +++ b/src/backend/catalog/partition.c @@ -193,6 +193,19 @@ static int get_greatest_modulus(PartitionBoundInfo b); 
static uint64 compute_hash_value(int partnatts, FmgrInfo *partsupfunc, Datum *values, bool *isnull); +static bool partkey_datum_from_expr(PartitionPruneContext *context, + int partkeyidx, int opstrategy, + Expr *expr, Datum *value); +static Bitmapset *perform_pruning_combine_step(PartitionPruneContext *context, + Bitmapset *srcparts, + PartitionPruneStepCombine *cstep); +static Bitmapset *get_partitions_for_null_keys(PartitionPruneContext *context, + Bitmapset *keyisnull); +static Bitmapset *get_partitions_for_keys(PartitionPruneContext *context, + int opstrategy, Datum *values, int nvalues); +static Bitmapset *get_partitions_excluded_by_ne_datums(PartitionPruneContext *context, + Datum *ne_datums, int n_ne_datums); + /* * RelationBuildPartitionDesc * Form rel's partition descriptor @@ -1560,9 +1573,865 @@ get_partition_qual_relid(Oid relid) return result; } +/* + * get_unpruned_partitions + * Determine partitions that survive partition pruning steps + * + * Returns a Bitmapset of the matching partition indexes, or NULL if none can + * match. + */ +Bitmapset * +get_unpruned_partitions(PartitionPruneContext *context, + List *pruning_steps) +{ + Bitmapset *result = bms_add_range(NULL, 0, context->nparts - 1); + ListCell *lc; + + foreach(lc, pruning_steps) + { + PartitionPruneStep *step = lfirst(lc); + + switch (nodeTag(step)) + { + case T_PartitionPruneStepNoop: + /* no-op */ + break; + + case T_PartitionPruneStepNullness: + { + PartitionPruneStepNullness *nstep = + (PartitionPruneStepNullness *) step; + + if (!bms_is_empty(nstep->keyisnull)) + { + Bitmapset *step_parts; + + step_parts = get_partitions_for_null_keys(context, + nstep->keyisnull); + result = bms_int_members(result, step_parts); + } + + if (!bms_is_empty(nstep->keyisnotnull)) + { + Bitmapset *step_parts; + + /* + * The following will select all partitions that contain + * non-null values. 
+ */ + step_parts = get_partitions_for_keys(context, 0, + NULL, 0); + result = bms_int_members(result, step_parts); + } + + break; + } + + case T_PartitionPruneStepOp: + { + PartitionPruneStepOp *opstep = (PartitionPruneStepOp *) step; + Bitmapset *step_parts; + ListCell *lc1; + int keyno, + nvalues; + Datum values[PARTITION_MAX_KEYS]; + + keyno = nvalues = 0; + foreach(lc1, opstep->values) + { + Expr *expr = lfirst(lc1); + Datum datum; + + if (keyno > nvalues) + break; + + if (partkey_datum_from_expr(context, keyno, + opstep->opstrategy, + expr, &datum)) + { + values[nvalues++] = datum; + } + + keyno++; + } + + step_parts = get_partitions_for_keys(context, + opstep->opstrategy, + values, nvalues); + result = bms_int_members(result, step_parts); + break; + } + + case T_PartitionPruneStepCombine: + { + PartitionPruneStepCombine *cstep = + (PartitionPruneStepCombine *) step; + + result = perform_pruning_combine_step(context, result, cstep); + break; + } + + default: + break; + } + } + + return result; +} + /* Module-local functions */ /* + * partkey_datum_from_expr + * Set *value to the constant value obtained by evaluating 'expr' + * + * Note that we may not be able to evaluate the input expression, in which + * case, the function returns false to indicate that *value has not been + * set. True is returned otherwise. + */ +static bool +partkey_datum_from_expr(PartitionPruneContext *context, + int partkeyidx, int opstrategy, + Expr *expr, Datum *value) +{ + Oid exprTyp = exprType((Node *) expr); + + if (context->partopcintype[partkeyidx] != exprTyp) + { + Oid new_supfuncid; + int16 procnum; + + + procnum = (context->strategy == PARTITION_STRATEGY_HASH) + ? 
HASHEXTENDED_PROC + : BTORDER_PROC; + new_supfuncid = get_opfamily_proc(context->partopfamily[partkeyidx], + context->partopcintype[partkeyidx], + exprTyp, procnum); + fmgr_info(new_supfuncid, &context->partsupfunc[partkeyidx]); + } + + /* + * Add more expression types here as needed to support the requirements + * of the higher-level code. + */ + switch (nodeTag(expr)) + { + case T_Const: + *value = ((Const *) expr)->constvalue; + return true; + + default: + break; + } + + return false; +} + +static Bitmapset * +get_partitions_for_null_keys(PartitionPruneContext *context, + Bitmapset *keyisnull) +{ + PartitionBoundInfo boundinfo = context->boundinfo; + + switch (context->strategy) + { + case PARTITION_STRATEGY_HASH: + /* No pruning possible. */ + return bms_add_range(NULL, 0, context->nparts - 1); + + case PARTITION_STRATEGY_LIST: + /* + * NULLs may only exist in the NULL partition, or in the + * default, if there's no NULL partition. + */ + if (partition_bound_accepts_nulls(boundinfo)) + return bms_make_singleton(boundinfo->null_index); + else if (partition_bound_has_default(boundinfo)) + return bms_make_singleton(boundinfo->default_index); + break; + + case PARTITION_STRATEGY_RANGE: + /* Only the default range partition accepts nulls. */ + if (partition_bound_has_default(boundinfo)) + return bms_make_singleton(boundinfo->default_index); + break; + + default: + elog(ERROR, "invalid partition strategy"); + } + + /* Prune all partitions as no partition has nulls. */ + return NULL; +} + +/* + * get_partitions_for_keys + * Returns the index of partitions that will need to be scanned for the + * given look up keys + * + * Input: + * 'values' holds 'nvalues' lookup datums for the leading partition keys; + * 'opstrategy' is the btree strategy to apply (not consulted for hash). 
+ * + * Outputs: + * Bitmapset containing indexes of the selected partitions + */ +static Bitmapset * +get_partitions_for_keys(PartitionPruneContext *context, + int opstrategy, Datum *values, int nvalues) +{ + FmgrInfo *partsupfunc = context->partsupfunc; + PartitionBoundInfo boundinfo = context->boundinfo; + int *partindices = boundinfo->indexes, + default_index = boundinfo->default_index; + Oid *partcollation = context->partcollation; + int partnatts = context->partnatts; + Bitmapset *result = NULL; + + switch (context->strategy) + { + case PARTITION_STRATEGY_HASH: + { + uint64 rowHash; + bool keyisnull[PARTITION_MAX_KEYS]; + int greatest_modulus, result_index; + + /* + * We can only prune if we know values for all the keys and + * they're all non-null; otherwise every partition must be kept. + */ + if (nvalues < partnatts) + return bms_add_range(NULL, 0, context->nparts - 1); + greatest_modulus = get_greatest_modulus(boundinfo); + memset(keyisnull, false, nvalues * sizeof(bool)); + rowHash = compute_hash_value(partnatts, partsupfunc, values, + keyisnull); + result_index = partindices[rowHash % greatest_modulus]; + if (result_index >= 0) + return bms_make_singleton(result_index); + } + break; + + case PARTITION_STRATEGY_LIST: + { + int off, + minoff, + maxoff, + i; + bool is_equal; + bool inclusive = false; + + Assert(partnatts == 1); + + /* + * If there are no datums to compare keys with, but there are + * partitions, just return the default partition if one exists. + */ + if (boundinfo->ndatums == 0) + { + if (partition_bound_has_default(boundinfo)) + return bms_make_singleton(default_index); + else + return NULL; /* shouldn't happen */ + } + + /* + * For range queries, always include the default list partition, + * because list partitions divide the key space in a discontinuous + * manner, not all values in the given range will have a partition + * assigned. This may not technically be true for some data types + * (e.g. 
integer types), however, we currently lack any sort of + * infrastructure to provide us with proofs that would allow us to + * do anything smarter here. + */ + if (partition_bound_has_default(boundinfo)) + result = bms_add_member(result, default_index); + + minoff = 0; + maxoff = boundinfo->ndatums - 1; + + if (nvalues == 0) + { + /* + * Add indexes of *all* partitions containing non-null + * values and return. + */ + for (i = minoff; i <= maxoff; i++) + result = bms_add_member(result, partindices[i]); + + return result; + } + + switch (opstrategy) + { + case BTEqualStrategyNumber: + off = partition_list_bsearch(partsupfunc, + partcollation, + boundinfo, values[0], + &is_equal); + if (off >= 0 && is_equal) + { + /* An exact matching datum exists. */ + Assert(partindices[off] >= 0); + return bms_make_singleton(partindices[off]); + } + else if (partition_bound_has_default(boundinfo)) + return bms_make_singleton(default_index); + else + return NULL; + break; + + case BTGreaterEqualStrategyNumber: + inclusive = true; + case BTGreaterStrategyNumber: + off = partition_list_bsearch(partsupfunc, + partcollation, + boundinfo, values[0], + &is_equal); + if (off >= 0) + { + /* + * We don't want the matched datum to be in the + * result. + */ + if (!is_equal || !inclusive) + off++; + } + else + { + /* + * This case means all partition bounds are + * greater, which in turn means that all + * partition satisfy this key. + */ + off = 0; + } + + /* + * off is greater than the numbers of datums we have + * partitions for. The only possible partition that + * could contain a match is the default partition. + * Return that, if it exists. + */ + if (off > boundinfo->ndatums - 1) + return partition_bound_has_default(boundinfo) + ? 
bms_make_singleton(default_index) + : NULL; + + minoff = off; + break; + + case BTLessEqualStrategyNumber: + inclusive = true; + case BTLessStrategyNumber: + off = partition_list_bsearch(partsupfunc, + partcollation, + boundinfo, values[0], + &is_equal); + if (off >= 0 && is_equal && !inclusive) + off--; + + /* + * off is smaller than the datums of all non-default + * partitions, meaning there isn't one to return. + * Return the default partition if one exists. + */ + if (off < 0) + return partition_bound_has_default(boundinfo) + ? bms_make_singleton(default_index) + : NULL; + + maxoff = off; + break; + + default: + elog(ERROR, "invalid btree operator strategy"); + } + + /* Finally add the partition indexes. */ + for (i = minoff; i <= maxoff; i++) + result = bms_add_member(result, partindices[i]); + } + break; + + case PARTITION_STRATEGY_RANGE: + { + int off, + minoff, + maxoff, + i; + bool is_equal; + bool inclusive = false; + + /* + * If there are no datums to compare keys with, but there are + * partitions, just return the default partition if one exists. + */ + if (boundinfo->ndatums == 0) + { + if (partition_bound_has_default(boundinfo)) + return bms_make_singleton(default_index); + else + return NULL; /* shouldn't happen */ + } + + minoff = 0; + maxoff = boundinfo->ndatums; + if (nvalues == 0) + { + /* + * Add indexes of *all* partitions containing non-null + * values and return. + */ + if (partindices[minoff] < 0) + minoff++; + if (partindices[maxoff] < 0) + maxoff--; + result = bms_add_range(result, + partindices[minoff], + partindices[maxoff]); + if (partition_bound_has_default(boundinfo)) + result = bms_add_member(result, default_index); + return result; + } + + switch (opstrategy) + { + case BTEqualStrategyNumber: + off = partition_range_datum_bsearch(partsupfunc, + partcollation, + boundinfo, + nvalues, values, + &is_equal); + + if (off >= 0 && is_equal) + { + if (nvalues == partnatts) + { + /* There can only be one partition. 
*/ + if (partindices[off+1] >= 0) + return bms_make_singleton(partindices[off+1]); + else if (partition_bound_has_default(boundinfo)) + return bms_make_singleton(default_index); + else + return NULL; + } + else + { + int saved_off = off; + + /* + * Matched a prefix of the partition bound at off. + */ + while (off >= 1) + { + int32 cmpval; + + cmpval = + partition_rbound_datum_cmp(partsupfunc, + partcollation, + boundinfo->datums[off-1], + boundinfo->kind[off-1], + values, nvalues); + if (cmpval != 0) + break; + off--; + } + minoff = off; + off = saved_off; + while (off < boundinfo->ndatums - 1) + { + int32 cmpval; + + cmpval = + partition_rbound_datum_cmp(partsupfunc, + partcollation, + boundinfo->datums[off+1], + boundinfo->kind[off+1], + values, nvalues); + if (cmpval != 0) + break; + off++; + } + maxoff = off+1; + } + } + else if (off >= 0) + { + if (partindices[off+1] >= 0) + minoff = maxoff = off + 1; + else if (partition_bound_has_default(boundinfo)) + return bms_make_singleton(default_index); + else + return NULL; + } + else if (partition_bound_has_default(boundinfo)) + return bms_make_singleton(default_index); + else + return NULL; + + if (partindices[minoff] < 0 && + minoff < boundinfo->ndatums) + minoff++; + if (partindices[maxoff] < 0 && maxoff >= 1) + maxoff--; + break; + + case BTGreaterEqualStrategyNumber: + inclusive = true; /* fall through */ + case BTGreaterStrategyNumber: + off = partition_range_datum_bsearch(partsupfunc, + partcollation, + boundinfo, + nvalues, values, + &is_equal); + + if (off < 0) + { + /* + * All partition bounds are greater than the key, so + * include all partitions in the result. + */ + off = 0; + } + else + { + if (is_equal && nvalues < partnatts) + { + /* + * Matched a prefix of the bound at off; keep nextoff in range. + */ + while (inclusive ? off >= 1 : off < boundinfo->ndatums - 1) + { + int32 cmpval; + int nextoff; + + nextoff = inclusive ? 
off - 1 : off + 1; + cmpval = + partition_rbound_datum_cmp(partsupfunc, + partcollation, + boundinfo->datums[nextoff], + boundinfo->kind[nextoff], + values, nvalues); + if (cmpval != 0) + { + if (!inclusive) + off++; + break; + } + off = nextoff; + } + } + else + off++; + } + + minoff = off; + break; + + case BTLessEqualStrategyNumber: + inclusive = true; /* fall through */ + case BTLessStrategyNumber: + off = partition_range_datum_bsearch(partsupfunc, + partcollation, + boundinfo, + nvalues, values, + &is_equal); + + if (off >= 0) + { + /* + * Matched prefix of the bound at off; keep nextoff in range. + */ + if (is_equal && nvalues < partnatts) + { + while (inclusive ? off < boundinfo->ndatums - 1 : off >= 1) + { + int32 cmpval; + int nextoff; + + nextoff = inclusive ? off + 1 : off - 1; + cmpval = + partition_rbound_datum_cmp(partsupfunc, + partcollation, + boundinfo->datums[nextoff], + boundinfo->kind[nextoff], + values, nvalues); + if (cmpval != 0) + { + if (!inclusive) + off--; + break; + } + off = nextoff; + } + + off++; + } + else if (!is_equal || inclusive) + off++; + } + else + { + /* + * All partition bounds are greater than the key, so + * select none of the partitions, except the default. 
+ */ + if (partition_bound_has_default(boundinfo)) + return bms_make_singleton(default_index); + return NULL; + } + + maxoff = off; + break; + + default: + elog(ERROR, "invalid btree operator strategy"); + } + + Assert (minoff >= 0 && maxoff >= 0); + if (partindices[minoff] < 0) + { + int lastkey = nvalues - 1; + + if (minoff >=0 && minoff < boundinfo->ndatums && + boundinfo->kind[minoff][lastkey] == + PARTITION_RANGE_DATUM_VALUE && + partition_bound_has_default(boundinfo)) + result = bms_add_member(result, default_index); + + minoff++; + } + + if (maxoff >= 1 && partindices[maxoff] < 0) + { + int lastkey = nvalues - 1; + + if (maxoff >=0 && maxoff <= boundinfo->ndatums && + boundinfo->kind[maxoff - 1][lastkey] == + PARTITION_RANGE_DATUM_VALUE && + partition_bound_has_default(boundinfo)) + result = bms_add_member(result, default_index); + + maxoff--; + } + + if (minoff <= maxoff) + result = bms_add_range(result, + partindices[minoff], + partindices[maxoff]); + + if (partition_bound_has_default(boundinfo)) + { + /* + * Since partition keys with nulls are mapped to the default + * range partition, we must include the default partition if + * some keys could be null. + */ + if (nvalues < partnatts) + result = bms_add_member(result, default_index); + + /* + * There may exist a range of values unassigned to any + * non-default partition between the datums at minoff and + * maxoff. Add the default partition in that case. 
+ */ + for (i = minoff; i <= maxoff; i++) + { + if (partindices[i] < 0) + return bms_add_member(result, default_index); + } + } + } + break; + + default: + result = NULL; + elog(ERROR, "unexpected partition strategy: %d", + context->strategy); + } + + return result; +} + +static Bitmapset * +perform_pruning_combine_step(PartitionPruneContext *context, + Bitmapset *srcparts, + PartitionPruneStepCombine *cstep) +{ + ListCell *lc; + Bitmapset *result = srcparts; + + switch (cstep->combineOp) + { + case COMBINE_OR: + { + Bitmapset *orparts = NULL; + + foreach(lc, cstep->argsteps) + { + PartitionPruneStep *step = lfirst(lc); + Bitmapset *argparts; + + argparts = get_unpruned_partitions(context, + list_make1(step)); + orparts = bms_add_members(orparts, argparts); + } + + result = bms_int_members(result, orparts); + break; + } + + case COMBINE_AND: + { + Bitmapset *andparts = NULL; + + foreach(lc, cstep->argsteps) + { + PartitionPruneStep *step = lfirst(lc); + Bitmapset *argparts; + + argparts = get_unpruned_partitions(context, + list_make1(step)); + andparts = (lc == list_head(cstep->argsteps)) + ? 
argparts + : bms_int_members(andparts, argparts); + } + + result = bms_int_members(result, andparts); + break; + } + + case COMBINE_NOT: + { + Bitmapset *notparts; + Datum *ne_datums; + int n_ne_datums = list_length(cstep->argvalues), + i; + + ne_datums = (Datum *) palloc0(n_ne_datums * sizeof(Datum)); + i = 0; + foreach(lc, cstep->argvalues) + { + Expr *expr = lfirst(lc); + Datum datum; + + if (partkey_datum_from_expr(context, 0, BTEqualStrategyNumber, + expr, &datum)) + ne_datums[i++] = datum; + } + notparts = get_partitions_excluded_by_ne_datums(context, + ne_datums, + i); /* only the datums actually set */ + result = bms_del_members(result, notparts); + break; + } + + default: + break; + } + + return result; +} + +/* + * get_partitions_excluded_by_ne_datums + * + * Returns a Bitmapset of partition indexes that can safely be removed due to + * the discovery of <> clauses for each datum value allowed in the partition. + */ +static Bitmapset * +get_partitions_excluded_by_ne_datums(PartitionPruneContext *context, + Datum *ne_datums, int n_ne_datums) +{ + FmgrInfo *partsupfunc = context->partsupfunc; + Oid *partcollation = context->partcollation; + int nparts = context->nparts, + i, + *datums_in_part, + *datums_found; + PartitionBoundInfo boundinfo = context->boundinfo; + Bitmapset *excluded_parts; + Bitmapset *foundoffsets = NULL; + + Assert(context->strategy == PARTITION_STRATEGY_LIST); + Assert(context->partnatts == 1); + + for (i = 0; i < n_ne_datums; i++) + { + int offset; + bool is_equal; + + offset = partition_list_bsearch(partsupfunc, partcollation, + boundinfo, + ne_datums[i], &is_equal); + if (offset >= 0 && is_equal) + { + Assert(boundinfo->indexes[offset] >= 0); + foundoffsets = bms_add_member(foundoffsets, offset); + } + } + + /* No partitions can be excluded if none of the datums were found. 
*/ + if (bms_is_empty(foundoffsets)) + return NULL; + + /* + * Since each list partition can permit multiple values, we must ensure + * that we got clauses for all those values before we can eliminate the + * entire partition. + * + * We'll need two arrays for this, one to count the number of unique + * datums found in the query which belong to each partition, and another + * to record the number of datums permitted in each partition. Once we've + * counted all this, we can eliminate any partition where the number of + * datums found matches the number of datums allowed in the partition. + */ + datums_in_part = (int *) palloc0(sizeof(int) * nparts); + datums_found = (int *) palloc0(sizeof(int) * nparts); + + i = -1; + while ((i = bms_next_member(foundoffsets, i)) >= 0) + datums_found[boundinfo->indexes[i]]++; + + /* + * Now, in a single pass over all the datums, count the number of datums + * permitted in each partition. + */ + for (i = 0; i < boundinfo->ndatums; i++) + datums_in_part[boundinfo->indexes[i]]++; + + /* + * Now compare the counts and eliminate any partition for which we found + * clauses for all its permitted values. We must be careful here not to + * eliminate the default partition. We can recognize that by it having a + * zero value in both arrays. + */ + excluded_parts = NULL; + + for (i = 0; i < nparts; i++) + { + if (datums_found[i] >= datums_in_part[i] && datums_found[i] > 0) + excluded_parts = bms_add_member(excluded_parts, i); + } + + /* + * Because the above clauses are strict, we can also exclude the NULL + * partition, provided it does not also allow non-NULL values. 
+ */ + if (partition_bound_accepts_nulls(boundinfo) && + datums_in_part[boundinfo->null_index] == 0) + excluded_parts = bms_add_member(excluded_parts, + boundinfo->null_index); + + pfree(datums_in_part); + pfree(datums_found); + + return excluded_parts; +} + +/* * get_partition_operator * * Return oid of the operator of given strategy for a given partition key diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index f84da801c6..dd2974e0e3 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -2132,6 +2132,62 @@ _copyOnConflictExpr(const OnConflictExpr *from) return newnode; } +/* + * _copyPartitionPruneStepNoop + */ +static PartitionPruneStepNoop * +_copyPartitionPruneStepNoop(const PartitionPruneStepNoop *from) +{ + PartitionPruneStepNoop *newnode = makeNode(PartitionPruneStepNoop); + + /* Nothing to copy. */ + + return newnode; +} + +/* + * _copyPartitionPruneStepOp + */ +static PartitionPruneStepOp * +_copyPartitionPruneStepOp(const PartitionPruneStepOp *from) +{ + PartitionPruneStepOp *newnode = makeNode(PartitionPruneStepOp); + + COPY_SCALAR_FIELD(opstrategy); + COPY_NODE_FIELD(values); + + return newnode; +} + +/* + * _copyPartitionPruneStepNullness + */ +static PartitionPruneStepNullness * +_copyPartitionPruneStepNullness(const PartitionPruneStepNullness *from) +{ + PartitionPruneStepNullness *newnode = makeNode(PartitionPruneStepNullness); + + COPY_BITMAPSET_FIELD(keyisnull); + COPY_BITMAPSET_FIELD(keyisnotnull); + + return newnode; +} + +/* + * _copyPartitionPruneStepCombine + */ +static PartitionPruneStepCombine * +_copyPartitionPruneStepCombine(const PartitionPruneStepCombine *from) +{ + PartitionPruneStepCombine *newnode = makeNode(PartitionPruneStepCombine); + + COPY_SCALAR_FIELD(combineOp); + COPY_NODE_FIELD(argsteps); + COPY_NODE_FIELD(argvalues); + + return newnode; +} + /* **************************************************************** * relation.h copy functions * @@ -5022,6 +5078,18 @@ 
copyObjectImpl(const void *from) case T_OnConflictExpr: retval = _copyOnConflictExpr(from); break; + case T_PartitionPruneStepNoop: + retval = _copyPartitionPruneStepNoop(from); + break; + case T_PartitionPruneStepOp: + retval = _copyPartitionPruneStepOp(from); + break; + case T_PartitionPruneStepNullness: + retval = _copyPartitionPruneStepNullness(from); + break; + case T_PartitionPruneStepCombine: + retval = _copyPartitionPruneStepCombine(from); + break; /* * RELATION NODES diff --git a/src/backend/nodes/nodeFuncs.c b/src/backend/nodes/nodeFuncs.c index 6c76c41ebe..1b475a7395 100644 --- a/src/backend/nodes/nodeFuncs.c +++ b/src/backend/nodes/nodeFuncs.c @@ -2146,6 +2146,27 @@ expression_tree_walker(Node *node, return true; } break; + case T_PartitionPruneStepNoop: + case T_PartitionPruneStepNullness: + break; /* no sub-structure to walk */ + case T_PartitionPruneStepOp: + { + PartitionPruneStepOp *opstep = (PartitionPruneStepOp *) node; + + if (walker((Node *) opstep->values, context)) + return true; + } + break; + case T_PartitionPruneStepCombine: + { + PartitionPruneStepCombine *cstep = (PartitionPruneStepCombine *) node; + + if (walker((Node *) cstep->argsteps, context)) + return true; + if (walker((Node *) cstep->argvalues, context)) + return true; + } + break; case T_JoinExpr: { JoinExpr *join = (JoinExpr *) node; @@ -2932,6 +2953,36 @@ expression_tree_mutator(Node *node, return (Node *) newnode; } break; + case T_PartitionPruneStepNoop: + { + PartitionPruneStepNoop *noopstep = (PartitionPruneStepNoop *) node; + PartitionPruneStepNoop *newnode; + + FLATCOPY(newnode, noopstep, PartitionPruneStepNoop); + + return (Node *) newnode; + } + case T_PartitionPruneStepOp: + { + PartitionPruneStepOp *opstep = (PartitionPruneStepOp *) node; + PartitionPruneStepOp *newnode; + + FLATCOPY(newnode, opstep, PartitionPruneStepOp); + MUTATE(newnode->values, opstep->values, List *); + + return (Node *) newnode; + } + case T_PartitionPruneStepCombine: + { + PartitionPruneStepCombine *cstep = ( 
PartitionPruneStepCombine *) node; + PartitionPruneStepCombine *newnode; + + FLATCOPY(newnode, cstep, PartitionPruneStepCombine); + MUTATE(newnode->argsteps, cstep->argsteps, List *); + MUTATE(newnode->argvalues, cstep->argvalues, List *); + + return (Node *) newnode; + } case T_JoinExpr: { JoinExpr *join = (JoinExpr *) node; diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index 1c792a00eb..542c4a2bca 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -33,6 +33,7 @@ #include "optimizer/clauses.h" #include "optimizer/cost.h" #include "optimizer/geqo.h" +#include "optimizer/partprune.h" #include "optimizer/pathnode.h" #include "optimizer/paths.h" #include "optimizer/plancat.h" @@ -862,6 +863,7 @@ static void set_append_rel_size(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTblEntry *rte) { + Relids live_children = NULL; int parentRTindex = rti; bool has_live_children; double parent_rows; @@ -875,6 +877,9 @@ set_append_rel_size(PlannerInfo *root, RelOptInfo *rel, Assert(IS_SIMPLE_REL(rel)); + if (rte->relkind == RELKIND_PARTITIONED_TABLE) + live_children = prune_append_rel_partitions(root, rel); + /* * Initialize to compute size estimates for whole append relation. * @@ -1123,6 +1128,17 @@ set_append_rel_size(PlannerInfo *root, RelOptInfo *rel, continue; } + if (IS_PARTITIONED_REL(rel) && + !bms_is_member(appinfo->child_relid, live_children)) + { + /* + * This child need not be scanned, so we can omit it from the + * appendrel. + */ + set_dummy_rel_pathlist(childrel); + continue; + } + if (relation_excluded_by_constraints(root, childrel, childRTE)) { /* diff --git a/src/backend/optimizer/util/Makefile b/src/backend/optimizer/util/Makefile index c54d0a690d..aebd98875e 100644 --- a/src/backend/optimizer/util/Makefile +++ b/src/backend/optimizer/util/Makefile @@ -12,7 +12,7 @@ subdir = src/backend/optimizer/util top_builddir = ../../../.. 
include $(top_builddir)/src/Makefile.global -OBJS = clauses.o joininfo.o orclauses.o pathnode.o placeholder.o \ +OBJS = clauses.o joininfo.o orclauses.o partprune.o pathnode.o placeholder.o \ plancat.o predtest.o relnode.o restrictinfo.o tlist.o var.o include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/optimizer/util/partprune.c b/src/backend/optimizer/util/partprune.c new file mode 100644 index 0000000000..475eccf765 --- /dev/null +++ b/src/backend/optimizer/util/partprune.c @@ -0,0 +1,1252 @@ +/*------------------------------------------------------------------------- + * + * partprune.c + * Provides functions to prune partitions of a partitioned table by + * comparing provided set of clauses with the table's partitions' + * boundaries + * + * + * + * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/optimizer/util/partprune.c + * + *------------------------------------------------------------------------- +*/ + +#include "postgres.h" + +#include "access/hash.h" +#include "catalog/pg_operator.h" +#include "catalog/pg_opfamily.h" +#include "catalog/pg_type.h" +#include "nodes/makefuncs.h" +#include "nodes/nodeFuncs.h" +#include "optimizer/clauses.h" +#include "optimizer/partprune.h" +#include "optimizer/planner.h" +#include "optimizer/predtest.h" +#include "optimizer/prep.h" +#include "parser/parse_coerce.h" +#include "parser/parsetree.h" +#include "rewrite/rewriteManip.h" +#include "utils/lsyscache.h" + +/* + * Information about a clause matched with a partition key. + */ +typedef struct PartClauseInfo +{ + int keyno; /* Partition key number (0 to partnatts - 1) */ + Oid opno; /* operator used to compare partkey to 'value' */ + Expr *value; /* The value the partition key is being compared to */ + + /* cached info. 
*/
+	int			op_strategy;
+} PartClauseInfo;
+
+/* Outcome of trying to match a single clause against a single partition key */
+typedef enum PartClauseMatchStatus
+{
+	PARTCLAUSE_NOMATCH,			/* not about this key; try the next key */
+	PARTCLAUSE_MATCH_CLAUSE,	/* matched; a PartClauseInfo was returned */
+	PARTCLAUSE_MATCH_NULLNESS,	/* matched an IS [NOT] NULL test on the key */
+	PARTCLAUSE_MATCH_STEPS,		/* matched; pruning steps were returned */
+	PARTCLAUSE_MATCH_CONTRADICT,	/* matched, but contradicts other clauses */
+	PARTCLAUSE_UNSUPPORTED		/* clause is of no use for any key */
+} PartClauseMatchStatus;
+
+static List *generate_partition_pruning_steps_internal(
+										PartitionPruneContext *context,
+										List *clauses,
+										bool *constfalse);
+static PartClauseMatchStatus match_clause_to_partition_key(
+							  PartitionPruneContext *context,
+							  Expr *clause, Expr *partkey, int partkeyidx,
+							  Bitmapset **keyisnull, Bitmapset **keyisnotnull,
+							  PartClauseInfo **pc, List **clause_steps,
+							  bool *is_neop_listp);
+static bool match_boolean_partition_clause(Oid partopfamily, Expr *clause,
+							   Expr *partkey, Expr **rightop);
+static List *get_steps_using_prefix(PartClauseInfo *last, List *prefix);
+static List *get_steps_using_prefix_recurse(PartClauseInfo *last,
+							   List *prefix,
+							   ListCell *start_in_prefix,
+							   List *step_values);
+
+/*
+ * prune_append_rel_partitions
+ *		Compute the set of partitions of 'rel' that have to be scanned.
+ *
+ * The result holds the RT indexes of the partitions that survive pruning
+ * against rel's baserestrictinfo quals.  Without any quals every partition
+ * is returned; if the quals turn out to be constant-false the result is
+ * NULL (the empty set).
+ */
+Relids
+prune_append_rel_partitions(PlannerInfo *root, RelOptInfo *rel)
+{
+	Relids		scan_relids = NULL;
+	List	   *quals = rel->baserestrictinfo;
+	PartitionPruneContext context;
+	List	   *steps;
+	Bitmapset  *survivors;
+	bool		constfalse;
+	int			natts;
+	int			idx;
+
+	/* Nothing to prune with, so each and every partition must be scanned. */
+	if (quals == NIL)
+	{
+		for (idx = 0; idx < rel->nparts; idx++)
+			scan_relids = bms_add_member(scan_relids,
+										 rel->part_rels[idx]->relid);
+		return scan_relids;
+	}
+
+	/* Describe the partitioned table to the pruning machinery. */
+	natts = rel->part_scheme->partnatts;
+	memset(&context, 0, sizeof(context));
+	context.relid = rel->relid;
+	context.strategy = rel->part_scheme->strategy;
+	context.partnatts = natts;
+	context.partopfamily = rel->part_scheme->partopfamily;
+	context.partopcintype = rel->part_scheme->partopcintype;
+	context.partcollation = rel->part_scheme->partcollation;
+	context.partsupfunc = rel->part_scheme->partsupfunc;
+	context.nparts = rel->nparts;
+	context.boundinfo = rel->boundinfo;
+	context.has_default_part = rel->has_default_part;
+	context.partition_qual = rel->partition_qual;
+
+	/* One key expression per partition column. */
+	context.partkeys = (Expr **) palloc(sizeof(Expr *) * natts);
+	for (idx = 0; idx < natts; idx++)
+		context.partkeys[idx] = linitial(rel->partexprs[idx]);
+
+	/* Turn the quals into pruning steps. */
+	steps = generate_partition_pruning_steps(&context, quals, &constfalse);
+
+	/* Constant-false quals: no partition needs to be scanned at all. */
+	if (constfalse)
+		return scan_relids;
+
+	/* Run the steps, then translate partition indexes into RT indexes. */
+	survivors = get_unpruned_partitions(&context, steps);
+	idx = -1;
+	while ((idx = bms_next_member(survivors, idx)) >= 0)
+		scan_relids = bms_add_member(scan_relids,
+									 rel->part_rels[idx]->relid);
+
+	return scan_relids;
+}
+
+/*
+ * generate_partition_pruning_steps
+ *		Public entry point: convert 'clauses' into a list of "partition
+ *		pruning steps".
+ *
+ * *constfalse is set to true on return if any clause in the input list is
+ * a pseudo-constant "false".
+ */
+List *
+generate_partition_pruning_steps(PartitionPruneContext *context,
+								 List *clauses,
+								 bool *constfalse)
+{
+	/* The list may get modified further down, so operate on a copy. */
+	List	   *worklist = list_copy(clauses);
+
+	/*
+	 * A sub-partitioned table may share partition keys with its parent, and
+	 * the hierarchy may then restrict the parent partition to a narrower
+	 * range of values than the sub-partitioned table's own bounds suggest.
+	 * We could therefore end up including partitions that cannot possibly
+	 * contain tuples matching 'clauses'.  Such an arrangement is perhaps
+	 * unlikely for non-default partitions but more likely with default
+	 * partitions, so only when a default partition exists do we add the
+	 * table's own partition qual to the clause list.  That may allow the
+	 * default partition to be eliminated.
+	 */
+	if (context->has_default_part && context->partition_qual != NIL)
+	{
+		List	   *own_qual;
+
+		own_qual = (List *) expression_planner((Expr *) context->partition_qual);
+
+		/* Partition quals are built with varno 1; fix that up if needed. */
+		if (context->relid != 1)
+			ChangeVarNodes((Node *) own_qual, 1, context->relid, 0);
+
+		worklist = list_concat(worklist, own_qual);
+	}
+
+	/* The real work happens in the recursive workhorse. */
+	return generate_partition_pruning_steps_internal(context, worklist,
+													 constfalse);
+}
+
+/* Module-local functions */
+
+/*
+ * generate_partition_pruning_steps_internal
+ *		Processes 'clauses' to generate partition pruning steps.
+ *
+ * For each operator clause that's matched with a partition key, we generate
+ * a PartitionPruneStepOp containing relevant details of the operator and
+ * the expression whose value to use for comparison against partition bounds.
+ *
+ * If we encounter an OR clause, we generate a PartitionPruneStepCombine whose
+ * arguments are other partition pruning steps, each of which might be a
+ * PartitionPruneStepOp or another PartitionPruneStepCombine.
+ *
+ * If we find a RestrictInfo that's marked as pseudoconstant and contains a
+ * constant false value for clause, we stop generating any further steps and
+ * return NIL (no pruning steps) after setting *constfalse to true.
+ *
+ * Note: the 'clauses' List may be modified inside this function. Callers may
+ * like to make a copy of important lists before passing them to this
+ * function.
+ */
+static List *
+generate_partition_pruning_steps_internal(PartitionPruneContext *context,
+										  List *clauses,
+										  bool *constfalse)
+{
+	List	   *keyclauses[PARTITION_MAX_KEYS],
+			   *btree_clauses[BTMaxStrategyNumber],
+			   *hash_clauses[HTMaxStrategyNumber],
+			   *ne_clauses = NIL;
+	Bitmapset  *keyisnull = NULL,
+			   *keyisnotnull = NULL;
+	bool		foundkeyclause = false;
+	bool		need_next_key;
+	List	   *steps = NIL;
+	ListCell   *lc;
+	int			i;
+
+	*constfalse = false;
+	memset(keyclauses, 0, sizeof(keyclauses));
+
+	/*
+	 * Pass 1: classify every clause.  Note that AND arguments are appended
+	 * to 'clauses' while we are iterating over it, so they get visited by
+	 * this very loop.
+	 */
+	foreach(lc, clauses)
+	{
+		Expr	   *clause = (Expr *) lfirst(lc);
+
+		if (IsA(clause, RestrictInfo))
+		{
+			RestrictInfo *rinfo = (RestrictInfo *) clause;
+
+			clause = rinfo->clause;
+
+			/*
+			 * Only look at constvalue once we know the clause really is a
+			 * Const; a pseudoconstant qual can be some other expression.
+			 */
+			if (rinfo->pseudoconstant &&
+				IsA(clause, Const) &&
+				!DatumGetBool(((Const *) clause)->constvalue))
+			{
+				*constfalse = true;
+				return NIL;
+			}
+		}
+
+		/* Get the BoolExpr's out of the way.*/
+		if (IsA(clause, BoolExpr))
+		{
+			if (or_clause((Node *) clause))
+			{
+				PartitionPruneStepCombine *combineStep;
+				List	   *all_arg_steps = NIL;
+				bool		all_args_constfalse = true;
+				ListCell   *lc1;
+
+				/* Get pruning step for each arg. */
+				foreach(lc1, ((BoolExpr *) clause)->args)
+				{
+					Expr	   *arg = lfirst(lc1);
+					bool		arg_constfalse;
+					List	   *argsteps;
+
+					argsteps =
+						generate_partition_pruning_steps_internal(context,
+														  list_make1(arg),
+														  &arg_constfalse);
+					if (!arg_constfalse)
+						all_args_constfalse = false;
+					if (argsteps != NIL)
+					{
+						if (list_length(argsteps) == 1)
+							all_arg_steps = lappend(all_arg_steps,
+													linitial(argsteps));
+						else
+						{
+							PartitionPruneStepCombine *argcomb;
+
+							/*
+							 * More than one step came back for this arm, so
+							 * wrap them in a nested combine step.  The steps
+							 * of an arm are implicitly AND'd unless the arm
+							 * is itself an OR expression.  The arm need not
+							 * be a BoolExpr (a ScalarArrayOpExpr using ALL
+							 * can also yield several steps), so check the
+							 * node type before looking at boolop.
+							 */
+							argcomb = makeNode(PartitionPruneStepCombine);
+							if (IsA(arg, BoolExpr) &&
+								((BoolExpr *) arg)->boolop == OR_EXPR)
+								argcomb->combineOp = COMBINE_OR;
+							else
+								argcomb->combineOp = COMBINE_AND;
+							argcomb->argsteps = argsteps;
+							argcomb->argvalues = NIL;
+
+							all_arg_steps = lappend(all_arg_steps, argcomb);
+						}
+					}
+					else
+					{
+						List	   *partconstr = context->partition_qual;
+						PartitionPruneStepNoop *noop;
+
+						/*
+						 * No steps for this arm.  If the arm is refuted by
+						 * the table's own partition constraint it cannot
+						 * select anything and is simply dropped; otherwise
+						 * it has to be represented by a step that prunes
+						 * nothing.
+						 */
+						if (partconstr)
+						{
+							partconstr = (List *)
+								expression_planner((Expr *) partconstr);
+							if (context->relid != 1)
+								ChangeVarNodes((Node *) partconstr, 1,
+											   context->relid, 0);
+							if (predicate_refuted_by(partconstr,
+													 list_make1(arg),
+													 false))
+								continue;
+						}
+
+						noop = makeNode(PartitionPruneStepNoop);
+						all_arg_steps = lappend(all_arg_steps, noop);
+					}
+				}
+
+				*constfalse = all_args_constfalse;
+				if (*constfalse)
+					return NIL;
+
+				combineStep = makeNode(PartitionPruneStepCombine);
+				combineStep->combineOp = COMBINE_OR;
+				combineStep->argsteps = all_arg_steps;
+				combineStep->argvalues = NIL;
+				steps = lappend(steps, combineStep);
+				continue;
+			}
+			else if (and_clause((Node *) clause))
+			{
+				/*
+				 * Queue its args to be processed later within the same
+				 * invocation.
+				 */
+				clauses = list_concat(clauses,
+									  list_copy(((BoolExpr *) clause)->args));
+				continue;
+			}
+
+			/*
+			 * Fall-through for a NOT clause, which is handled in
+			 * match_clause_to_partition_key().
+			 */
+		}
+
+		/* Try the clause against each partition key in turn. */
+		for (i = 0; i < context->partnatts; i++)
+		{
+			Expr	   *partkey = context->partkeys[i];
+			bool		is_neop_listp = false;
+			PartClauseInfo *pc = NULL;
+			List	   *clause_steps = NIL;
+			bool		unsupported_clause = false;
+
+			switch (match_clause_to_partition_key(context, clause, partkey, i,
+												  &keyisnull, &keyisnotnull,
+												  &pc, &clause_steps,
+												  &is_neop_listp))
+			{
+				case PARTCLAUSE_MATCH_CLAUSE:
+					foundkeyclause = true;
+					Assert(pc != NULL);
+					if (is_neop_listp)
+						ne_clauses = lappend(ne_clauses, pc);
+					else
+						keyclauses[i] = lappend(keyclauses[i], pc);
+					break;
+
+				case PARTCLAUSE_MATCH_NULLNESS:
+					/*
+					 * match_clause_to_partition_key() already set the values
+					 * in keyisnull or keyisnotnull for us.
+					 */
+					foundkeyclause = true;
+					break;
+
+				case PARTCLAUSE_MATCH_STEPS:
+					Assert(clause_steps != NIL);
+					steps = list_concat(steps, clause_steps);
+					break;
+
+				case PARTCLAUSE_MATCH_CONTRADICT:
+					/* Give up on pruning; *constfalse is left false. */
+					return NIL;
+
+				case PARTCLAUSE_NOMATCH:
+					/* go check for the next key. */
+					break;
+
+				case PARTCLAUSE_UNSUPPORTED:
+					unsupported_clause = true;
+					break;
+
+				default:
+					break;
+			}
+
+			/* go check the next clause. */
+			if (unsupported_clause)
+				break;
+		}
+	}
+
+	/* There were nothing but combining steps in the clauses we got. */
+	if (!foundkeyclause)
+		return steps;
+
+	/*
+	 * Generate PartitionPruneStepOp nodes from the clauses in keyclauses
+	 * lists.
+	 */
+
+	/*
+	 * Group clauses according to the operator strategies, generating one list
+	 * for each partitioning operator strategy.
+	 */
+	need_next_key = true;
+	memset(btree_clauses, 0, sizeof(btree_clauses));
+	memset(hash_clauses, 0, sizeof(hash_clauses));
+	for (i = 0; i < context->partnatts; i++)
+	{
+		bool		need_cur_less = true,
+					need_cur_eq = true,
+					need_cur_greater = true;
+		List	   *clauselist = keyclauses[i];
+
+		if (!need_next_key || clauselist == NIL)
+			break;
+
+		/*
+		 * Check whether we need this key's clauses.  Basically, we don't if
+		 * we didn't find a requisite clause for adjacently previous column.
+		 */
+		switch (context->strategy)
+		{
+			case PARTITION_STRATEGY_LIST:
+			case PARTITION_STRATEGY_RANGE:
+				{
+					int			j;
+
+					for (j = 0; j < BTMaxStrategyNumber; j++)
+					{
+						if (btree_clauses[j] != NIL)
+						{
+							PartClauseInfo *last = llast(btree_clauses[j]);
+
+							switch (last->op_strategy)
+							{
+								case BTLessStrategyNumber:
+								case BTLessEqualStrategyNumber:
+									if (i > last->keyno + 1)
+										need_cur_less = false;
+									break;
+								case BTEqualStrategyNumber:
+									if (i > last->keyno + 1)
+										need_cur_eq = false;
+									break;
+								case BTGreaterStrategyNumber:
+								case BTGreaterEqualStrategyNumber:
+									if (i > last->keyno + 1)
+										need_cur_greater = false;
+									break;
+							}
+						}
+					}
+
+					break;
+				}
+
+			case PARTITION_STRATEGY_HASH:
+				{
+					if (hash_clauses[HTEqualStrategyNumber - 1] != NIL)
+					{
+						PartClauseInfo *last;
+
+						last = llast(hash_clauses[HTEqualStrategyNumber - 1]);
+						if (i > last->keyno + 1)
+							need_cur_eq = false;
+					}
+					break;
+				}
+
+			default:
+				break;
+		}
+
+		/* clauselist is known to be non-empty here. */
+		if (!need_cur_less && !need_cur_eq && !need_cur_greater)
+			break;
+
+		foreach(lc, clauselist)
+		{
+			PartClauseInfo *pc = (PartClauseInfo *) lfirst(lc);
+			Oid			lefttype,
+						righttype;
+
+			/* Look up the operator's btree/hash strategy number. */
+			if (pc->op_strategy == InvalidStrategy)
+				get_op_opfamily_properties(pc->opno,
+										   context->partopfamily[i],
+										   false,
+										   &pc->op_strategy,
+										   &lefttype,
+										   &righttype);
+
+			switch (context->strategy)
+			{
+				case PARTITION_STRATEGY_LIST:
+				case PARTITION_STRATEGY_RANGE:
+					{
+						bool		need_cur_clause = true,
+									inclusive = false;
+
+						switch (pc->op_strategy)
+						{
+							case BTLessEqualStrategyNumber:
+								inclusive = true;
+								/* fall through */
+							case BTLessStrategyNumber:
+								need_cur_clause = need_cur_less;
+								if (!inclusive)
+									need_next_key = false;
+								break;
+							case BTEqualStrategyNumber:
+								need_cur_clause = need_cur_eq;
+								break;
+							case BTGreaterEqualStrategyNumber:
+								inclusive = true;
+								/* fall through */
+							case BTGreaterStrategyNumber:
+								need_cur_clause = need_cur_greater;
+								if (!inclusive)
+									need_next_key = false;
+								break;
+						}
+
+						if (need_cur_clause)
+							btree_clauses[pc->op_strategy - 1] =
+								lappend(btree_clauses[pc->op_strategy - 1], pc);
+						break;
+					}
+
+				case PARTITION_STRATEGY_HASH:
+					if (pc->op_strategy != HTEqualStrategyNumber)
+						elog(ERROR, "invalid clause for hash partitioning");
+					if (need_cur_eq)
+						hash_clauses[pc->op_strategy - 1] =
+							lappend(hash_clauses[pc->op_strategy - 1], pc);
+					break;
+
+				default:
+					break;
+			}
+		}
+	}
+
+	/*
+	 * If we didn't find clauses for all partition columns in the hash
+	 * partitioning case, give up on pruning.
+	 */
+	if (context->strategy == PARTITION_STRATEGY_HASH &&
+		i < context->partnatts)
+		return NIL;
+
+	/*
+	 * Generate actual steps for various operator strategies by generating
+	 * tuples of values, possibly multiple per operator strategy.
+	 */
+	switch (context->strategy)
+	{
+		case PARTITION_STRATEGY_LIST:
+		case PARTITION_STRATEGY_RANGE:
+			{
+				List	   *eq_clauses = btree_clauses[BTEqualStrategyNumber - 1];
+				List	   *le_clauses = btree_clauses[BTLessEqualStrategyNumber - 1];
+				List	   *ge_clauses = btree_clauses[BTGreaterEqualStrategyNumber - 1];
+
+				/*
+				 * For each strategy, generate tuples of values such that each
+				 * tuple's non-last values come from an equality clause or
+				 * from an inclusive clause of the same direction.
+				 */
+				for (i = 0; i < BTMaxStrategyNumber; i++)
+				{
+					List	   *incl_clauses = NIL;
+
+					/* Inclusive clauses usable as prefix for this strategy */
+					if (i == BTLessStrategyNumber - 1 ||
+						i == BTLessEqualStrategyNumber - 1)
+						incl_clauses = le_clauses;
+					else if (i == BTGreaterStrategyNumber - 1 ||
+							 i == BTGreaterEqualStrategyNumber - 1)
+						incl_clauses = ge_clauses;
+
+					foreach(lc, btree_clauses[i])
+					{
+						PartClauseInfo *pc = lfirst(lc);
+						List	   *prefix = NIL;
+						List	   *pc_steps;
+						int			k;
+
+						/*
+						 * Collect the clauses on the columns preceding pc's
+						 * column.  get_steps_using_prefix() walks the prefix
+						 * one key at a time, so the list must be grouped by
+						 * key number in ascending order; hence gather the
+						 * clauses key by key rather than list by list.
+						 */
+						for (k = 0; k < pc->keyno; k++)
+						{
+							ListCell   *lc1;
+
+							foreach(lc1, eq_clauses)
+							{
+								PartClauseInfo *eqpc = lfirst(lc1);
+
+								if (eqpc->keyno == k)
+									prefix = lappend(prefix, eqpc);
+							}
+
+							foreach(lc1, incl_clauses)
+							{
+								PartClauseInfo *inclpc = lfirst(lc1);
+
+								if (inclpc->keyno == k)
+									prefix = lappend(prefix, inclpc);
+							}
+						}
+
+						if (prefix == NIL && pc->keyno > 0)
+							continue;
+
+						/*
+						 * Considering pc->value as the last value in the
+						 * pruning tuple, try to generate pruning steps for
+						 * tuples containing various combinations of values
+						 * for earlier columns from the clauses in prefix.
+						 */
+						pc_steps = get_steps_using_prefix(pc, prefix);
+						steps = list_concat(steps, list_copy(pc_steps));
+					}
+				}
+				break;
+			}
+
+		case PARTITION_STRATEGY_HASH:
+			{
+				List	   *eq_clauses = hash_clauses[HTEqualStrategyNumber - 1];
+				List	   *pc_steps;
+
+				foreach(lc, eq_clauses)
+				{
+					PartClauseInfo *pc = lfirst(lc);
+					List	   *prefix = NIL;
+					ListCell   *lc1;
+
+					/* Skip to the last column. */
+					if (pc->keyno < context->partnatts - 1)
+						continue;
+
+					foreach(lc1, eq_clauses)
+					{
+						PartClauseInfo *eqpc = lfirst(lc1);
+
+						if (eqpc->keyno == pc->keyno)
+							break;
+						prefix = lappend(prefix, eqpc);
+					}
+
+					pc_steps = get_steps_using_prefix(pc, prefix);
+					steps = list_concat(steps, list_copy(pc_steps));
+				}
+				break;
+			}
+
+		default:
+			break;
+	}
+
+	/* Combine values from all <> operator clauses into one prune step. */
+	if (ne_clauses != NIL)
+	{
+		List	   *argvalues = NIL;
+		PartitionPruneStepCombine *combineStep;
+
+		Assert(context->strategy == PARTITION_STRATEGY_LIST);
+		foreach(lc, ne_clauses)
+		{
+			PartClauseInfo *pc = lfirst(lc);
+
+			argvalues = lappend(argvalues, pc->value);
+		}
+
+		combineStep = makeNode(PartitionPruneStepCombine);
+		combineStep->combineOp = COMBINE_NOT;
+		combineStep->argsteps = NIL;
+		combineStep->argvalues = argvalues;
+		steps = lappend(steps, combineStep);
+	}
+
+	/*
+	 * Generate one prune step for the information derived from IS NULL and
+	 * IS NOT NULL clauses
+	 */
+	if (!bms_is_empty(keyisnull) || !bms_is_empty(keyisnotnull))
+	{
+		PartitionPruneStepNullness *nstep;
+
+		nstep = makeNode(PartitionPruneStepNullness);
+		nstep->keyisnull = keyisnull;
+		nstep->keyisnotnull = keyisnotnull;
+		steps = lappend(steps, nstep);
+	}
+
+	return steps;
+}
+
+/*
+ * If the partition key has a collation, then the clause must have the same
+ * input collation.
If the partition key is non-collatable, we assume the
+ * collation doesn't matter, because while collation wasn't considered when
+ * performing partitioning, the clause still may have a collation assigned
+ * due to the other input being of a collatable type.
+ */
+#define PartCollMatchesExprColl(partcoll, exprcoll) \
+	((partcoll) == InvalidOid || (partcoll) == (exprcoll))
+
+/*
+ * match_clause_to_partition_key
+ *		Match a given clause with the specified partition key
+ *
+ * A word on the outputs this produces.  *pc will contain PartClauseInfo for
+ * this clause if it is successfully selected for pruning, if the clause is
+ * a simple operator clause or becomes one after we recognize the clause as
+ * a specially-shaped Boolean clause.  For clauses that come in the form of
+ * a ScalarArrayOpExpr, we don't generate a PartClauseInfo, but rather
+ * recursively generate pruning steps for the values contained therein.
+ * *is_neop_listp is set if the clause contains a <> operator whose negator
+ * is a btree equality operator and list partitioning is in use.
+ *
+ * Return value:
+ *	PARTCLAUSE_NOMATCH			clause is not about this key (try the next)
+ *	PARTCLAUSE_MATCH_CLAUSE		*pc has been filled in
+ *	PARTCLAUSE_MATCH_NULLNESS	*keyisnull or *keyisnotnull has been updated
+ *	PARTCLAUSE_MATCH_STEPS		*clause_steps has been filled in
+ *	PARTCLAUSE_MATCH_CONTRADICT	clause cannot hold together with what has
+ *								been seen so far, or can never be true
+ *	PARTCLAUSE_UNSUPPORTED		clause is useless for pruning, no matter
+ *								which key it is matched against
+ */
+static PartClauseMatchStatus
+match_clause_to_partition_key(PartitionPruneContext *context,
+							  Expr *clause, Expr *partkey, int partkeyidx,
+							  Bitmapset **keyisnull, Bitmapset **keyisnotnull,
+							  PartClauseInfo **pc, List **clause_steps,
+							  bool *is_neop_listp)
+{
+	Expr	   *value;
+	Oid			partopfamily = context->partopfamily[partkeyidx],
+				partcoll = context->partcollation[partkeyidx];
+
+	/*
+	 * Recognize specially shaped clauses that match with the Boolean
+	 * partition key.
+	 */
+	if (match_boolean_partition_clause(partopfamily, clause, partkey, &value))
+	{
+		*pc = palloc0(sizeof(PartClauseInfo));
+		(*pc)->keyno = partkeyidx;
+		/* Do pruning with the Boolean equality operator. */
+		(*pc)->opno = BooleanEqualOperator;
+		(*pc)->value = value;
+
+		return PARTCLAUSE_MATCH_CLAUSE;
+	}
+	else if (IsA(clause, OpExpr) &&
+			 list_length(((OpExpr *) clause)->args) == 2)
+	{
+		OpExpr	   *opclause = (OpExpr *) clause;
+		Expr	   *leftop,
+				   *rightop;
+		Oid			commutator = InvalidOid,
+					negator = InvalidOid;
+
+		leftop = (Expr *) get_leftop(clause);
+		if (IsA(leftop, RelabelType))
+			leftop = ((RelabelType *) leftop)->arg;
+		rightop = (Expr *) get_rightop(clause);
+		if (IsA(rightop, RelabelType))
+			rightop = ((RelabelType *) rightop)->arg;
+
+		/* check if the clause matches this partition key */
+		if (equal(leftop, partkey))
+			value = rightop;
+		else if (equal(rightop, partkey))
+		{
+			value = leftop;
+			commutator = get_commutator(opclause->opno);
+
+			/* nothing we can do unless we can swap the operands */
+			if (!OidIsValid(commutator))
+				return PARTCLAUSE_UNSUPPORTED;
+		}
+		else
+			/* clause does not match this partition key, but perhaps next. */
+			return PARTCLAUSE_NOMATCH;
+
+		/*
+		 * Partition key also consists of a collation that's specified
+		 * for it, so try to match it too.  There may be multiple keys
+		 * with the same expression but different collations.
+		 */
+		if (!PartCollMatchesExprColl(partcoll, opclause->inputcollid))
+			return PARTCLAUSE_NOMATCH;
+
+		/*
+		 * Matched with this key.  Now check various properties of
+		 * the clause to see if it's sane to use it for pruning.  If
+		 * any of the properties makes it unsuitable for pruning, then
+		 * the clause is useless no matter which key it's matched to.
+		 */
+
+		/* Only allow strict operators.  This will guarantee nulls are filtered. */
+		if (!op_strict(opclause->opno))
+			return PARTCLAUSE_UNSUPPORTED;
+
+		/* We can't use any volatile value to prune partitions. */
+		if (contain_volatile_functions((Node *) value))
+			return PARTCLAUSE_UNSUPPORTED;
+
+		/*
+		 * Normally we only bother with operators that are listed as being
+		 * part of the partitioning operator family.  But we make an exception
+		 * in one case -- operators named '<>' are not listed in any operator
+		 * family whatsoever, in which case, we try to perform partition
+		 * pruning with it only if list partitioning is in use.
+		 */
+		if (!op_in_opfamily(opclause->opno, partopfamily))
+		{
+			if (context->strategy != PARTITION_STRATEGY_LIST)
+				return PARTCLAUSE_UNSUPPORTED;
+
+			/*
+			 * To confirm if the operator is really '<>', check if its negator is
+			 * a btree equality operator.
+			 */
+			negator = get_negator(opclause->opno);
+			if (OidIsValid(negator) && op_in_opfamily(negator, partopfamily))
+			{
+				Oid			lefttype;
+				Oid			righttype;
+				int			strategy;
+
+				get_op_opfamily_properties(negator, partopfamily, false,
+										   &strategy, &lefttype, &righttype);
+
+				if (strategy == BTEqualStrategyNumber)
+					*is_neop_listp = true;
+			}
+
+			/* Operator isn't really what we were hoping it'd be. */
+			if (!*is_neop_listp)
+				return PARTCLAUSE_UNSUPPORTED;
+		}
+
+		/*
+		 * Since we only allow strict operators, check for any
+		 * contradicting IS NULLs.
+		 */
+		if (bms_is_member(partkeyidx, *keyisnull))
+			return PARTCLAUSE_MATCH_CONTRADICT;
+
+		*pc = palloc0(sizeof(PartClauseInfo));
+		(*pc)->keyno = partkeyidx;
+
+		/*
+		 * Record the operator to prune with.  For '<>' that is always the
+		 * negating equality operator, whether or not the operands had to be
+		 * swapped.  Otherwise, if commuted before matching with the key,
+		 * switch the clause's operator to the commutator.
+		 */
+		if (*is_neop_listp)
+		{
+			Assert(OidIsValid(negator));
+			(*pc)->opno = negator;
+		}
+		else if (OidIsValid(commutator))
+			(*pc)->opno = commutator;
+		else
+			(*pc)->opno = opclause->opno;
+		(*pc)->value = value;
+
+		return PARTCLAUSE_MATCH_CLAUSE;
+	}
+	else if (IsA(clause, ScalarArrayOpExpr))
+	{
+		ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) clause;
+		Oid			saop_op = saop->opno;
+		Oid			saop_coll = saop->inputcollid;
+		Expr	   *leftop = (Expr *) linitial(saop->args),
+				   *rightop = (Expr *) lsecond(saop->args);
+		List	   *elem_exprs,
+				   *elem_clauses;
+		ListCell   *lc1;
+		bool		constfalse;
+
+		if (IsA(leftop, RelabelType))
+			leftop = ((RelabelType *) leftop)->arg;
+
+		/* Check it matches this partition key */
+		if (!equal(leftop, partkey) ||
+			!PartCollMatchesExprColl(partcoll, saop->inputcollid))
+			return PARTCLAUSE_NOMATCH;
+
+		/*
+		 * Matched with this key.  Check various properties of the
+		 * clause to see if it can sanely be used for partition
+		 * pruning.
+		 */
+
+		/*
+		 * Only allow strict operators.  This will guarantee nulls are
+		 * filtered.
+		 */
+		if (!op_strict(saop->opno))
+			return PARTCLAUSE_UNSUPPORTED;
+
+		/* Useless if the array has any volatile functions. */
+		if (contain_volatile_functions((Node *) rightop))
+			return PARTCLAUSE_UNSUPPORTED;
+
+		/*
+		 * In case of NOT IN (..), we get a '<>', which we handle if
+		 * list partitioning is in use and we're able to confirm that
+		 * its negator is a btree equality operator belonging to the
+		 * partitioning operator family.
+		 */
+		if (!op_in_opfamily(saop_op, partopfamily))
+		{
+			Oid			negator;
+
+			if (context->strategy != PARTITION_STRATEGY_LIST)
+				return PARTCLAUSE_UNSUPPORTED;
+
+			negator = get_negator(saop_op);
+			if (OidIsValid(negator) && op_in_opfamily(negator, partopfamily))
+			{
+				int			strategy;
+				Oid			lefttype,
+							righttype;
+
+				get_op_opfamily_properties(negator, partopfamily,
+										   false, &strategy,
+										   &lefttype, &righttype);
+				if (strategy != BTEqualStrategyNumber)
+					return PARTCLAUSE_UNSUPPORTED;
+			}
+			else
+				return PARTCLAUSE_UNSUPPORTED;	/* no useful negator */
+		}
+
+		/*
+		 * First generate a list of Const nodes, one for each array element.
+		 */
+		elem_exprs = NIL;
+		if (IsA(rightop, Const))
+		{
+			Const	   *arr = (Const *) rightop;
+			ArrayType  *arrval;
+			int16		elemlen;
+			bool		elembyval;
+			char		elemalign;
+			Datum	   *elem_values;
+			bool	   *elem_nulls;
+			int			num_elems,
+						i;
+
+			/*
+			 * A strict operator applied to a NULL array yields NULL, so the
+			 * clause can never be true.  Bail out before trying to detoast
+			 * a null datum.
+			 */
+			if (arr->constisnull)
+				return PARTCLAUSE_MATCH_CONTRADICT;
+
+			arrval = DatumGetArrayTypeP(arr->constvalue);
+			get_typlenbyvalalign(ARR_ELEMTYPE(arrval),
+								 &elemlen, &elembyval, &elemalign);
+			deconstruct_array(arrval,
+							  ARR_ELEMTYPE(arrval),
+							  elemlen, elembyval, elemalign,
+							  &elem_values, &elem_nulls,
+							  &num_elems);
+			for (i = 0; i < num_elems; i++)
+			{
+				/* Only consider non-null values. */
+				if (!elem_nulls[i])
+				{
+					Const	   *elem_expr = makeConst(ARR_ELEMTYPE(arrval),
+													  -1, arr->constcollid,
+													  elemlen,
+													  elem_values[i],
+													  false, elembyval);
+
+					elem_exprs = lappend(elem_exprs, elem_expr);
+				}
+			}
+		}
+		else if (IsA(rightop, ArrayExpr))
+		{
+			ArrayExpr  *arrexpr = (ArrayExpr *) rightop;
+
+			/*
+			 * For a nested ArrayExpr, we don't know how to get the
+			 * actual scalar values out into a flat list, so we give
+			 * up doing anything with this ScalarArrayOpExpr.
+			 */
+			if (arrexpr->multidims)
+				return PARTCLAUSE_UNSUPPORTED;
+
+			elem_exprs = arrexpr->elements;
+		}
+		else
+		{
+			/* Some other kind of array expression; nothing we can do. */
+			return PARTCLAUSE_UNSUPPORTED;
+		}
+
+		/*
+		 * Now generate a list of clauses, one for each array element,
+		 * of the form: saop_leftop saop_op elem_expr
+		 */
+		elem_clauses = NIL;
+		foreach(lc1, elem_exprs)
+		{
+			Expr	   *elem = (Expr *) lfirst(lc1),
+					   *elem_clause;
+
+			elem_clause = (Expr *) make_opclause(saop_op, BOOLOID,
+												 false,
+												 leftop, elem,
+												 InvalidOid,
+												 saop_coll);
+			elem_clauses = lappend(elem_clauses, elem_clause);
+		}
+
+		/*
+		 * With no usable element (empty array, or nothing but NULLs) there
+		 * is nothing to generate steps from; just don't use the clause.
+		 */
+		if (elem_clauses == NIL)
+			return PARTCLAUSE_UNSUPPORTED;
+
+		/*
+		 * Build a combine step as if for an OR clause or add the
+		 * clauses to the end of the list that's being processed
+		 * currently.
+		 */
+		if (saop->useOr && list_length(elem_clauses) > 1)
+			elem_clauses = list_make1(makeBoolExpr(OR_EXPR, elem_clauses, -1));
+
+		*clause_steps = generate_partition_pruning_steps_internal(context,
+															elem_clauses,
+															&constfalse);
+		if (constfalse)
+			return PARTCLAUSE_MATCH_CONTRADICT;
+
+		/* Step generation may legitimately come back empty-handed. */
+		if (*clause_steps == NIL)
+			return PARTCLAUSE_UNSUPPORTED;
+
+		return PARTCLAUSE_MATCH_STEPS;
+	}
+	else if (IsA(clause, NullTest))
+	{
+		NullTest   *nulltest = (NullTest *) clause;
+		Expr	   *arg = nulltest->arg;
+
+		if (IsA(arg, RelabelType))
+			arg = ((RelabelType *) arg)->arg;
+
+		/* Does arg match with this partition key column? */
+		if (!equal(arg, partkey))
+			return PARTCLAUSE_NOMATCH;
+
+		if (nulltest->nulltesttype == IS_NULL)
+		{
+			/* check for conflicting IS NOT NULLs */
+			if (bms_is_member(partkeyidx, *keyisnotnull))
+				return PARTCLAUSE_MATCH_CONTRADICT;
+
+			*keyisnull = bms_add_member(*keyisnull, partkeyidx);
+		}
+		else
+		{
+			/* check for conflicting IS NULLs */
+			if (bms_is_member(partkeyidx, *keyisnull))
+				return PARTCLAUSE_MATCH_CONTRADICT;
+
+			*keyisnotnull = bms_add_member(*keyisnotnull, partkeyidx);
+		}
+
+		return PARTCLAUSE_MATCH_NULLNESS;
+	}
+
+	return PARTCLAUSE_UNSUPPORTED;
+}
+
+/*
+ * match_boolean_partition_clause
+ *
+ * Sets *rightop to a Const containing true or false value and returns true if
+ * we're able to match the clause to the partition key as specially-shaped
+ * Boolean clause.  Returns false otherwise with *rightop set to NULL.
+ */
+static bool
+match_boolean_partition_clause(Oid partopfamily, Expr *clause, Expr *partkey,
+							   Expr **rightop)
+{
+	Expr	   *leftop;
+
+	/* Make sure every "false" return leaves *rightop as NULL. */
+	*rightop = NULL;
+
+	if (!IsBooleanOpfamily(partopfamily))
+		return false;
+
+	if (IsA(clause, BooleanTest))
+	{
+		BooleanTest *btest = (BooleanTest *) clause;
+
+		/*
+		 * Only IS TRUE and IS FALSE can be represented as "key = const".
+		 * IS [NOT] UNKNOWN has no constant to compare with.  IS NOT TRUE
+		 * and IS NOT FALSE are satisfied by a NULL key as well, so pruning
+		 * with an equality comparison would wrongly discard the partition
+		 * that holds NULLs; leave those clauses alone.
+		 */
+		if (btest->booltesttype != IS_TRUE &&
+			btest->booltesttype != IS_FALSE)
+			return false;
+
+		leftop = btest->arg;
+		if (IsA(leftop, RelabelType))
+			leftop = ((RelabelType *) leftop)->arg;
+
+		/* Clause does not match this partition key. */
+		if (!equal(leftop, partkey))
+			return false;
+
+		*rightop = (Expr *) makeBoolConst(btest->booltesttype == IS_TRUE,
+										  false);
+		return true;
+	}
+	else
+	{
+		bool		is_not_clause = not_clause((Node *) clause);
+
+		/* Look through a NOT, remembering that we did so. */
+		leftop = is_not_clause ? get_notclausearg(clause) : clause;
+
+		if (IsA(leftop, RelabelType))
+			leftop = ((RelabelType *) leftop)->arg;
+
+		/*
+		 * "key" means key = true and "NOT key" means key = false.  If it is
+		 * the negation of the operand that equals the key, the sense flips:
+		 * "x" with key "NOT x" means key = false, whereas "NOT x" with key
+		 * "NOT x" means key = true.
+		 */
+		if (equal(leftop, partkey))
+			*rightop = (Expr *) makeBoolConst(!is_not_clause, false);
+		else if (equal(negate_clause((Node *) leftop), partkey))
+			*rightop = (Expr *) makeBoolConst(is_not_clause, false);
+	}
+
+	/* Still NULL if the clause does not match this partition key. */
+	return (*rightop != NULL);
+}
+
+/*
+ * Recursively generate tuples and subsequently a PartitionPruneStepOp for
+ * each tuple.
+ *
+ * Example: Consider a partition key named (a, b, c) and a set of mutually
+ * AND'd clauses a <= 1 and a <= 2 and b <= 3 and b <= 4 and c = 2.  If the
+ * caller passed c = 2 as 'last', 'prefix' should contain a <= 1, a <= 2,
+ * b <= 3 and b <= 4.  Pruning steps containing = operator (from c = 2) that
+ * will be generated as a result will contain following tuples respectively:
+ * (1, 3, 2), (1, 4, 2), (2, 3, 2), and (2, 4, 2).
+ */
+static List *
+get_steps_using_prefix(PartClauseInfo *last, List *prefix)
+{
+	/* Quick exit if there are no values to prefix last's value with. */
+	if (list_length(prefix) == 0)
+	{
+		PartitionPruneStepOp *step = makeNode(PartitionPruneStepOp);
+
+		step->opstrategy = last->op_strategy;
+		step->values = list_make1(last->value);
+
+		return list_make1(step);
+	}
+
+	return get_steps_using_prefix_recurse(last, prefix, list_head(prefix),
+										  NIL);
+}
+
+/*
+ * get_steps_using_prefix_recurse
+ *		Workhorse for get_steps_using_prefix().
+ *
+ * 'start_in_prefix' is the first cell in 'prefix' holding a clause for the
+ * key column handled at this level, and 'step_values' holds the values that
+ * have been chosen for the columns before it.  The cells of 'prefix' are
+ * expected to be grouped by key number in ascending order.
+ *
+ * 'step_values' is never modified nor freed here: each tuple under
+ * construction gets a private copy, because the caller keeps using its list
+ * to build the tuples for the sibling clauses of the same column.
+ */
+static List *
+get_steps_using_prefix_recurse(PartClauseInfo *last,
+							   List *prefix,
+							   ListCell *start_in_prefix,
+							   List *step_values)
+{
+	List	   *result = NIL;
+	ListCell   *lc;
+	int			step_keyno;
+
+	Assert(start_in_prefix != NULL);
+	step_keyno = ((PartClauseInfo *) lfirst(start_in_prefix))->keyno;
+	if (step_keyno == last->keyno - 1)
+	{
+		/*
+		 * Recursion ends here.  We generate pruning steps here by
+		 * finalizing the step_values list.
+		 */
+		Assert(list_length(step_values) == step_keyno);
+		for_each_cell(lc, start_in_prefix)
+		{
+			PartClauseInfo *prefix_pc = lfirst(lc);
+			PartitionPruneStepOp *step;
+			List	   *step_values1;
+
+			/* Only clauses for this very column may supply its value. */
+			if (prefix_pc->keyno != step_keyno)
+				break;
+
+			step_values1 = list_copy(step_values);
+			step_values1 = lappend(step_values1, prefix_pc->value);
+			step_values1 = lappend(step_values1, last->value);
+			step = makeNode(PartitionPruneStepOp);
+			step->opstrategy = last->op_strategy;
+			step->values = step_values1;
+			result = lappend(result, step);
+		}
+	}
+	else
+	{
+		ListCell   *next_start_in_prefix;
+
+		/* Find where the clauses for the following column(s) begin. */
+		for_each_cell(lc, start_in_prefix)
+		{
+			PartClauseInfo *pc = lfirst(lc);
+
+			if (pc->keyno > step_keyno)
+				break;
+		}
+		next_start_in_prefix = lc;
+
+		/*
+		 * Without any clause for a later column no complete tuple can be
+		 * formed, so there are no steps to generate.
+		 */
+		if (next_start_in_prefix == NULL)
+			return NIL;
+
+		for_each_cell(lc, start_in_prefix)
+		{
+			PartClauseInfo *pc = lfirst(lc);
+			List	   *step_values1;
+
+			if (pc->keyno != step_keyno)
+				break;
+
+			/*
+			 * Extend a private copy of the values chosen so far with this
+			 * clause's value; the next clause of the same column must start
+			 * over from the unmodified step_values.
+			 */
+			step_values1 = list_copy(step_values);
+			step_values1 = lappend(step_values1, pc->value);
+
+			result = list_concat(result,
+								 get_steps_using_prefix_recurse(last,
+														prefix,
+														next_start_in_prefix,
+														step_values1));
+		}
+	}
+
+	return result;
+}
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c
index 709a00924e..e272c445bf 100644
--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -1171,7 +1171,6 @@ get_relation_constraints(PlannerInfo *root,
 	Index		varno = rel->relid;
 	Relation	relation;
 	TupleConstr *constr;
-	List	   *pcqual;
 
 	/*
 	 * We assume the relation has already been safely locked.
@@ -1257,24 +1256,34 @@ get_relation_constraints(PlannerInfo *root,
 		}
 	}
 
-	/* Append partition predicates, if any */
-	pcqual = RelationGetPartitionQual(relation);
-	if (pcqual)
+	/*
+	 * Append partition predicates, if any.
+	 *
+	 * For selects, partition pruning uses the parent table's partition bound
+	 * descriptor, instead of constraint exclusion which is driven by the
+	 * individual partition's partition constraint.
+	 */
+	if (root->parse->commandType != CMD_SELECT)
 	{
-		/*
-		 * Run the partition quals through const-simplification similar to
-		 * check constraints.  We skip canonicalize_qual, though, because
-		 * partition quals should be in canonical form already; also, since
-		 * the qual is in implicit-AND format, we'd have to explicitly convert
-		 * it to explicit-AND format and back again.
-		 */
-		pcqual = (List *) eval_const_expressions(root, (Node *) pcqual);
+		List	   *pcqual = RelationGetPartitionQual(relation);
 
-		/* Fix Vars to have the desired varno */
-		if (varno != 1)
-			ChangeVarNodes((Node *) pcqual, 1, varno, 0);
+		if (pcqual)
+		{
+			/*
+			 * Run the partition quals through const-simplification similar to
+			 * check constraints.  We skip canonicalize_qual, though, because
+			 * partition quals should be in canonical form already; also,
+			 * since the qual is in implicit-AND format, we'd have to
+			 * explicitly convert it to explicit-AND format and back again.
+ */ + pcqual = (List *) eval_const_expressions(root, (Node *) pcqual); - result = list_concat(result, pcqual); + /* Fix Vars to have the desired varno */ + if (varno != 1) + ChangeVarNodes((Node *) pcqual, 1, varno, 0); + + result = list_concat(result, pcqual); + } } heap_close(relation, NoLock); @@ -1865,6 +1874,9 @@ set_relation_partition_info(PlannerInfo *root, RelOptInfo *rel, rel->boundinfo = partition_bounds_copy(partdesc->boundinfo, partkey); rel->nparts = partdesc->nparts; set_baserel_partition_key_exprs(relation, rel); + rel->has_default_part = + OidIsValid(get_default_oid_from_partdesc(partdesc)); + rel->partition_qual = RelationGetPartitionQual(relation); } /* diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c index da8f0f93fc..7f1428b8d8 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -154,6 +154,8 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent) rel->part_scheme = NULL; rel->nparts = 0; rel->boundinfo = NULL; + rel->has_default_part = false; + rel->partition_qual = NIL; rel->part_rels = NULL; rel->partexprs = NULL; rel->nullable_partexprs = NULL; @@ -567,6 +569,8 @@ build_join_rel(PlannerInfo *root, joinrel->part_scheme = NULL; joinrel->nparts = 0; joinrel->boundinfo = NULL; + joinrel->has_default_part = false; + joinrel->partition_qual = NIL; joinrel->part_rels = NULL; joinrel->partexprs = NULL; joinrel->nullable_partexprs = NULL; @@ -734,6 +738,10 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel, joinrel->has_eclass_joins = false; joinrel->top_parent_relids = NULL; joinrel->part_scheme = NULL; + joinrel->nparts = 0; + joinrel->boundinfo = NULL; + joinrel->has_default_part = false; + joinrel->partition_qual = NIL; joinrel->part_rels = NULL; joinrel->partexprs = NULL; joinrel->nullable_partexprs = NULL; diff --git a/src/include/catalog/partition.h b/src/include/catalog/partition.h index 2faf0ca26e..49c0546e5f 100644 --- 
a/src/include/catalog/partition.h +++ b/src/include/catalog/partition.h @@ -42,6 +42,38 @@ typedef struct PartitionDescData typedef struct PartitionDescData *PartitionDesc; +/* + * PartitionPruneContext + * + * Information about a partitioned table needed to perform partition pruning. + */ +typedef struct PartitionPruneContext +{ + /* Table's range table index */ + int relid; + + /* Partition key information */ + char strategy; + int partnatts; + Expr **partkeys; + Oid *partopfamily; + Oid *partopcintype; + Oid *partcollation; + FmgrInfo *partsupfunc; + + /* Number of partitions */ + int nparts; + + /* Is one of the partitions the default partition */ + bool has_default_part; + + /* Partition qual if this's not the root partitioned table */ + List *partition_qual; + + /* Partition boundary info */ + PartitionBoundInfo boundinfo; +} PartitionPruneContext; + extern void RelationBuildPartitionDesc(Relation relation); extern bool partition_bounds_equal(int partnatts, int16 *parttyplen, bool *parttypbyval, PartitionBoundInfo b1, @@ -73,4 +105,7 @@ extern List *get_proposed_default_constraint(List *new_part_constaints); extern int get_partition_for_tuple(Relation relation, Datum *values, bool *isnull); +/* For partition-pruning */ +extern Bitmapset *get_unpruned_partitions(PartitionPruneContext *context, + List *pruning_steps); #endif /* PARTITION_H */ diff --git a/src/include/catalog/pg_opfamily.h b/src/include/catalog/pg_opfamily.h index b544474254..0847df97ff 100644 --- a/src/include/catalog/pg_opfamily.h +++ b/src/include/catalog/pg_opfamily.h @@ -188,4 +188,7 @@ DATA(insert OID = 4104 ( 3580 box_inclusion_ops PGNSP PGUID )); DATA(insert OID = 5000 ( 4000 box_ops PGNSP PGUID )); DATA(insert OID = 5008 ( 4000 poly_ops PGNSP PGUID )); +#define IsBooleanOpfamily(opfamily) \ + ((opfamily) == BOOL_BTREE_FAM_OID || (opfamily) == BOOL_HASH_FAM_OID) + #endif /* PG_OPFAMILY_H */ diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index 74b094a9c3..206bca3023 
100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -191,6 +191,11 @@ typedef enum NodeTag T_FromExpr, T_OnConflictExpr, T_IntoClause, + T_PartitionPruneStep, + T_PartitionPruneStepNoop, + T_PartitionPruneStepOp, + T_PartitionPruneStepNullness, + T_PartitionPruneStepCombine, /* * TAGS FOR EXPRESSION STATE NODES (execnodes.h) diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h index 1b4b0d75af..5e3c1d3379 100644 --- a/src/include/nodes/primnodes.h +++ b/src/include/nodes/primnodes.h @@ -1506,4 +1506,52 @@ typedef struct OnConflictExpr List *exclRelTlist; /* tlist of the EXCLUDED pseudo relation */ } OnConflictExpr; +/*---------- + * PartitionPruneStep - base type for nodes representing a partition pruning + * step + *---------- + */ +typedef struct PartitionPruneStep +{ + NodeTag type; /* identifies the concrete step node type */ +} PartitionPruneStep; + +/* a no-op step that doesn't prune any of the partitions. */ +typedef struct PartitionPruneStepNoop +{ + PartitionPruneStep step; /* embedded base type */ +} PartitionPruneStepNoop; + +typedef struct PartitionPruneStepOp +{ + PartitionPruneStep step; /* embedded base type */ + + int opstrategy; /* presumably an operator strategy number -- confirm */ + List *values; /* presumably values to compare keys with -- confirm */ +} PartitionPruneStepOp; + +typedef struct PartitionPruneStepNullness +{ + PartitionPruneStep step; /* embedded base type */ + + Bitmapset *keyisnull; /* presumably keys required to be NULL -- confirm */ + Bitmapset *keyisnotnull; /* presumably keys required to be NOT NULL -- confirm */ +} PartitionPruneStepNullness; + +typedef enum PartitionPruneCombineOp +{ + COMBINE_OR, + COMBINE_AND, + COMBINE_NOT +} PartitionPruneCombineOp; + +typedef struct PartitionPruneStepCombine +{ + PartitionPruneStep step; /* embedded base type */ + + PartitionPruneCombineOp combineOp; /* presumably how arguments are combined -- confirm */ + List *argsteps; /* presumably the steps being combined -- confirm */ + List *argvalues; /* NOTE(review): purpose not evident here; document */ +} PartitionPruneStepCombine; + #endif /* PRIMNODES_H */ diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index 08a177dac4..b687924443 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -535,6 +535,8 @@ typedef struct PartitionSchemeData *PartitionScheme; * part_scheme - Partitioning scheme of the relation * boundinfo - Partition bounds * nparts - Number of
partitions + * has_default_part - Whether the table has a default partition + * partition_qual - Partition constraint if not the root * part_rels - RelOptInfos for each partition * partexprs, nullable_partexprs - Partition key expressions * @@ -663,6 +665,8 @@ typedef struct RelOptInfo PartitionScheme part_scheme; /* Partitioning scheme. */ int nparts; /* number of partitions */ struct PartitionBoundInfoData *boundinfo; /* Partition bounds */ + bool has_default_part; /* does it have a default partition? */ + List *partition_qual; /* partition constraint */ struct RelOptInfo **part_rels; /* Array of RelOptInfos of partitions, * stored in the same order of bounds */ List **partexprs; /* Non-nullable partition key expressions. */ diff --git a/src/include/optimizer/partprune.h b/src/include/optimizer/partprune.h new file mode 100644 index 0000000000..d9ac2b49cb --- /dev/null +++ b/src/include/optimizer/partprune.h @@ -0,0 +1,28 @@ +/*------------------------------------------------------------------------- + * + * partprune.h + * prototypes for partprune.c + * + * + * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/optimizer/partprune.h + * + *------------------------------------------------------------------------- + */ +#ifndef PARTPRUNE_H +#define PARTPRUNE_H + +#include "catalog/partition.h" +#include "nodes/relation.h" + +/* Returns the partitions of 'rel' that still need to be scanned. */ +extern Relids prune_append_rel_partitions(PlannerInfo *root, + RelOptInfo *rel); +/* Builds pruning steps from 'clauses' (see partprune.c for *constfalse). */ +extern List *generate_partition_pruning_steps(PartitionPruneContext *context, + List *clauses, + bool *constfalse); + +#endif /* PARTPRUNE_H */ diff --git a/src/test/regress/expected/inherit.out b/src/test/regress/expected/inherit.out index d768dc0215..d799acb91f 100644 --- a/src/test/regress/expected/inherit.out +++ b/src/test/regress/expected/inherit.out @@ -1739,11 +1739,7 @@ explain (costs off) select * from list_parted where a = 'ab' or a in (null, 'cd' Append -> Seq Scan on
part_ab_cd Filter: (((a)::text = 'ab'::text) OR ((a)::text = ANY ('{NULL,cd}'::text[]))) - -> Seq Scan on part_ef_gh - Filter: (((a)::text = 'ab'::text) OR ((a)::text = ANY ('{NULL,cd}'::text[]))) - -> Seq Scan on part_null_xy - Filter: (((a)::text = 'ab'::text) OR ((a)::text = ANY ('{NULL,cd}'::text[]))) -(7 rows) +(3 rows) explain (costs off) select * from list_parted where a = 'ab'; QUERY PLAN @@ -1930,11 +1926,13 @@ explain (costs off) select * from mcrparted where abs(b) = 5; -- scans all parti Filter: (abs(b) = 5) -> Seq Scan on mcrparted3 Filter: (abs(b) = 5) + -> Seq Scan on mcrparted4 + Filter: (abs(b) = 5) -> Seq Scan on mcrparted5 Filter: (abs(b) = 5) -> Seq Scan on mcrparted_def Filter: (abs(b) = 5) -(13 rows) +(15 rows) explain (costs off) select * from mcrparted where a > -1; -- scans all partitions QUERY PLAN diff --git a/src/test/regress/expected/partition_prune.out b/src/test/regress/expected/partition_prune.out index 775bba6547..ef767e9f30 100644 --- a/src/test/regress/expected/partition_prune.out +++ b/src/test/regress/expected/partition_prune.out @@ -24,11 +24,13 @@ explain (costs off) select * from lp where a > 'a' and a < 'd'; QUERY PLAN ----------------------------------------------------------- Append + -> Seq Scan on lp_ad + Filter: ((a > 'a'::bpchar) AND (a < 'd'::bpchar)) -> Seq Scan on lp_bc Filter: ((a > 'a'::bpchar) AND (a < 'd'::bpchar)) -> Seq Scan on lp_default Filter: ((a > 'a'::bpchar) AND (a < 'd'::bpchar)) -(5 rows) +(7 rows) explain (costs off) select * from lp where a > 'a' and a <= 'd'; QUERY PLAN @@ -208,16 +210,14 @@ explain (costs off) select * from rlp where 1 > a; /* commuted */ (3 rows) explain (costs off) select * from rlp where a <= 1; - QUERY PLAN ---------------------------------------- + QUERY PLAN +-------------------------- Append -> Seq Scan on rlp1 Filter: (a <= 1) -> Seq Scan on rlp2 Filter: (a <= 1) - -> Seq Scan on rlp_default_default - Filter: (a <= 1) -(7 rows) +(5 rows) explain (costs off) select * from 
rlp where a = 1; QUERY PLAN @@ -235,7 +235,7 @@ explain (costs off) select * from rlp where a = 1::bigint; /* same as above */ Filter: (a = '1'::bigint) (3 rows) -explain (costs off) select * from rlp where a = 1::numeric; /* only null can be pruned */ +explain (costs off) select * from rlp where a = 1::numeric; /* no pruning */ QUERY PLAN ----------------------------------------------- Append @@ -265,9 +265,11 @@ explain (costs off) select * from rlp where a = 1::numeric; /* only null can be Filter: ((a)::numeric = '1'::numeric) -> Seq Scan on rlp_default_30 Filter: ((a)::numeric = '1'::numeric) + -> Seq Scan on rlp_default_null + Filter: ((a)::numeric = '1'::numeric) -> Seq Scan on rlp_default_default Filter: ((a)::numeric = '1'::numeric) -(29 rows) +(31 rows) explain (costs off) select * from rlp where a <= 10; QUERY PLAN @@ -575,7 +577,9 @@ explain (costs off) select * from rlp where a > 20 and a < 27; Filter: ((a > 20) AND (a < 27)) -> Seq Scan on rlp4_default Filter: ((a > 20) AND (a < 27)) -(7 rows) + -> Seq Scan on rlp_default_default + Filter: ((a > 20) AND (a < 27)) +(9 rows) explain (costs off) select * from rlp where a = 29; QUERY PLAN @@ -714,9 +718,7 @@ explain (costs off) select * from mc3p where a = 1 and abs(b) = 1 and c < 8; Filter: ((c < 8) AND (a = 1) AND (abs(b) = 1)) -> Seq Scan on mc3p1 Filter: ((c < 8) AND (a = 1) AND (abs(b) = 1)) - -> Seq Scan on mc3p_default - Filter: ((c < 8) AND (a = 1) AND (abs(b) = 1)) -(7 rows) +(5 rows) explain (costs off) select * from mc3p where a = 10 and abs(b) between 5 and 35; QUERY PLAN @@ -892,6 +894,8 @@ explain (costs off) select * from mc3p where a = 1 or abs(b) = 1 or c = 1; Filter: ((a = 1) OR (abs(b) = 1) OR (c = 1)) -> Seq Scan on mc3p2 Filter: ((a = 1) OR (abs(b) = 1) OR (c = 1)) + -> Seq Scan on mc3p3 + Filter: ((a = 1) OR (abs(b) = 1) OR (c = 1)) -> Seq Scan on mc3p4 Filter: ((a = 1) OR (abs(b) = 1) OR (c = 1)) -> Seq Scan on mc3p5 @@ -902,7 +906,7 @@ explain (costs off) select * from mc3p where a 
= 1 or abs(b) = 1 or c = 1; Filter: ((a = 1) OR (abs(b) = 1) OR (c = 1)) -> Seq Scan on mc3p_default Filter: ((a = 1) OR (abs(b) = 1) OR (c = 1)) -(17 rows) +(19 rows) explain (costs off) select * from mc3p where (a = 1 and abs(b) = 1) or (a = 10 and abs(b) = 10); QUERY PLAN @@ -963,9 +967,11 @@ explain (costs off) select * from mc2p where a = 2 and b < 1; QUERY PLAN --------------------------------------- Append + -> Seq Scan on mc2p2 + Filter: ((b < 1) AND (a = 2)) -> Seq Scan on mc2p3 Filter: ((b < 1) AND (a = 2)) -(3 rows) +(5 rows) explain (costs off) select * from mc2p where a > 1; QUERY PLAN @@ -1007,24 +1013,20 @@ explain (costs off) select * from boolpart where a in (true, false); (5 rows) explain (costs off) select * from boolpart where a = false; - QUERY PLAN ------------------------------------- + QUERY PLAN +------------------------------ Append -> Seq Scan on boolpart_f Filter: (NOT a) - -> Seq Scan on boolpart_default - Filter: (NOT a) -(5 rows) +(3 rows) explain (costs off) select * from boolpart where not a = false; - QUERY PLAN ------------------------------------- + QUERY PLAN +------------------------------ Append -> Seq Scan on boolpart_t Filter: a - -> Seq Scan on boolpart_default - Filter: a -(5 rows) +(3 rows) explain (costs off) select * from boolpart where a is true or a is not true; QUERY PLAN @@ -1034,33 +1036,22 @@ explain (costs off) select * from boolpart where a is true or a is not true; Filter: ((a IS TRUE) OR (a IS NOT TRUE)) -> Seq Scan on boolpart_t Filter: ((a IS TRUE) OR (a IS NOT TRUE)) - -> Seq Scan on boolpart_default - Filter: ((a IS TRUE) OR (a IS NOT TRUE)) -(7 rows) +(5 rows) explain (costs off) select * from boolpart where a is not true; - QUERY PLAN ------------------------------------- + QUERY PLAN +--------------------------------- Append -> Seq Scan on boolpart_f Filter: (a IS NOT TRUE) - -> Seq Scan on boolpart_t - Filter: (a IS NOT TRUE) - -> Seq Scan on boolpart_default - Filter: (a IS NOT TRUE) -(7 rows) +(3 
rows) explain (costs off) select * from boolpart where a is not true and a is not false; - QUERY PLAN --------------------------------------------------------- - Append - -> Seq Scan on boolpart_f - Filter: ((a IS NOT TRUE) AND (a IS NOT FALSE)) - -> Seq Scan on boolpart_t - Filter: ((a IS NOT TRUE) AND (a IS NOT FALSE)) - -> Seq Scan on boolpart_default - Filter: ((a IS NOT TRUE) AND (a IS NOT FALSE)) -(7 rows) + QUERY PLAN +-------------------------- + Result + One-Time Filter: false +(2 rows) explain (costs off) select * from boolpart where a is unknown; QUERY PLAN @@ -1229,13 +1220,7 @@ explain (costs off) select * from hp where a = 1 and b = 'xxx'; Append -> Seq Scan on hp0 Filter: ((a = 1) AND (b = 'xxx'::text)) - -> Seq Scan on hp1 - Filter: ((a = 1) AND (b = 'xxx'::text)) - -> Seq Scan on hp2 - Filter: ((a = 1) AND (b = 'xxx'::text)) - -> Seq Scan on hp3 - Filter: ((a = 1) AND (b = 'xxx'::text)) -(9 rows) +(3 rows) explain (costs off) select * from hp where a is null and b = 'xxx'; QUERY PLAN @@ -1255,29 +1240,17 @@ explain (costs off) select * from hp where a = 10 and b = 'xxx'; QUERY PLAN -------------------------------------------------- Append - -> Seq Scan on hp0 - Filter: ((a = 10) AND (b = 'xxx'::text)) - -> Seq Scan on hp1 - Filter: ((a = 10) AND (b = 'xxx'::text)) -> Seq Scan on hp2 Filter: ((a = 10) AND (b = 'xxx'::text)) - -> Seq Scan on hp3 - Filter: ((a = 10) AND (b = 'xxx'::text)) -(9 rows) +(3 rows) explain (costs off) select * from hp where a = 10 and b = 'yyy'; QUERY PLAN -------------------------------------------------- Append - -> Seq Scan on hp0 - Filter: ((a = 10) AND (b = 'yyy'::text)) - -> Seq Scan on hp1 - Filter: ((a = 10) AND (b = 'yyy'::text)) - -> Seq Scan on hp2 - Filter: ((a = 10) AND (b = 'yyy'::text)) -> Seq Scan on hp3 Filter: ((a = 10) AND (b = 'yyy'::text)) -(9 rows) +(3 rows) explain (costs off) select * from hp where (a = 10 and b = 'yyy') or (a = 10 and b = 'xxx') or (a is null and b is null); QUERY PLAN @@ -1305,11 
+1278,13 @@ explain (costs off) select * from mc2p t1, lateral (select count(*) from mc3p t2 ----------------------------------------------------------------------- Nested Loop -> Append - -> Seq Scan on mc2p1 t1 + -> Seq Scan on mc2p0 t1 Filter: (a = 1) - -> Seq Scan on mc2p2 t1_1 + -> Seq Scan on mc2p1 t1_1 Filter: (a = 1) - -> Seq Scan on mc2p_default t1_2 + -> Seq Scan on mc2p2 t1_2 + Filter: (a = 1) + -> Seq Scan on mc2p_default t1_3 Filter: (a = 1) -> Aggregate -> Append @@ -1317,13 +1292,21 @@ explain (costs off) select * from mc2p t1, lateral (select count(*) from mc3p t2 Filter: ((a = t1.b) AND (c = 1) AND (abs(b) = 1)) -> Seq Scan on mc3p1 t2_1 Filter: ((a = t1.b) AND (c = 1) AND (abs(b) = 1)) - -> Seq Scan on mc3p5 t2_2 + -> Seq Scan on mc3p2 t2_2 Filter: ((a = t1.b) AND (c = 1) AND (abs(b) = 1)) - -> Seq Scan on mc3p7 t2_3 + -> Seq Scan on mc3p3 t2_3 Filter: ((a = t1.b) AND (c = 1) AND (abs(b) = 1)) - -> Seq Scan on mc3p_default t2_4 + -> Seq Scan on mc3p4 t2_4 Filter: ((a = t1.b) AND (c = 1) AND (abs(b) = 1)) -(20 rows) + -> Seq Scan on mc3p5 t2_5 + Filter: ((a = t1.b) AND (c = 1) AND (abs(b) = 1)) + -> Seq Scan on mc3p6 t2_6 + Filter: ((a = t1.b) AND (c = 1) AND (abs(b) = 1)) + -> Seq Scan on mc3p7 t2_7 + Filter: ((a = t1.b) AND (c = 1) AND (abs(b) = 1)) + -> Seq Scan on mc3p_default t2_8 + Filter: ((a = t1.b) AND (c = 1) AND (abs(b) = 1)) +(30 rows) -- pruning should work fine, because prefix of keys is available explain (costs off) select * from mc2p t1, lateral (select count(*) from mc3p t2 where t2.c = t1.b and abs(t2.b) = 1 and t2.a = 1) s where t1.a = 1; @@ -1331,11 +1314,13 @@ explain (costs off) select * from mc2p t1, lateral (select count(*) from mc3p t2 ----------------------------------------------------------------------- Nested Loop -> Append - -> Seq Scan on mc2p1 t1 + -> Seq Scan on mc2p0 t1 Filter: (a = 1) - -> Seq Scan on mc2p2 t1_1 + -> Seq Scan on mc2p1 t1_1 Filter: (a = 1) - -> Seq Scan on mc2p_default t1_2 + -> Seq Scan on mc2p2 
t1_2 + Filter: (a = 1) + -> Seq Scan on mc2p_default t1_3 Filter: (a = 1) -> Aggregate -> Append @@ -1345,7 +1330,7 @@ explain (costs off) select * from mc2p t1, lateral (select count(*) from mc3p t2 Filter: ((c = t1.b) AND (a = 1) AND (abs(b) = 1)) -> Seq Scan on mc3p_default t2_2 Filter: ((c = t1.b) AND (a = 1) AND (abs(b) = 1)) -(16 rows) +(18 rows) -- pruning should work fine in this case, too. explain (costs off) select * from mc2p t1, lateral (select count(*) from mc3p t2 where t2.a = 1 and abs(t2.b) = 1 and t2.c = 1) s where t1.a = 1; @@ -1357,13 +1342,15 @@ explain (costs off) select * from mc2p t1, lateral (select count(*) from mc3p t2 -> Seq Scan on mc3p1 t2 Filter: ((a = 1) AND (c = 1) AND (abs(b) = 1)) -> Append - -> Seq Scan on mc2p1 t1 + -> Seq Scan on mc2p0 t1 Filter: (a = 1) - -> Seq Scan on mc2p2 t1_1 + -> Seq Scan on mc2p1 t1_1 Filter: (a = 1) - -> Seq Scan on mc2p_default t1_2 + -> Seq Scan on mc2p2 t1_2 Filter: (a = 1) -(12 rows) + -> Seq Scan on mc2p_default t1_3 + Filter: (a = 1) +(14 rows) -- -- pruning with clauses containing <> operator @@ -1492,22 +1479,16 @@ explain (costs off) select * from coll_pruning_multi where substr(a, 1) = 'a' co Filter: (substr(a, 1) = 'a'::text COLLATE "POSIX") -> Seq Scan on coll_pruning_multi2 Filter: (substr(a, 1) = 'a'::text COLLATE "POSIX") - -> Seq Scan on coll_pruning_multi3 - Filter: (substr(a, 1) = 'a'::text COLLATE "POSIX") -(7 rows) +(5 rows) -- pruning with just both columns constrained explain (costs off) select * from coll_pruning_multi where substr(a, 1) = 'e' collate "C" and substr(a, 1) = 'a' collate "POSIX"; QUERY PLAN --------------------------------------------------------------------------------------------------------- Append - -> Seq Scan on coll_pruning_multi1 - Filter: ((substr(a, 1) = 'e'::text COLLATE "C") AND (substr(a, 1) = 'a'::text COLLATE "POSIX")) -> Seq Scan on coll_pruning_multi2 Filter: ((substr(a, 1) = 'e'::text COLLATE "C") AND (substr(a, 1) = 'a'::text COLLATE "POSIX")) - 
-> Seq Scan on coll_pruning_multi3 - Filter: ((substr(a, 1) = 'e'::text COLLATE "C") AND (substr(a, 1) = 'a'::text COLLATE "POSIX")) -(7 rows) +(3 rows) -- -- LIKE operators don't prune diff --git a/src/test/regress/sql/partition_prune.sql b/src/test/regress/sql/partition_prune.sql index 317ff479aa..9e75f456bc 100644 --- a/src/test/regress/sql/partition_prune.sql +++ b/src/test/regress/sql/partition_prune.sql @@ -60,7 +60,7 @@ explain (costs off) select * from rlp where 1 > a; /* commuted */ explain (costs off) select * from rlp where a <= 1; explain (costs off) select * from rlp where a = 1; explain (costs off) select * from rlp where a = 1::bigint; /* same as above */ -explain (costs off) select * from rlp where a = 1::numeric; /* only null can be pruned */ +explain (costs off) select * from rlp where a = 1::numeric; /* no pruning */ explain (costs off) select * from rlp where a <= 10; explain (costs off) select * from rlp where a > 10; explain (costs off) select * from rlp where a < 15; -- 2.11.0