From 6140f2378a9d253e8cafcc10c0b41468246d6f6e Mon Sep 17 00:00:00 2001 From: amit Date: Tue, 22 Aug 2017 13:48:13 +0900 Subject: [PATCH v18 2/5] Introduce a get_partitions_from_clauses() Whereas get_partition_for_tuple() takes a tuple and returns the index of the partition of the table that should contain that tuple, get_partitions_from_clauses() will take a list of query clauses and return a set of indexes of the partitions that satisfy all of those clauses. It is meant as a faster alternative to the planner's current method of selecting a table's partitions by running the constraint exclusion algorithm against the partition constraint of each of the partitions. Callers must have checked that each of the clauses matches one of the partition keys. --- src/backend/catalog/partition.c | 1983 ++++++++++++++++++++++++++++++++++ src/backend/optimizer/util/clauses.c | 4 +- src/include/catalog/partition.h | 3 + src/include/catalog/pg_opfamily.h | 3 + src/include/optimizer/clauses.h | 2 + 5 files changed, 1992 insertions(+), 3 deletions(-) diff --git a/src/backend/catalog/partition.c b/src/backend/catalog/partition.c index d937edcd83..0d9c774005 100644 --- a/src/backend/catalog/partition.c +++ b/src/backend/catalog/partition.c @@ -28,6 +28,8 @@ #include "catalog/pg_inherits.h" #include "catalog/pg_inherits_fn.h" #include "catalog/pg_opclass.h" +#include "catalog/pg_operator.h" +#include "catalog/pg_opfamily.h" #include "catalog/pg_partitioned_table.h" #include "catalog/pg_type.h" #include "commands/tablecmds.h" @@ -38,6 +40,8 @@ #include "nodes/parsenodes.h" #include "optimizer/clauses.h" #include "optimizer/planmain.h" +#include "optimizer/planner.h" +#include "optimizer/predtest.h" #include "optimizer/prep.h" #include "optimizer/var.h" #include "parser/parse_coerce.h" @@ -163,6 +167,80 @@ typedef struct PartitionBoundCmpArg int ndatums; } PartitionBoundCmpArg; +/* + * Information about a clause matched with a partition key column kept to + * avoid recomputing the same in
remove_redundant_clauses(). + */ +typedef struct PartClause +{ + OpExpr *op; + Expr *constarg; + + /* cached info. */ + bool valid_cache; /* Is the following information initialized? */ + int op_strategy; + Oid op_subtype; + FmgrInfo op_func; +} PartClause; + +/* + * Strategy of a partition clause operator per the partitioning operator class + * definition. + */ +typedef enum PartOpStrategy +{ + PART_OP_EQUAL, /* = */ + PART_OP_LESS, /* < or <= */ + PART_OP_GREATER /* > or >= */ +} PartOpStrategy; + +/* + * PartScanKeyInfo + * Bounding scan keys to look up a table's partitions obtained from + * mutually-ANDed clauses containing partitioning-compatible operators + */ +typedef struct PartScanKeyInfo +{ + /* + * Constants constituting the *whole* partition key compared using + * partitioning-compatible equality operator(s). When n_eqkeys > 0, other + * keys (minkeys and maxkeys) are irrelevant. + * + * Equal keys are not required to be in any particular order, unlike the + * keys below which must appear in the same order as partition keys. + */ + Datum eqkeys[PARTITION_MAX_KEYS]; + int n_eqkeys; + + /* + * Constants that constitute the lower bound on the partition key or a + * prefix thereof. The last of those constants is compared using > or >= + * operator compatible with partitioning, making this the lower bound in + * a range query. + */ + Datum minkeys[PARTITION_MAX_KEYS]; + int n_minkeys; + bool min_incl; + + /* + * Constants that constitute the upper bound on the partition key or a + * prefix thereof. The last of those constants is compared using < or <= + * operator compatible with partitioning, making this the upper bound in + * a range query. + */ + Datum maxkeys[PARTITION_MAX_KEYS]; + int n_maxkeys; + bool max_incl; + + /* + * Does the query specify a key to be null or not null? Partitioning + * handles null partition keys specially depending on the partitioning + * strategy in use, so we store this information.
+ */ + Bitmapset *keyisnull; + Bitmapset *keyisnotnull; +} PartScanKeyInfo; + static int32 qsort_partition_hbound_cmp(const void *a, const void *b); static int32 qsort_partition_list_value_cmp(const void *a, const void *b, void *arg); @@ -211,6 +289,35 @@ static uint64 compute_hash_value(PartitionKey key, Datum *values, bool *isnull); /* SQL-callable function for use in hash partition CHECK constraints */ PG_FUNCTION_INFO_V1(satisfies_hash_partition); +static Bitmapset *get_partitions_from_clauses_recurse(Relation relation, + int rt_index, List *clauses); +static Bitmapset *get_partitions_from_ne_clauses(Relation relation, + List *ne_clauses); +static Bitmapset *get_partitions_from_or_clause_args(Relation relation, + int rt_index, List *or_clause_args); +static bool classify_partition_bounding_keys(Relation relation, List *clauses, + int rt_index, + PartScanKeyInfo *keys, bool *constfalse, + List **or_clauses, List **ne_clauses); +static void remove_redundant_clauses(PartitionKey partkey, + int partkeyidx, List *all_clauses, + List **result, bool *constfalse); +static bool partition_cmp_args(PartitionKey key, int partkeyidx, + PartClause *op, PartClause *leftarg, PartClause *rightarg, + bool *result); +static PartOpStrategy partition_op_strategy(PartitionKey key, PartClause *op, + bool *incl); +static bool partkey_datum_from_expr(PartitionKey key, int partkeyidx, + Expr *expr, Datum *value); +static Bitmapset *get_partitions_for_keys(Relation rel, + PartScanKeyInfo *keys); +static Bitmapset *get_partitions_for_keys_hash(Relation rel, + PartScanKeyInfo *keys); +static Bitmapset *get_partitions_for_keys_list(Relation rel, + PartScanKeyInfo *keys); +static Bitmapset *get_partitions_for_keys_range(Relation rel, + PartScanKeyInfo *keys); + /* * RelationBuildPartitionDesc * Form rel's partition descriptor @@ -1581,9 +1688,1885 @@ get_partition_qual_relid(Oid relid) return result; } +/* + * get_partitions_from_clauses + * Determine the set of partitions of 'relation' that 
will satisfy all + * the clauses contained in 'partclauses' + * + * 'rt_index' is the range table index of 'relation' in the query that the + * clauses come from. + * + * Outputs: + * A Bitmapset containing indexes of all selected partitions. + */ +Bitmapset * +get_partitions_from_clauses(Relation relation, int rt_index, + List *partclauses) +{ + Bitmapset *result; + List *partconstr; + PartitionDesc partdesc = RelationGetPartitionDesc(relation); + PartitionBoundInfo boundinfo = partdesc->boundinfo; + + Assert(partclauses != NIL); + + /* + * If relation is a partition itself, add its partition constraint + * clauses to the list of clauses to use for partition pruning. This + * is done to facilitate correct decision regarding the default + * partition. Adding the partition constraint clauses to the list helps + * restrict the possible key space to only that allowed by the partition + * and thus avoids the default partition being inadvertently added to the + * set of selected partitions for a query whose clauses select a key space + * bigger than the partition's. + */ + if (partition_bound_has_default(boundinfo) && + (partconstr = RelationGetPartitionQual(relation)) != NIL) + { + partconstr = (List *) expression_planner((Expr *) partconstr); + + /* + * Renumber the constraint's Vars from the dummy varno 1 to the + * query's RT index, as get_partitions_from_or_clause_args() does, + * so that they can match expression partition keys, which are + * renumbered the same way before being compared. + */ + if (rt_index != 1) + ChangeVarNodes((Node *) partconstr, 1, rt_index, 0); + partclauses = list_concat(partclauses, partconstr); + } + + result = get_partitions_from_clauses_recurse(relation, rt_index, + partclauses); + + return result; +} + /* Module-local functions */ /* + * get_partitions_from_clauses_recurse + * Determine relation's partitions that satisfy *all* of the clauses + * in the list + * + * Return value is a Bitmapset containing the indexes of selected partitions. + */ +static Bitmapset * +get_partitions_from_clauses_recurse(Relation relation, int rt_index, + List *clauses) +{ + Bitmapset *result = NULL; + PartScanKeyInfo keys; + bool constfalse; + List *or_clauses, + *ne_clauses; + ListCell *lc; + + /* + * Try to reduce the set of clauses into a form that + * get_partitions_for_keys() can work with.
+ */ + if (classify_partition_bounding_keys(relation, clauses, rt_index, + &keys, &constfalse, + &or_clauses, &ne_clauses)) + { + /* + * classify_partition_bounding_keys() may have found clauses marked + * pseudo-constant that are false that the planner didn't or it may + * have itself found contradictions among clauses. + */ + if (constfalse) + return NULL; + + result = get_partitions_for_keys(relation, &keys); + } + else + { + PartitionDesc partdesc = RelationGetPartitionDesc(relation); + + result = bms_add_range(result, 0, partdesc->nparts - 1); + } + + /* + * No point in trying to look at other conjunctive clauses, if we got + * an empty set in the first place. + */ + if (constfalse || bms_is_empty(result)) + return NULL; + + /* Select partitions by applying the clauses containing <> operators. */ + if (ne_clauses) + { + Bitmapset *ne_clause_parts; + + ne_clause_parts = get_partitions_from_ne_clauses(relation, ne_clauses); + + /* + * Clauses in ne_clauses are in conjunction with the clauses that + * selected the partitions contained in result, so combine the + * partitions thus selected with those in result using set + * intersection. + */ + result = bms_int_members(result, ne_clause_parts); + bms_free(ne_clause_parts); + } + + /* Select partitions by applying OR clauses. */ + foreach(lc, or_clauses) + { + BoolExpr *or = (BoolExpr *) lfirst(lc); + Bitmapset *or_parts; + + or_parts = get_partitions_from_or_clause_args(relation, rt_index, + or->args); + /* + * Clauses in or_clauses are mutually conjunctive and also in + * in conjunction with the rest of the clauses above, so combine the + * partitions thus selected with those in result using set + * intersection. + */ + result = bms_int_members(result, or_parts); + bms_free(or_parts); + } + + return result; +} + +/* Assumes partkey exists in the scope and is of a list partitioned table. 
*/ +#define partkey_datums_equal(d1, d2)\ + (0 == DatumGetInt32(FunctionCall2Coll(&partkey->partsupfunc[0],\ + partkey->partcollation[0],\ + (d1), (d2)))) +/* + * Check if d is equal to some member of darray where equality is determined + * by the partitioning comparison function. + */ +static bool +datum_in_array(PartitionKey partkey, Datum d, Datum *darray, int n) +{ + int i; + + if (darray == NULL || n == 0) + return false; + + for (i = 0; i < n; i++) + if (partkey_datums_equal(d, darray[i])) + return true; + + return false; +} + +/* + * count_partition_datums + * + * Returns the number of non-null datums allowed by a non-default list + * partition with given index. + */ +static int +count_partition_datums(Relation rel, int index) +{ + PartitionBoundInfo boundinfo = RelationGetPartitionDesc(rel)->boundinfo; + int i, + result = 0; + + Assert(index != boundinfo->default_index); + + /* + * The answer is as many as the count of occurrence of the value index + * in boundinfo->indexes[]. + */ + for (i = 0; i < boundinfo->ndatums; i++) + if (index == boundinfo->indexes[i]) + result += 1; + + return result; +} + +/* + * get_partitions_from_ne_clauses + * + * Return partitions of relation that satisfy all <> operator clauses in + * ne_clauses. Only ever called if relation is a list partitioned table. + */ +static Bitmapset * +get_partitions_from_ne_clauses(Relation relation, List *ne_clauses) +{ + ListCell *lc; + Bitmapset *result, + *excluded_parts; + PartitionKey partkey = RelationGetPartitionKey(relation); + PartitionDesc partdesc = RelationGetPartitionDesc(relation); + PartitionBoundInfo boundinfo = partdesc->boundinfo; + Datum *exclude_datums; + int *count_excluded, + n_exclude_datums, + i; + + Assert(partkey->strategy == PARTITION_STRATEGY_LIST); + + /* + * How this works: + * + * For each constant expression, we look up the partition that would + * contain its value and mark the same as excluded partition. 
After + * doing the same for all clauses we'll have set of partitions that + * are excluded. For each excluded partition, check if there exist + * values that it allows but are not specified in the clauses, if so + * the partition won't actually be excluded. + */ + + /* De-duplicate constant values. */ + exclude_datums = (Datum *) palloc0(list_length(ne_clauses) * + sizeof(Datum)); + n_exclude_datums = 0; + foreach(lc, ne_clauses) + { + PartClause *pc = lfirst(lc); + Datum datum; + + if (partkey_datum_from_expr(partkey, 0, pc->constarg, &datum) && + !datum_in_array(partkey, datum, exclude_datums, n_exclude_datums)) + exclude_datums[n_exclude_datums++] = datum; + } + + /* + * For each value, if it's found in boundinfo, increment the count of its + * partition as excluded due to that value. + */ + count_excluded = (int *) palloc0(partdesc->nparts * sizeof(int)); + for (i = 0; i < n_exclude_datums; i++) + { + int offset, + excluded_part; + bool is_equal; + PartitionBoundCmpArg arg; + Datum argdatums[] = {exclude_datums[i]}; + + memset(&arg, 0, sizeof(arg)); + arg.datums = argdatums; + arg.ndatums = 1; + offset = partition_bound_bsearch(partkey, boundinfo, &arg, &is_equal); + if (offset >= 0 && is_equal && boundinfo->indexes[offset] >= 0) + { + excluded_part = boundinfo->indexes[offset]; + count_excluded[excluded_part]++; + } + } + + excluded_parts = NULL; + for (i = 0; i < partdesc->nparts; i++) + { + /* + * If all datums of this partition appeared in ne_clauses, exclude + * this partition. + */ + if (count_excluded[i] > 0 && + count_excluded[i] == count_partition_datums(relation, i)) + excluded_parts = bms_add_member(excluded_parts, i); + } + + /* + * Also, exclude the "null-only" partition, because strict clauses in + * ne_clauses will not select any rows from it. 
+ */ + if (partition_bound_accepts_nulls(boundinfo) && + count_partition_datums(relation, boundinfo->null_index) == 0) + excluded_parts = bms_add_member(excluded_parts, + boundinfo->null_index); + + pfree(count_excluded); + pfree(exclude_datums); + + result = bms_add_range(NULL, 0, partdesc->nparts - 1); + result = bms_del_members(result, excluded_parts); + bms_free(excluded_parts); + + return result; +} + +/* + * get_partitions_from_or_clause_args + * + * Returns the set of partitions of relation, each of which satisfies some + * clause in or_clause_args. + */ +static Bitmapset * +get_partitions_from_or_clause_args(Relation relation, int rt_index, + List *or_clause_args) +{ + ListCell *lc; + Bitmapset *result = NULL; + + foreach(lc, or_clause_args) + { + List *arg_clauses = list_make1(lfirst(lc)); + List *partconstr = RelationGetPartitionQual(relation); + Bitmapset *arg_partset; + + /* + * It's possible that this clause is never true for this relation + * due to the latter's partition constraint, which means we must + * not add its partitions to or_partset. But the clause may not + * contain this relation's partition key expressions (instead the + * parent's), so we could not depend on just calling + * get_partitions_from_clauses_recurse(relation, ...) to determine + * that the clause indeed prunes all of the relation's partition. + * + * Use predicate refutation proof instead. + */ + if (partconstr) + { + partconstr = (List *) expression_planner((Expr *) partconstr); + if (rt_index != 1) + ChangeVarNodes((Node *) partconstr, 1, rt_index, 0); + if (predicate_refuted_by(partconstr, arg_clauses, false)) + continue; + } + + arg_partset = get_partitions_from_clauses_recurse(relation, rt_index, + arg_clauses); + result = bms_add_members(result, arg_partset); + bms_free(arg_partset); + } + + return result; +} + +/* Match partition key (partattno/partexpr) to an expression (expr). 
*/ +#define EXPR_MATCHES_PARTKEY(expr, partattno, partexpr) \ + ((partattno) != 0 ?\ + (IsA((expr), Var) &&\ + ((Var *) (expr))->varattno == (partattno)) :\ + equal((expr), (partexpr))) + +/* + * classify_partition_bounding_keys + * Classify partition clauses into equal, min, and max keys, along with + * any Nullness constraints and return that information in the output + * argument 'keys' (Returns true if 'keys' contains valid information + * upon return, otherwise false.) + * + * Clauses in the provided list are implicitly ANDed, each of which is known + * to match some partition key column. Map them to individual key columns + * and for each column, determine the equal bound or "best" min and max + * bounds. For example, of a > 1, a > 2, and a >= 5, "5" is the best min + * bound for the column a, which also happens to be an inclusive bound. + * When analyzing multiple clauses referencing the same key, it is checked + * if there are mutually contradictory clauses and if so, we set *constfalse + * to true to indicate to the caller that the set of clauses cannot be true + * for any partition. It is also set if the list already contains a + * pseudo-constant clause. + * + * For multi-column keys, an equal bound is returned only if all the columns + * are constrained by clauses containing equality operators, unless hash + * partitioning is in use, in which case, it's possible that some keys have + * IS NULL clauses while remaining have clauses with equality operators. + * Min and max bounds could contain bound values for only a prefix of keys. + * + * All the OR clauses encountered in the list and those generated from certain + * ScalarArrayOpExprs are added to *or_clauses. It's the responsibility of the + * caller to process the argument clauses of each of the OR clauses, which + * would involve recursively calling this function. 
+ * + * Clauses containing a <> operator are added to *ne_clauses, provided its + * negator is a valid partitioning equality operator and that too only if + * list partitioning is in use. + */ +static bool +classify_partition_bounding_keys(Relation relation, List *clauses, + int rt_index, + PartScanKeyInfo *keys, bool *constfalse, + List **or_clauses, + List **ne_clauses) +{ + PartitionKey partkey = RelationGetPartitionKey(relation); + int i; + ListCell *lc; + List *keyclauses_all[PARTITION_MAX_KEYS], + *keyclauses[PARTITION_MAX_KEYS]; + bool will_compute_keys = false; + Bitmapset *keyisnull = NULL, + *keyisnotnull = NULL; + bool need_next_eq, + need_next_min, + need_next_max; + int n_keynullness = 0; + + *or_clauses = NIL; + *ne_clauses = NIL; + *constfalse = false; + memset(keyclauses_all, 0, sizeof(keyclauses_all)); + + foreach(lc, clauses) + { + Expr *clause; + ListCell *partexprs_item; + + if (IsA(lfirst(lc), RestrictInfo)) + { + RestrictInfo *rinfo = lfirst(lc); + + clause = rinfo->clause; + + /* + * A pseudoconstant clause need not be a Const node, so check the + * node type before reading constvalue. + */ + if (rinfo->pseudoconstant && + IsA(clause, Const) && + !DatumGetBool(((Const *) clause)->constvalue)) + { + *constfalse = true; + continue; + } + } + else + clause = (Expr *) lfirst(lc); + + /* Get the BoolExpr's out of the way.*/ + if (IsA(clause, BoolExpr)) + { + if (or_clause((Node *) clause)) + { + *or_clauses = lappend(*or_clauses, clause); + continue; + } + else if (and_clause((Node *) clause)) + { + clauses = list_concat(clauses, + list_copy(((BoolExpr *) clause)->args)); + continue; + } + /* Fall-through for a NOT clause, which is handled below. */ + } + + partexprs_item = list_head(partkey->partexprs); + for (i = 0; i < partkey->partnatts; i++) + { + Oid partopfamily = partkey->partopfamily[i]; + AttrNumber partattno = partkey->partattrs[i]; + Expr *partexpr = NULL; + PartClause *pc; + + /* + * A non-zero partattno refers to a simple column reference that + * will be matched against varattno of a Var appearing in the clause.
+ * partattno == 0 refers to arbitrary expressions, which get the + * current one from PartitionKey. + */ + if (partattno == 0) + { + if (partexprs_item == NULL) + elog(ERROR, "wrong number of partition key expressions"); + + /* Copy to avoid overwriting the relcache's content. */ + partexpr = copyObject(lfirst(partexprs_item)); + + /* + * Expressions stored in PartitionKey in the relcache all + * contain a dummy varno (that is, 1), but we must switch to + * the RT index of the table in this query so that it can be + * correctly matched to the expressions coming from the query. + */ + if (rt_index != 1) + ChangeVarNodes((Node *) partexpr, 1, rt_index, 0); + + partexprs_item = lnext(partexprs_item); + } + + if (IsA(clause, OpExpr)) + { + OpExpr *opclause = (OpExpr *) clause; + Expr *leftop, + *rightop, + *constexpr; + bool is_ne_listp = false; + + leftop = (Expr *) get_leftop(clause); + if (IsA(leftop, RelabelType)) + leftop = ((RelabelType *) leftop)->arg; + rightop = (Expr *) get_rightop(clause); + if (IsA(rightop, RelabelType)) + rightop = ((RelabelType *) rightop)->arg; + if (EXPR_MATCHES_PARTKEY(leftop, partattno, partexpr)) + constexpr = rightop; + else if (EXPR_MATCHES_PARTKEY(rightop, partattno, partexpr)) + constexpr = leftop; + else + /* Clause does not match this partition key. */ + continue; + + /* + * Handle cases where the clause's operator does not belong to + * the partitioning operator family. We currently handle two + * such cases: 1. Operators named '<>' are not listed in any + * operator family whatsoever, 2. Ordering operators like '<' + * are not listed in the hash operator families. For 1, check + * if list partitioning is in use and if so, proceed to pass + * the clause to the caller without doing any more processing + * ourselves. 2 cannot be handled at all, so the clause is + * simply skipped.
+ */ + if (!op_in_opfamily(opclause->opno, partopfamily)) + { + int strategy; + Oid negator, + lefttype, + righttype; + + /* + * To confirm if the operator is really '<>', check if its + * negator is an equality operator. If it's a btree + * equality operator *and* this is a list partitioned + * table, we can use it to prune partitions. + */ + negator = get_negator(opclause->opno); + if (OidIsValid(negator) && + op_in_opfamily(negator, partopfamily)) + { + get_op_opfamily_properties(negator, partopfamily, + false, + &strategy, + &lefttype, &righttype); + if (strategy == BTEqualStrategyNumber && + partkey->strategy == PARTITION_STRATEGY_LIST) + is_ne_listp = true; + } + + /* Cannot handle this clause. */ + if (!is_ne_listp) + continue; + } + + pc = palloc0(sizeof(PartClause)); + pc->constarg = constexpr; + + /* + * Flip the left and right args if we have to, because the + * code which extracts the constant value to use for + * partition-pruning expects to find it as the rightop of the + * clause. (See below in this function.) + */ + if (constexpr == rightop) + pc->op = opclause; + else + { + OpExpr *commuted; + Oid commutator = get_commutator(opclause->opno); + + /* + * Caller must have made sure to check that the commutator + * indeed exists. + */ + Assert(OidIsValid(commutator)); + commuted = (OpExpr *) copyObject(opclause); + commuted->opno = commutator; + commuted->opfuncid = get_opcode(commuted->opno); + commuted->args = list_make2(rightop, leftop); + pc->op = commuted; + } + + /* + * We don't turn a <> operator clause into a key right away. + * Instead, the caller will hand over such clauses to + * get_partitions_from_ne_clauses(). + */ + if (is_ne_listp) + *ne_clauses = lappend(*ne_clauses, pc); + else + { + keyclauses_all[i] = lappend(keyclauses_all[i], pc); + will_compute_keys = true; + + /* + * Since we only allow strict operators, require keys to + * be not null.
+ */ + keyisnotnull = bms_add_member(keyisnotnull, i); + } + } + else if (IsA(clause, ScalarArrayOpExpr)) + { + ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) clause; + Oid saop_op = saop->opno; + Oid saop_opfuncid = saop->opfuncid; + Oid saop_coll = saop->inputcollid; + Node *leftop = (Node *) linitial(saop->args), + *rightop = (Node *) lsecond(saop->args); + List *elem_exprs, + *elem_clauses; + ListCell *lc1; + bool negated = false; + + /* + * In case of NOT IN (..), we get a '<>', which while not + * listed as part of any operator family, we are able to + * handle the same if its negator is indeed a part of the + * partitioning operator family. + */ + if (!op_in_opfamily(saop_op, partopfamily)) + { + Oid negator = get_negator(saop_op); + int strategy; + Oid lefttype, + righttype; + + /* + * get_op_opfamily_properties() is called below with + * missing_ok = false, which presumably reports an error for + * an operator outside the family; skip the clause instead + * when the negator is absent or not a member of the + * partitioning operator family. + */ + if (!OidIsValid(negator) || + !op_in_opfamily(negator, partopfamily)) + continue; + get_op_opfamily_properties(negator, partopfamily, false, + &strategy, + &lefttype, &righttype); + if (strategy == BTEqualStrategyNumber) + negated = true; + } + + /* + * First generate a list of Const nodes, one for each array + * element.
+ */ + elem_exprs = NIL; + if (IsA(rightop, Const)) + { + Const *arr = (Const *) lsecond(saop->args); + ArrayType *arrval; + int16 elemlen; + bool elembyval; + char elemalign; + Datum *elem_values; + bool *elem_nulls; + int num_elems, + j; + + /* + * A NULL array constant carries no array datum to + * deconstruct. NOTE(review): a ScalarArrayOpExpr over a + * NULL array is expected to yield NULL for every row, so + * the ANDed clause list can never be satisfied -- confirm + * against the executor. + */ + if (arr->constisnull) + { + *constfalse = true; + break; + } + + arrval = DatumGetArrayTypeP(arr->constvalue); + get_typlenbyvalalign(ARR_ELEMTYPE(arrval), + &elemlen, &elembyval, &elemalign); + deconstruct_array(arrval, + ARR_ELEMTYPE(arrval), + elemlen, elembyval, elemalign, + &elem_values, &elem_nulls, + &num_elems); + + /* + * Use a separate counter here: 'i' is the index of the + * partition key column being examined by the enclosing loop + * and must not be disturbed. + */ + for (j = 0; j < num_elems; j++) + { + if (!elem_nulls[j]) + elem_exprs = lappend(elem_exprs, + makeConst(ARR_ELEMTYPE(arrval), + -1, arr->constcollid, + elemlen, elem_values[j], + false, elembyval)); + else + elem_exprs = lappend(elem_exprs, + makeNullConst(ARR_ELEMTYPE(arrval), + -1, + arr->constcollid)); + } + } + else + { + ArrayExpr *arrexpr = castNode(ArrayExpr, rightop); + + /* + * For a nested ArrayExpr, we don't know how to get the + * actual scalar values out into a flat list, so we give + * up doing anything with this ScalarArrayOpExpr. + */ + if (arrexpr->multidims) + continue; + + elem_exprs = arrexpr->elements; + } + + /* + * Now generate a list of clauses, one for each array element, + * of the form: saop_leftop saop_op elem_expr + */ + elem_clauses = NIL; + foreach(lc1, elem_exprs) + { + Const *rightop = castNode(Const, lfirst(lc1)); + Expr *elem_clause; + + if (rightop->constisnull) + { + NullTest *nulltest = makeNode(NullTest); + + nulltest->arg = (Expr *) leftop; + nulltest->nulltesttype = !negated ?
IS_NULL + : IS_NOT_NULL; + nulltest->argisrow = false; + nulltest->location = -1; + elem_clause = (Expr *) nulltest; + } + else + { + OpExpr *opexpr = makeNode(OpExpr); + + opexpr->opno = saop_op; + opexpr->opfuncid = saop_opfuncid; + opexpr->opresulttype = BOOLOID; + opexpr->opretset = false; + opexpr->opcollid = InvalidOid; + opexpr->inputcollid = saop_coll; + opexpr->args = list_make2(leftop, rightop); + opexpr->location = -1; + elem_clause = (Expr *) opexpr; + } + + elem_clauses = lappend(elem_clauses, elem_clause); + } + + /* + * Build the OR clause if needed or add the clauses to the end + * of the list that's being processed currently. + */ + if (saop->useOr) + *or_clauses = lappend(*or_clauses, + makeBoolExpr(OR_EXPR, elem_clauses, + -1)); + else + clauses = list_concat(clauses, elem_clauses); + } + else if (IsA(clause, NullTest)) + { + NullTest *nulltest = (NullTest *) clause; + Expr *arg = nulltest->arg; + + if (IsA(arg, RelabelType)) + arg = ((RelabelType *) arg)->arg; + + /* Does leftop match with this partition key column? */ + if (EXPR_MATCHES_PARTKEY(arg, partattno, partexpr)) + { + if (nulltest->nulltesttype == IS_NULL) + keyisnull = bms_add_member(keyisnull, i); + else + keyisnotnull = bms_add_member(keyisnotnull, i); + n_keynullness++; + will_compute_keys = true; + } + } + /* + * Boolean conditions have a special shape, which would've been + * accepted if the partitioning opfamily accepts Boolean + * conditions. + */ + else if (IsBooleanOpfamily(partopfamily) && + (IsA(clause, BooleanTest) || + IsA(clause, Var) || + not_clause((Node *) clause))) + { + Expr *leftop, + *rightop; + + pc = palloc0(sizeof(PartClause)); + + if (IsA(clause, BooleanTest)) + { + BooleanTest *btest = (BooleanTest *) clause; + + leftop = btest->arg; + rightop = (btest->booltesttype == IS_TRUE || + btest->booltesttype == IS_NOT_FALSE) + ? (Expr *) makeBoolConst(true, false) + : (Expr *) makeBoolConst(false, false); + } + else + { + leftop = IsA(clause, Var) + ? 
(Expr *) clause + : (Expr *) get_notclausearg((Expr *) clause); + rightop = IsA(clause, Var) + ? (Expr *) makeBoolConst(true, false) + : (Expr *) makeBoolConst(false, false); + } + pc->op = (OpExpr *) make_opclause(BooleanEqualOperator, + BOOLOID, false, + leftop, rightop, + InvalidOid, InvalidOid); + pc->constarg = rightop; + keyclauses_all[i] = lappend(keyclauses_all[i], pc); + will_compute_keys = true; + } + } + } + + /* Return if no work to do below. */ + if (!will_compute_keys || *constfalse) + return 0; + + /* + * Try to eliminate redundant keys. In the process, we might find out + * that clauses are mutually contradictory and hence can never be true + * for any rows. + */ + memset(keyclauses, 0, PARTITION_MAX_KEYS * sizeof(List *)); + for (i = 0; i < partkey->partnatts; i++) + { + remove_redundant_clauses(partkey, i, + keyclauses_all[i], &keyclauses[i], + constfalse); + if (*constfalse) + return 0; + } + + /* + * Generate bounding tuple(s). + * + * We look up partitions in the partition bound descriptor using, say, + * partition_bound_bsearch(), which expects a Datum (or Datums if multi- + * column key). So, extract the same out of the constant argument of + * each clause. + * + * Further, based on the strategies of clause operators (=, </<=, >/>=), + * try to construct tuples out of those datums that serve as the exact + * look-up tuple or minimum/maximum bounding tuple(s). If we find datums + * for all partition key columns that appear in = operator clauses, then + * we have the look-up tuple to be exactly matched, which will return just + * one partition if one exists. If the last value of the tuple comes from + * a >/>= or </<= operator, then that constitutes the minimum or maximum + * bounding tuple, respectively. There is one exception -- if the tuple + * constitutes a proper prefix of partition key columns, with none of its + * values coming from a </<= or >/>= operator, we consider such tuple both + * the minimum and maximum bounding tuple.
For a multi-column range + * partitioned table, there usually exists a sequence of consecutive + * partitions that share a prefix of partition bound, which are all + * matched by a bounding tuple of the aforementioned shape. + */ + need_next_eq = true; + need_next_min = true; + need_next_max = true; + memset(keys, 0, sizeof(PartScanKeyInfo)); + for (i = 0; i < partkey->partnatts; i++) + { + /* + * Min and max keys must constitute a prefix of the partition key and + * must appear in the same order as partition keys. Equal keys have + * to satisfy that requirement only for non-hash partitioning. + */ + if (i > keys->n_eqkeys && + partkey->strategy != PARTITION_STRATEGY_HASH) + need_next_eq = false; + + if (i > keys->n_minkeys) + need_next_min = false; + + if (i > keys->n_maxkeys) + need_next_max = false; + + foreach(lc, keyclauses[i]) + { + PartClause *clause = lfirst(lc); + Expr *constarg = clause->constarg; + bool incl; + PartOpStrategy op_strategy; + + op_strategy = partition_op_strategy(partkey, clause, &incl); + switch (op_strategy) + { + case PART_OP_EQUAL: + Assert(incl); + if (need_next_eq && + partkey_datum_from_expr(partkey, i, constarg, + &keys->eqkeys[i])) + keys->n_eqkeys++; + + if (need_next_max && + partkey_datum_from_expr(partkey, i, constarg, + &keys->maxkeys[i])) + { + keys->n_maxkeys++; + keys->max_incl = true; + } + + if (need_next_min && + partkey_datum_from_expr(partkey, i, constarg, + &keys->minkeys[i])) + { + keys->n_minkeys++; + keys->min_incl = true; + } + break; + + case PART_OP_LESS: + if (need_next_max && + partkey_datum_from_expr(partkey, i, constarg, + &keys->maxkeys[i])) + { + keys->n_maxkeys++; + keys->max_incl = incl; + if (!incl) + need_next_eq = need_next_max = false; + } + break; + + case PART_OP_GREATER: + if (need_next_min && + partkey_datum_from_expr(partkey, i, constarg, + &keys->minkeys[i])) + { + keys->n_minkeys++; + keys->min_incl = incl; + if (!incl) + need_next_eq = need_next_min = false; + } + break; + + default: + 
Assert(false); + } + } + } + + /* + * To set eqkeys, we must have found matching clauses containing equality + * operators for all partition key columns and if present we don't need + * the values in minkeys and maxkeys anymore. In the case of hash + * partitioning, we don't require all of eqkeys to be operator clauses. + * In that case, any IS NULL clauses involving partition key columns are + * also considered as equality keys by the code for hash partition pruning, + * which checks that all partition columns are covered before actually + * performing the pruning. + */ + if (keys->n_eqkeys == partkey->partnatts || + partkey->strategy == PARTITION_STRATEGY_HASH) + keys->n_minkeys = keys->n_maxkeys = 0; + else + keys->n_eqkeys = 0; + + /* Finally, also set the keyisnull and keyisnotnull values. */ + keys->keyisnull = keyisnull; + keys->keyisnotnull = keyisnotnull; + + return keys->n_eqkeys + keys->n_minkeys + keys->n_maxkeys + n_keynullness; +} + +/* + * partition_op_strategy + * Returns whether the clause in 'op' contains an =, </<=, or >/>= + * operator and sets *incl if equality is implied + * + * Reads op->op_strategy, so the clause's cached operator information must + * already have been filled in. An operator strategy that the partitioning + * strategy in use cannot handle is reported as an error rather than + * returning an unset result. + */ +static PartOpStrategy +partition_op_strategy(PartitionKey key, PartClause *op, bool *incl) +{ + PartOpStrategy result = PART_OP_EQUAL; /* keep compiler quiet */ + + *incl = false; /* overwritten as appropriate below */ + switch (key->strategy) + { + /* Hash partitioning allows only hash equality. */ + case PARTITION_STRATEGY_HASH: + if (op->op_strategy == HTEqualStrategyNumber) + { + *incl = true; + result = PART_OP_EQUAL; + } + else + elog(ERROR, "unexpected hash operator strategy: %d", + op->op_strategy); + break; + + /* List and range partitioning support all btree operators.
*/ + case PARTITION_STRATEGY_LIST: + case PARTITION_STRATEGY_RANGE: + switch (op->op_strategy) + { + case BTLessEqualStrategyNumber: + *incl = true; + /* fall through */ + case BTLessStrategyNumber: + result = PART_OP_LESS; + break; + case BTEqualStrategyNumber: + *incl = true; + result = PART_OP_EQUAL; + break; + case BTGreaterEqualStrategyNumber: + *incl = true; + /* fall through */ + case BTGreaterStrategyNumber: + result = PART_OP_GREATER; + break; + default: + elog(ERROR, "unexpected btree operator strategy: %d", + op->op_strategy); + } + break; + + default: + elog(ERROR, "unexpected partition strategy: %d", + (int) key->strategy); + } + + return result; +} + +/* + * partkey_datum_from_expr + * Extract constant value from expr and set *value to that value + * + * Returns true if *value was set, false if expr could not be coerced to the + * partition key's type or did not reduce to a Const. + */ +static bool +partkey_datum_from_expr(PartitionKey key, int partkeyidx, + Expr *expr, Datum *value) +{ + Oid exprtype = exprType((Node *) expr); + + if (exprtype != key->parttypid[partkeyidx]) + { + ParseState *pstate = make_parsestate(NULL); + + expr = (Expr *) coerce_to_target_type(pstate, (Node *) expr, + exprtype, + key->parttypid[partkeyidx], -1, + COERCION_EXPLICIT, + COERCE_IMPLICIT_CAST, -1); + free_parsestate(pstate); + + /* + * If we couldn't coerce to the partition key's type, that is, the + * type of the datums stored in PartitionBoundInfo for this partition + * key, there's no hope of using this expression for anything + * partitioning-related. + */ + if (expr == NULL) + return false; + + /* + * Transform into a form that the following code can do something + * useful with. + */ + expr = evaluate_expr(expr, + exprType((Node *) expr), + exprTypmod((Node *) expr), + exprCollation((Node *) expr)); + } + + /* + * Add more expression types here as needed to support higher-level + * code. + */ + if (IsA(expr, Const)) + { + *value = ((Const *) expr)->constvalue; + return true; + } + + return false; +} + +/* + * For a given partition key column, find the most restrictive of the clauses + * contained in all_clauses that are known to match the column.
If in the
+ * process, it is found that two clauses are mutually contradictory, we simply
+ * stop, set *constfalse to true, and return.
+ *
+ * *result is reset to NIL on entry and receives the surviving clauses, plus
+ * any clause that could not be compared against the current best one. Note
+ * that *constfalse is only ever set to true here; it is never reset to false.
+ * As a side effect, the cached operator information (op_strategy, op_subtype,
+ * op_func) of each clause in all_clauses is filled in if not already valid.
+ */
+static void
+remove_redundant_clauses(PartitionKey partkey, int partkeyidx,
+						 List *all_clauses, List **result,
+						 bool *constfalse)
+{
+	PartClause *hash_clause,
+			   *btree_clauses[BTMaxStrategyNumber];
+	ListCell   *lc;
+	int			s;
+	bool		test_result;
+
+	*result = NIL;
+
+	hash_clause = NULL;
+	memset(btree_clauses, 0, sizeof(btree_clauses));
+	foreach(lc, all_clauses)
+	{
+		PartClause *cur = lfirst(lc);
+
+		if (!cur->valid_cache)
+		{
+			Oid			lefttype;
+
+			get_op_opfamily_properties(cur->op->opno,
+									   partkey->partopfamily[partkeyidx],
+									   false,
+									   &cur->op_strategy,
+									   &lefttype,
+									   &cur->op_subtype);
+			fmgr_info(get_opcode(cur->op->opno), &cur->op_func);
+			cur->valid_cache = true;
+		}
+
+		/*
+		 * Hash-partitioning knows only about equality. So, if we've matched
+		 * a clause and found another whose constant operand doesn't match
+		 * the constant operand of the former, we have a case of mutually
+		 * contradictory clauses.
+		 */
+		if (partkey->strategy == PARTITION_STRATEGY_HASH)
+		{
+			if (hash_clause == NULL)
+				hash_clause = cur;
+			/* check if another clause would contradict the one we have */
+			else if (partition_cmp_args(partkey, partkeyidx,
+										cur, cur, hash_clause,
+										&test_result))
+			{
+				if (!test_result)
+				{
+					*constfalse = true;
+					return;
+				}
+			}
+			/*
+			 * Couldn't compare; keep hash_clause set to the previous value,
+			 * and add this one directly to the result. Caller would
+			 * arbitrarily choose one of the many and perform
+			 * partition-pruning with the same. It's possible that mutual
+			 * contradiction is proved at some higher level, but it's just
+			 * that we couldn't do so here.
+			 */
+			else
+				*result = lappend(*result, cur);
+
+			/*
+			 * The code below handles btree operators, so not relevant for
+			 * hash partitioning.
+			 */
+			continue;
+		}
+
+		/*
+		 * The code that follows closely mimics similar processing done by
+		 * nbtutils.c: _bt_preprocess_keys().
+		 *
+		 * btree_clauses[s] points to the currently best scan key of strategy
+		 * type s+1; it is NULL if we haven't yet found such a key for this
+		 * attr.
+		 */
+		s = cur->op_strategy - 1;
+		if (btree_clauses[s] == NULL)
+		{
+			btree_clauses[s] = cur;
+		}
+		else
+		{
+			/*
+			 * Is this one more restrictive than what we already have?
+			 *
+			 * Consider some examples: 1. If btree_clauses[BTLT] now contains
+			 * a < 5, and cur is a < 3, then because 3 < 5 is true, a < 5
+			 * currently at btree_clauses[BTLT] will be replaced by a < 3.
+			 *
+			 * 2. If btree_clauses[BTEQ] now contains a = 5 and cur is a = 7,
+			 * then because 5 = 7 is false, we found a mutual contradiction,
+			 * so we set *constfalse to true and return.
+			 *
+			 * 3. If btree_clauses[BTLT] now contains a < 5 and cur is a < 7,
+			 * then because 7 < 5 is false, we leave a < 5 where it is and
+			 * effectively discard a < 7 as being redundant.
+			 */
+			if (partition_cmp_args(partkey, partkeyidx,
+								   cur, cur, btree_clauses[s],
+								   &test_result))
+			{
+				/* cur is more restrictive, replace old key. */
+				if (test_result)
+					btree_clauses[s] = cur;
+				else if (s == BTEqualStrategyNumber - 1)
+				{
+					*constfalse = true;
+					return;
+				}
+
+				/* The old key is more restrictive, keep around. */
+			}
+			else
+			{
+				/*
+				 * we couldn't determine which one is more restrictive. Keep
+				 * the previous one in btree_clauses[s] and push this one directly
+				 * to the output list.
+				 */
+				*result = lappend(*result, cur);
+			}
+		}
+	}
+
+	if (partkey->strategy == PARTITION_STRATEGY_HASH)
+	{
+		/* Note we didn't add this one to the result yet. */
+		if (hash_clause)
+			*result = lappend(*result, hash_clause);
+		return;
+	}
+
+	/* Compare btree operator clauses across strategies. */
+
+	/* Compare the equal key with keys of other strategies. */
+	if (btree_clauses[BTEqualStrategyNumber - 1])
+	{
+		PartClause *eq = btree_clauses[BTEqualStrategyNumber - 1];
+
+		for (s = 0; s < BTMaxStrategyNumber; s++)
+		{
+			PartClause *chk = btree_clauses[s];
+
+			if (!chk || s == (BTEqualStrategyNumber - 1))
+				continue;
+
+			/*
+			 * Suppose btree_clauses[BTLT] contained a < 5 and the eq key is
+			 * a = 5, then because 5 < 5 is false, we found contradiction.
+			 * That is, a < 5 and a = 5 are mutually contradictory. OTOH, if
+			 * eq key is a = 3, then because 3 < 5, we no longer need a < 5,
+			 * because a = 3 is more restrictive.
+			 */
+			if (partition_cmp_args(partkey, partkeyidx,
+								   chk, eq, chk,
+								   &test_result))
+			{
+				if (!test_result)
+				{
+					*constfalse = true;
+					return;
+				}
+				/* discard the redundant key. */
+				btree_clauses[s] = NULL;
+			}
+		}
+	}
+
+	/*
+	 * Try to keep only one of <, <=.
+	 *
+	 * Suppose btree_clauses[BTLT] contains a < 3 and btree_clauses[BTLE]
+	 * contains a <= 3 (or a <= 4), then because 3 <= 3 (or 3 <= 4) is true,
+	 * we discard the a <= 3 (or a <= 4) as redundant. If the latter
+	 * contains a <= 2, then because 3 <= 2 is false, we discard a < 3 as
+	 * redundant. If the two cannot be compared, both are kept.
+	 */
+	if (btree_clauses[BTLessStrategyNumber - 1] &&
+		btree_clauses[BTLessEqualStrategyNumber - 1])
+	{
+		PartClause *lt = btree_clauses[BTLessStrategyNumber - 1],
+				   *le = btree_clauses[BTLessEqualStrategyNumber - 1];
+
+		if (partition_cmp_args(partkey, partkeyidx,
+							   le, lt, le,
+							   &test_result))
+		{
+			if (test_result)
+				btree_clauses[BTLessEqualStrategyNumber - 1] = NULL;
+			else
+				btree_clauses[BTLessStrategyNumber - 1] = NULL;
+		}
+	}
+
+	/* Try to keep only one of >, >=. See the example above. */
+	if (btree_clauses[BTGreaterStrategyNumber - 1] &&
+		btree_clauses[BTGreaterEqualStrategyNumber - 1])
+	{
+		PartClause *gt = btree_clauses[BTGreaterStrategyNumber - 1],
+				   *ge = btree_clauses[BTGreaterEqualStrategyNumber - 1];
+
+		if (partition_cmp_args(partkey, partkeyidx,
+							   ge, gt, ge,
+							   &test_result))
+		{
+			if (test_result)
+				btree_clauses[BTGreaterEqualStrategyNumber - 1] = NULL;
+			else
+				btree_clauses[BTGreaterStrategyNumber - 1] = NULL;
+		}
+	}
+
+	/*
+	 * btree_clauses now contains the "best" clause or NULL for each btree
+	 * strategy number. Add to the result.
+	 */
+	for (s = 0; s < BTMaxStrategyNumber; s++)
+		if (btree_clauses[s])
+			*result = lappend(*result, btree_clauses[s]);
+}
+
+/*
+ * Evaluate 'leftarg op rightarg' and set *result to its value.
+ *
+ * leftarg and rightarg referred to above actually refer to the constant
+ * operand (Datum) of the clause contained in the parameters leftarg and
+ * rightarg below, respectively. And op refers to the operator of the
+ * clause contained in the parameter op below.
+ *
+ * Returns true if we could actually perform the evaluation. False is
+ * returned otherwise, that is, in cases where we couldn't perform the
+ * evaluation for reasons such as operands values being unavailable or
+ * types of operands being incompatible with the operator. 
+ */
+static bool
+partition_cmp_args(PartitionKey key, int partkeyidx,
+				   PartClause *op, PartClause *leftarg, PartClause *rightarg,
+				   bool *result)
+{
+	Oid			partopfamily = key->partopfamily[partkeyidx];
+	Datum		leftarg_const,
+				rightarg_const;
+
+	Assert(op->valid_cache && leftarg->valid_cache && rightarg->valid_cache);
+
+	/* Make sure *result is defined on every "couldn't compare" return. */
+	*result = false;
+
+	/* Get the constant values from the operands */
+	if (!partkey_datum_from_expr(key, partkeyidx,
+								 leftarg->constarg, &leftarg_const))
+		return false;
+	if (!partkey_datum_from_expr(key, partkeyidx,
+								 rightarg->constarg, &rightarg_const))
+		return false;
+
+	/*
+	 * If the leftarg_const and rightarg_const are both of the type expected
+	 * by op's operator, then compare them using the latter.
+	 */
+	if (leftarg->op_subtype == op->op_subtype &&
+		rightarg->op_subtype == op->op_subtype)
+	{
+		*result = DatumGetBool(FunctionCall2Coll(&op->op_func,
+												 op->op->inputcollid,
+												 leftarg_const,
+												 rightarg_const));
+		return true;
+	}
+	else
+	{
+		/* Otherwise, look one up in the partitioning operator family. */
+		Oid			cmp_op = get_opfamily_member(partopfamily,
+												 leftarg->op_subtype,
+												 rightarg->op_subtype,
+												 op->op_strategy);
+
+		if (OidIsValid(cmp_op))
+		{
+			*result = DatumGetBool(OidFunctionCall2Coll(get_opcode(cmp_op),
+														op->op->inputcollid,
+														leftarg_const,
+														rightarg_const));
+			return true;
+		}
+	}
+
+	/* Couldn't do the comparison; *result was already set to false. */
+	return false;
+}
+
+/*
+ * get_partitions_for_keys
+ *		Returns the partitions that will need to be scanned for the given
+ *		bounding keys
+ *
+ * Input:
+ *	See the comments above the definition of PartScanKeyInfo to see what
+ *	kind of information is received here.
+ *
+ * Outputs:
+ *	Partition set satisfying the keys.
+ */
+static Bitmapset *
+get_partitions_for_keys(Relation rel, PartScanKeyInfo *keys)
+{
+	/* Return an empty set if no partitions to see. */
+	if (RelationGetPartitionDesc(rel)->nparts == 0)
+		return NULL;
+
+	switch (RelationGetPartitionKey(rel)->strategy)
+	{
+		case PARTITION_STRATEGY_HASH:
+			return get_partitions_for_keys_hash(rel, keys);
+
+		case PARTITION_STRATEGY_LIST:
+			return get_partitions_for_keys_list(rel, keys);
+
+		case PARTITION_STRATEGY_RANGE:
+			return get_partitions_for_keys_range(rel, keys);
+
+		default:
+			elog(ERROR, "unexpected partition strategy: %d",
+				 (int) RelationGetPartitionKey(rel)->strategy);
+	}
+
+	return NULL;				/* keep compiler quiet */
+}
+
+/*
+ * get_partitions_for_keys_hash
+ *		Return partitions of a hash partitioned table for requested
+ *		keys
+ *
+ * This interprets the keys and looks up the partition bound descriptor
+ * using the hash partitioning semantics.
+ *
+ * 'keys' is only read: the IS NULL columns are counted in a local copy of
+ * n_eqkeys, so calling this again with the same keys gives the same answer.
+ */
+static Bitmapset *
+get_partitions_for_keys_hash(Relation rel, PartScanKeyInfo *keys)
+{
+	PartitionKey partkey = RelationGetPartitionKey(rel);
+	PartitionDesc partdesc = RelationGetPartitionDesc(rel);
+	PartitionBoundInfo boundinfo = partdesc->boundinfo;
+	bool		keyisnull[PARTITION_MAX_KEYS];
+	int			n_eqkeys = keys->n_eqkeys;
+	int			greatest_modulus,
+				result_index;
+	uint64		rowHash;
+	int			i;
+
+	Assert(partdesc->nparts > 0);
+
+	/*
+	 * Hash partitioning stores partition keys containing nulls in regular
+	 * partitions. That is, the code that determines the hash partition for
+	 * a given row admits nulls in the partition key when computing the key's
+	 * hash. So, here we treat any IS NULL clauses on partition key columns as
+	 * equality keys, along with any other non-null values coming from equality
+	 * operator clauses.
+	 */
+	memset(keyisnull, false, sizeof(keyisnull));
+	for (i = 0; i < partkey->partnatts; i++)
+	{
+		if (bms_is_member(i, keys->keyisnull))
+		{
+			n_eqkeys++;
+			keyisnull[i] = true;
+		}
+	}
+
+	/*
+	 * Can only do pruning if we know all the keys and they're all equality
+	 * keys including the nulls that we just counted above. Otherwise,
+	 * return all partitions.
+	 */
+	if (n_eqkeys != partkey->partnatts)
+		return bms_add_range(NULL, 0, partdesc->nparts - 1);
+
+	greatest_modulus = get_greatest_modulus(boundinfo);
+	rowHash = compute_hash_value(partkey, keys->eqkeys, keyisnull);
+	result_index = boundinfo->indexes[rowHash % greatest_modulus];
+	if (result_index >= 0)
+		return bms_make_singleton(result_index);
+
+	/* No partition is defined for the computed remainder. */
+	return NULL;
+}
+
+/*
+ * get_partitions_for_keys_list
+ *		Return partitions of a list partitioned table for requested keys
+ *
+ * This interprets the keys and looks up the partition bound descriptor using
+ * the list partitioning semantics.
+ */
+static Bitmapset *
+get_partitions_for_keys_list(Relation rel, PartScanKeyInfo *keys)
+{
+	PartitionKey partkey = RelationGetPartitionKey(rel);
+	PartitionBoundInfo boundinfo = RelationGetPartitionDesc(rel)->boundinfo;
+	PartitionBoundCmpArg arg;
+	int			i,
+				eqoff,
+				minoff,
+				maxoff;
+	bool		is_equal;
+
+	Assert(RelationGetPartitionDesc(rel)->nparts > 0);
+	Assert(partkey->partnatts == 1);
+
+	/*
+	 * We might be able to get the answer sooner based on the nullness of
+	 * keys, so get that out of the way.
+	 */
+	if (!bms_is_empty(keys->keyisnull))
+	{
+		int			other_idx = -1;
+
+		/*
+		 * Only a designated partition accepts nulls, which if there
+		 * exists one, return the same. Failing that, fall back to the
+		 * default partition if there is one.
+		 */
+		if (partition_bound_accepts_nulls(boundinfo) ||
+			partition_bound_has_default(boundinfo))
+			other_idx = partition_bound_accepts_nulls(boundinfo)
+				? boundinfo->null_index
+				: boundinfo->default_index;
+		if (other_idx >= 0)
+			return bms_make_singleton(other_idx);
+		else
+			return NULL;
+	}
+
+	/*
+	 * If there are no datums to compare keys with, but there exist
+	 * partitions, it must be the default partition.
+	 */
+	if (boundinfo->ndatums == 0)
+	{
+		if (partition_bound_has_default(boundinfo))
+			return bms_make_singleton(boundinfo->default_index);
+		else
+			return NULL;
+	}
+
+	if (keys->n_eqkeys == partkey->partnatts)
+	{
+		/* Look up using binary search if eqkeys matches any of the datums. */
+		memset(&arg, 0, sizeof(PartitionBoundCmpArg));
+		arg.datums = keys->eqkeys;
+		arg.ndatums = keys->n_eqkeys;
+		eqoff = partition_bound_bsearch(partkey, boundinfo, &arg, &is_equal);
+
+		if (eqoff >= 0 && is_equal)
+		{
+			Assert(boundinfo->indexes[eqoff] >= 0);
+			return bms_make_singleton(boundinfo->indexes[eqoff]);
+		}
+		else if (partition_bound_has_default(boundinfo))
+			return bms_make_singleton(boundinfo->default_index);
+		else
+			return NULL;
+	}
+
+	/*
+	 * Find the leftmost bound that satisfies the query, i.e., the one that
+	 * satisfies minkeys.
+	 */
+	minoff = 0;
+	if (keys->n_minkeys > 0)
+	{
+		memset(&arg, 0, sizeof(PartitionBoundCmpArg));
+		arg.datums = keys->minkeys;
+		arg.ndatums = keys->n_minkeys;
+		minoff = partition_bound_bsearch(partkey, boundinfo, &arg, &is_equal);
+
+		/*
+		 * minoff set to -1 means all datums are greater than minkeys, which
+		 * means all partitions satisfy minkeys. In that case, set minoff to
+		 * the index of the leftmost datum, viz. 0.
+		 *
+		 * If the bound at minoff doesn't exactly match minkey or if it does,
+		 * but minkey isn't inclusive, move to the bound on the right.
+		 */
+		if (minoff == -1 || !is_equal || !keys->min_incl)
+			minoff++;
+
+		/*
+		 * boundinfo->ndatums - 1 is the last valid list partition datums
+		 * index.
+		 */
+		if (minoff > boundinfo->ndatums - 1)
+			minoff = -1;
+	}
+
+	/*
+	 * Find the rightmost bound that satisfies the query, i.e., one that
+	 * satisfies maxkeys.
+	 */
+	maxoff = boundinfo->ndatums - 1;
+	if (keys->n_maxkeys > 0)
+	{
+		memset(&arg, 0, sizeof(PartitionBoundCmpArg));
+		arg.datums = keys->maxkeys;
+		arg.ndatums = keys->n_maxkeys;
+		maxoff = partition_bound_bsearch(partkey, boundinfo, &arg, &is_equal);
+
+		/*
+		 * Unlike minoff, we leave maxoff that is set to -1 unchanged, because
+		 * it simply means none of the partitions satisfies maxkeys.
+		 *
+		 * If the bound at maxoff exactly matches maxkey (is_equal), but the
+		 * maxkey is not inclusive, then go to the bound on left.
+		 */
+		if (is_equal && !keys->max_incl)
+			maxoff--;
+	}
+
+	/*
+	 * minoff or maxoff set to -1 means none of the datums in
+	 * PartitionBoundInfo satisfies both minkeys and maxkeys. If both are set
+	 * to a valid datum offset, that means there exists at least some datums
+	 * (and hence partitions) satisfying both minkeys and maxkeys.
+	 */
+	if (minoff >= 0 && maxoff >= 0)
+	{
+		Bitmapset  *result = NULL;
+
+		/*
+		 * All datums between those at minoff and maxoff satisfy the query
+		 * keys, so add the corresponding partitions to the result set.
+		 */
+		for (i = minoff; i <= maxoff; i++)
+			result = bms_add_member(result, boundinfo->indexes[i]);
+
+		/*
+		 * For range queries, always include the default list partition,
+		 * because list partitions divide the key space in a discontinuous
+		 * manner, not all values in the given range will have a partition
+		 * assigned.
+		 */
+		if (partition_bound_has_default(boundinfo))
+			return bms_add_member(result, boundinfo->default_index);
+		else
+			return result;
+	}
+	else if (partition_bound_has_default(boundinfo))
+		return bms_make_singleton(boundinfo->default_index);
+
+	return NULL;
+}
+
+/*
+ * get_partitions_for_keys_range
+ *		Return partitions of a range partitioned table for requested keys
+ *
+ * This interprets the keys and looks up the partition bound descriptor using
+ * the range partitioning semantics. 
+ */
+static Bitmapset *
+get_partitions_for_keys_range(Relation rel, PartScanKeyInfo *keys)
+{
+	PartitionKey partkey = RelationGetPartitionKey(rel);
+	PartitionBoundInfo boundinfo = RelationGetPartitionDesc(rel)->boundinfo;
+	PartitionBoundCmpArg arg;
+	int			i,
+				eqoff,
+				minoff,
+				maxoff;
+	bool		is_equal;
+
+	Assert(RelationGetPartitionDesc(rel)->nparts > 0);
+
+	/*
+	 * We might be able to get the answer sooner based on the nullness of
+	 * keys, so get that out of the way. A null in any one of the key
+	 * columns is enough to decide the result.
+	 */
+	for (i = 0; i < partkey->partnatts; i++)
+	{
+		if (bms_is_member(i, keys->keyisnull))
+		{
+			/* Only the default partition accepts nulls. */
+			if (partition_bound_has_default(boundinfo))
+				return bms_make_singleton(boundinfo->default_index);
+			else
+				return NULL;
+		}
+	}
+
+	/*
+	 * If there are no datums to compare keys with, but there exist
+	 * partitions, it must be the default partition.
+	 */
+	if (boundinfo->ndatums == 0)
+	{
+		if (partition_bound_has_default(boundinfo))
+			return bms_make_singleton(boundinfo->default_index);
+		else
+			return NULL;
+	}
+
+	if (keys->n_eqkeys == partkey->partnatts)
+	{
+		/* Look up using binary search if eqkeys matches any of the datums. */
+		memset(&arg, 0, sizeof(PartitionBoundCmpArg));
+		arg.datums = keys->eqkeys;
+		arg.ndatums = keys->n_eqkeys;
+		eqoff = partition_bound_bsearch(partkey, boundinfo, &arg, &is_equal);
+
+		/*
+		 * The bound at eqoff is known to be <= eqkeys, given the way
+		 * partition_bound_bsearch works. Considering the same as the lower
+		 * bound of the partition that eqkeys falls into, the bound at
+		 * eqoff + 1 would be its upper bound, so use eqoff + 1 to get the
+		 * desired partition's index.
+		 */
+		if (eqoff >= 0 && boundinfo->indexes[eqoff + 1] >= 0)
+			return bms_make_singleton(boundinfo->indexes[eqoff+1]);
+		/*
+		 * eqkeys falls into a range of values for which no non-default
+		 * partition exists.
+		 */
+		else if (partition_bound_has_default(boundinfo))
+			return bms_make_singleton(boundinfo->default_index);
+		else
+			return NULL;
+	}
+
+	/*
+	 * Find the leftmost bound that satisfies the query, i.e., the one that
+	 * satisfies minkeys.
+	 */
+	minoff = 0;
+	if (keys->n_minkeys > 0)
+	{
+		memset(&arg, 0, sizeof(PartitionBoundCmpArg));
+		arg.datums = keys->minkeys;
+		arg.ndatums = keys->n_minkeys;
+		minoff = partition_bound_bsearch(partkey, boundinfo, &arg, &is_equal);
+
+		/*
+		 * If only a prefix of the whole partition key is provided, there may
+		 * be multiple partitions whose bound share the same prefix. If minkey
+		 * is inclusive, we must make minoff point to the leftmost such bound,
+		 * making the result contain all such partitions. If it is exclusive,
+		 * we must move minoff to the right such that minoff points to the
+		 * first partition whose bound is greater than this prefix, thus
+		 * excluding all aforementioned partitions from appearing in the
+		 * result.
+		 */
+		if (is_equal && arg.ndatums < partkey->partnatts)
+		{
+			int32		cmpval;
+
+			is_equal = false;
+			do
+			{
+				if (keys->min_incl)
+					minoff -= 1;
+				else
+					minoff += 1;
+				if (minoff < 0 || minoff >= boundinfo->ndatums)
+					break;
+				cmpval = partition_bound_cmp(partkey, boundinfo, minoff,
+											 &arg);
+			} while (cmpval == 0);
+
+			/* Back up if went too far. */
+			if (!keys->min_incl)
+				minoff -= 1;
+		}
+
+		/*
+		 * At this point, minoff gives us the leftmost bound that is known to
+		 * be <= query's minkey. The bound at minoff + 1 (if there is one),
+		 * then, would be the upper bound of the leftmost partition that needs
+		 * to be scanned.
+		 */
+		minoff += 1;
+	}
+
+	/*
+	 * Find the rightmost bound that satisfies the query, i.e., one that
+	 * satisfies maxkeys.
+	 *
+	 * 1 more index than range partition datums
+	 */
+	maxoff = boundinfo->ndatums;
+	if (keys->n_maxkeys > 0)
+	{
+		memset(&arg, 0, sizeof(PartitionBoundCmpArg));
+		arg.datums = keys->maxkeys;
+		arg.ndatums = keys->n_maxkeys;
+		maxoff = partition_bound_bsearch(partkey, boundinfo, &arg, &is_equal);
+
+		/* See the comment above for minkeys. */
+		if (is_equal && arg.ndatums < partkey->partnatts)
+		{
+			int32		cmpval;
+
+			is_equal = false;
+			do
+			{
+				if (keys->max_incl)
+					maxoff += 1;
+				else
+					maxoff -= 1;
+				if (maxoff < 0 || maxoff >= boundinfo->ndatums)
+					break;
+				cmpval = partition_bound_cmp(partkey, boundinfo, maxoff,
+											 &arg);
+			} while (cmpval == 0);
+
+			/* Back up if went too far. */
+			if (keys->max_incl)
+				maxoff -= 1;
+		}
+
+		/*
+		 * At this point, maxoff gives us the rightmost bound that is known to
+		 * be <= query's maxkey. The bound at maxoff+1, then, would be the
+		 * upper bound of the rightmost partition that needs to be scanned.
+		 * Although, if the bound is equal to maxkeys and the latter is not
+		 * inclusive, then the bound at maxoff itself is the upper bound of
+		 * the rightmost partition that needs to be scanned.
+		 */
+		if (!is_equal || keys->max_incl)
+			maxoff += 1;
+	}
+
+	/*
+	 * minoff or maxoff set to -1 means none of the datums in
+	 * PartitionBoundInfo satisfies both minkeys and maxkeys. If both are set
+	 * to a valid datum offset, that means there exists at least some
+	 * datums (and hence partitions) satisfying both minkeys and maxkeys.
+	 */
+	if (minoff >= 0 && maxoff >= 0)
+	{
+		bool		include_def = false;
+		Bitmapset  *result = NULL;
+
+
+		/*
+		 * If the bound at minoff or maxoff looks like it's an upper bound of
+		 * an unassigned range of values, move to the adjacent bound which must
+		 * be the upper bound of the leftmost or rightmost partition,
+		 * respectively, that needs to be scanned.
+		 *
+		 * By doing that, we skip over a portion of values that do indeed
+		 * satisfy the query, but don't have a valid partition assigned. The
+		 * default partition will have to be included to cover those values.
+		 * Although, if the original bound in question is an infinite value,
+		 * there would not be any unassigned range to speak of, because the
+		 * range is unbounded in that direction by definition, so no need to
+		 * include the default.
+		 */
+		if (boundinfo->indexes[minoff] < 0)
+		{
+			int		lastkey = partkey->partnatts - 1;
+
+			if (keys->n_minkeys > 0)
+				lastkey = keys->n_minkeys - 1;
+			if (minoff >=0 &&
+				minoff < boundinfo->ndatums &&
+				boundinfo->kind[minoff][lastkey] == PARTITION_RANGE_DATUM_VALUE)
+			{
+				include_def = true;
+			}
+			minoff += 1;
+		}
+
+		if (maxoff >= 1 && boundinfo->indexes[maxoff] < 0)
+		{
+			int		lastkey = partkey->partnatts - 1;
+
+			if (keys->n_maxkeys > 0)
+				lastkey = keys->n_maxkeys - 1;
+			if (maxoff >=0 &&
+				maxoff <= boundinfo->ndatums &&
+				boundinfo->kind[maxoff - 1][lastkey] == PARTITION_RANGE_DATUM_VALUE)
+			{
+				include_def = true;
+			}
+			maxoff -= 1;
+		}
+
+		if (minoff <= maxoff)
+			result = bms_add_range(result,
+								   boundinfo->indexes[minoff],
+								   boundinfo->indexes[maxoff]);
+		/*
+		 * There might exist a range of values unassigned to any non-default
+		 * range partition between the datums at minoff and maxoff.
+		 */
+		for (i = minoff; i <= maxoff; i++)
+		{
+			if (boundinfo->indexes[i] < 0)
+			{
+				include_def = true;
+				break;
+			}
+		}
+
+		/*
+		 * Since partition keys with nulls are mapped to the default range
+		 * partition, we must include the default partition if some keys
+		 * could be null. That is only checked when minkeys or maxkeys
+		 * covers just a prefix of the partition key.
+		 */
+		if (keys->n_minkeys < partkey->partnatts ||
+			keys->n_maxkeys < partkey->partnatts)
+		{
+			for (i = 0; i < partkey->partnatts; i++)
+			{
+				if (!bms_is_member(i, keys->keyisnotnull))
+				{
+					include_def = true;
+					break;
+				}
+			}
+		}
+
+		if (include_def && partition_bound_has_default(boundinfo))
+			return bms_add_member(result, boundinfo->default_index);
+		else
+			return result;
+	}
+	else if (partition_bound_has_default(boundinfo))
+		return bms_make_singleton(boundinfo->default_index);
+
+	Assert(false);
+	return NULL;
+}
+
+/*
  * get_partition_operator
  *
  * Return oid of the operator of given strategy for a given partition key
diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c
index cf38b4eb5e..ccfae4f31e 100644
--- a/src/backend/optimizer/util/clauses.c
+++ b/src/backend/optimizer/util/clauses.c
@@ -152,8 +152,6 @@ static Node *substitute_actual_parameters(Node *expr, int nargs, List *args,
 static Node *substitute_actual_parameters_mutator(Node *node,
 							  substitute_actual_parameters_context *context);
 static void sql_inline_error_callback(void *arg);
-static Expr *evaluate_expr(Expr *expr, Oid result_type, int32 result_typmod,
-			  Oid result_collation);
 static Query *substitute_actual_srf_parameters(Query *expr,
 								 int nargs, List *args);
 static Node *substitute_actual_srf_parameters_mutator(Node *node,
@@ -4833,7 +4831,7 @@ sql_inline_error_callback(void *arg)
  * We use the executor's routine ExecEvalExpr() to avoid duplication of
  * code and ensure we get the same result as the executor would get. 
*/ -static Expr * +Expr * evaluate_expr(Expr *expr, Oid result_type, int32 result_typmod, Oid result_collation) { diff --git a/src/include/catalog/partition.h b/src/include/catalog/partition.h index 2faf0ca26e..8423c6e886 100644 --- a/src/include/catalog/partition.h +++ b/src/include/catalog/partition.h @@ -73,4 +73,7 @@ extern List *get_proposed_default_constraint(List *new_part_constaints); extern int get_partition_for_tuple(Relation relation, Datum *values, bool *isnull); +/* For partition-pruning */ +extern Bitmapset *get_partitions_from_clauses(Relation relation, int rt_index, + List *partclauses); #endif /* PARTITION_H */ diff --git a/src/include/catalog/pg_opfamily.h b/src/include/catalog/pg_opfamily.h index b544474254..0847df97ff 100644 --- a/src/include/catalog/pg_opfamily.h +++ b/src/include/catalog/pg_opfamily.h @@ -188,4 +188,7 @@ DATA(insert OID = 4104 ( 3580 box_inclusion_ops PGNSP PGUID )); DATA(insert OID = 5000 ( 4000 box_ops PGNSP PGUID )); DATA(insert OID = 5008 ( 4000 poly_ops PGNSP PGUID )); +#define IsBooleanOpfamily(opfamily) \ + ((opfamily) == BOOL_BTREE_FAM_OID || (opfamily) == BOOL_HASH_FAM_OID) + #endif /* PG_OPFAMILY_H */ diff --git a/src/include/optimizer/clauses.h b/src/include/optimizer/clauses.h index ba4fa4b68b..3c2f54964b 100644 --- a/src/include/optimizer/clauses.h +++ b/src/include/optimizer/clauses.h @@ -84,5 +84,7 @@ extern Node *estimate_expression_value(PlannerInfo *root, Node *node); extern Query *inline_set_returning_function(PlannerInfo *root, RangeTblEntry *rte); +extern Expr *evaluate_expr(Expr *expr, Oid result_type, int32 result_typmod, + Oid result_collation); #endif /* CLAUSES_H */ -- 2.11.0