From dec7931f2cca258a26d3d04afd3b89be333d5836 Mon Sep 17 00:00:00 2001 From: amit Date: Wed, 18 Oct 2017 17:14:53 +0900 Subject: [PATCH 5/5] Implement get_partitions_for_keys Disable constraint_exclusion using internal partition constraints. --- src/backend/catalog/partition.c | 376 +++++++++++++++++++++++++++++++- src/test/regress/expected/inherit.out | 4 +- src/test/regress/expected/partition.out | 72 ++---- 3 files changed, 398 insertions(+), 54 deletions(-) diff --git a/src/backend/catalog/partition.c b/src/backend/catalog/partition.c index caf52f4210..6b663bb11f 100644 --- a/src/backend/catalog/partition.c +++ b/src/backend/catalog/partition.c @@ -2622,7 +2622,381 @@ partition_cmp_args(Oid partopfamily, Oid partopcintype, static PartitionSet * get_partitions_for_keys(Relation rel, PartScanKeyInfo *keys) { - return partset_new(false, true); + PartitionSet *partset; + int i, + eqoff, + minoff, + maxoff; + PartitionKey partkey = RelationGetPartitionKey(rel); + PartitionDesc partdesc = RelationGetPartitionDesc(rel); + PartitionBoundInfo boundinfo = partdesc->boundinfo; + PartitionBoundCmpArg arg; + bool is_equal; + + /* Return an empty set if no partitions to see. */ + if (partdesc->nparts == 0) + return partset_new(true, false); + + /* + * Initialize the set as one that's neither empty nor contains all + * partitions. The code below will set min_part_idx and max_part_idx + * and/or other_parts as found out by comparing keys to the partition + * bounds, as well as considering special partitions like null-accepting + * and default partitions. If it turns out that no partitions need to + * be scanned, partset->empty will be set to true. + */ + partset = partset_new(false, false); + + /* + * Check if any of the scan keys are null. If so, return the only + * null-accepting partition if boundinfo says there is one. + */ + for (i = 0; i < partkey->partnatts; i++) + { + if (keys->keynullness[i] == IS_NULL) + { + int other_idx = -1; + + /* + * Note that only one of the null-accepting partition and the + * default partition can be holding null values at any given + * time. + */ + if (partition_bound_accepts_nulls(boundinfo)|| + partition_bound_has_default(boundinfo)) + other_idx = partition_bound_accepts_nulls(boundinfo) + ? boundinfo->null_index + : boundinfo->default_index; + if (other_idx >= 0) + partset->other_parts = bms_make_singleton(other_idx); + + return partset; + } + } + + /* + * If there are no datums to compare keys with, but there exists a + * partition, the latter must be a partition that accepts only nulls + * or a default partition. If it is the former and we didn't already + * return it as the only scannable partition, that means the query + * doesn't want null values in its output. So, all of what the query + * wants instead must be in the default partition. + */ + if (boundinfo->ndatums == 0) + { + if (partition_bound_has_default(boundinfo)) + partset->other_parts = + bms_make_singleton(boundinfo->default_index); + else + partset->empty = true; + return partset; + } + /* No bounding keys, so just return all partitions. */ + else if (keys->n_eqkeys + keys->n_minkeys + keys->n_maxkeys == 0) + { + partset->all_parts = true; + return partset; + } + + /* Valid keys->eqkeys must provide all partition keys. */ + Assert(keys->n_eqkeys == 0 || keys->n_eqkeys == partkey->partnatts); + if (keys->n_eqkeys > 0) + { + memset(&arg, 0, sizeof(PartitionBoundCmpArg)); + arg.datums = keys->eqkeys; + arg.ndatums = keys->n_eqkeys; + eqoff = partition_bound_bsearch(partkey, boundinfo, &arg, &is_equal); + + if (eqoff >= 0) + { + switch (partkey->strategy) + { + case PARTITION_STRATEGY_LIST: + /* For list partition, must exactly match the datum. */ + if (!is_equal) + eqoff = -1; + break; + + case PARTITION_STRATEGY_RANGE: + /* + * eqoff is gives us the bound that is known to be <= + * eqkeys given how partition_bound_bsearch works. The + * bound at eqoff + 1, then, would be the upper bound of + * the only partition that needs to be scanned. + */ + if (partkey->strategy == PARTITION_STRATEGY_RANGE) + eqoff += 1; + } + } + + /* + * Ask later code to include the default partition, because eqkeys + * didn't identify a specific partition or identified a range + * of unassigned values. + */ + if (eqoff >= 0 && boundinfo->indexes[eqoff] >= 0) + partset->other_parts = + bms_make_singleton(boundinfo->indexes[eqoff]); + else if (partition_bound_has_default(boundinfo)) + partset->other_parts = + bms_make_singleton(boundinfo->default_index); + else + partset->empty = true; + + /* There are no minkeys and maxkeys when eqkeys is valid. */ + return partset; + } + + /* + * Find the leftmost bound that satisfies the query, i.e., one that + * satisfies minkeys. + */ + minoff = 0; + if (keys->n_minkeys > 0) + { + memset(&arg, 0, sizeof(PartitionBoundCmpArg)); + arg.datums = keys->minkeys; + arg.ndatums = keys->n_minkeys; + minoff = partition_bound_bsearch(partkey, boundinfo, &arg, &is_equal); + + switch (partkey->strategy) + { + case PARTITION_STRATEGY_LIST: + /* + * minkeys matched one of the datums (because, is_equal), but + * the query may have asked to exclude that value. If so, + * move to the bound on the right, which doesn't necessarily + * mean we're excluding the list partition containing that + * value, because there very well might be values in the range + * thus selected that belong to the partition to which the + * matched value (minkeys) also belongs. + */ + if (is_equal && !keys->min_incl) + minoff++; + break; + + case PARTITION_STRATEGY_RANGE: + /* + * If only a prefix of the whole partition key is provided, + * there will be multiple partitions whose bound share the + * same prefix. If minkey is inclusive, we must make minoff + * point to the leftmost such bound, making the result contain + * all such partitions. If it is exclusive, we must move + * minoff to the right such that minoff points to the first + * partition whose bound is greater than this prefix, thus + * excluding all aforementioned partitions from appearing in + * the result. + */ + if (is_equal && arg.ndatums < partkey->partnatts) + { + int32 cmpval; + + is_equal = false; + do + { + if (keys->min_incl) + minoff -= 1; + else + minoff += 1; + if (minoff < 0 || minoff >= boundinfo->ndatums) + break; + cmpval = partition_bound_cmp(partkey, boundinfo, + minoff, &arg); + } while (cmpval == 0); + + /* Back up if went too far. */ + if (!keys->min_incl) + minoff -= 1; + } + + /* + * At this point, minoff gives us the leftmost bound that is + * known to be <= query's minkey. The bound at minoff + 1 (if + * there is one), then, would be the upper bound of the + * leftmost partition that needs to be scanned. + */ + if (minoff < boundinfo->ndatums - 1) + minoff += 1; + break; + } + } + + /* + * Find the rightmost bound that satisfies the query, i.e., one that + * satisfies maxkeys. + */ + if (partkey->strategy == PARTITION_STRATEGY_RANGE) + /* 1 more index than datums in this case */ + maxoff = boundinfo->ndatums; + else + maxoff = boundinfo->ndatums - 1; + if (keys->n_maxkeys > 0) + { + memset(&arg, 0, sizeof(PartitionBoundCmpArg)); + arg.datums = keys->maxkeys; + arg.ndatums = keys->n_maxkeys; + maxoff = partition_bound_bsearch(partkey, boundinfo, &arg, &is_equal); + + switch (partkey->strategy) + { + case PARTITION_STRATEGY_LIST: + /* See the comment above for minkeys. */ + if (is_equal && !keys->max_incl) + maxoff--; + break; + + case PARTITION_STRATEGY_RANGE: + /* See the comment above for minkeys. */ + if (is_equal && arg.ndatums < partkey->partnatts) + { + int32 cmpval; + + is_equal = false; + do + { + if (keys->max_incl) + maxoff += 1; + else + maxoff -= 1; + if (maxoff < 0 || maxoff >= boundinfo->ndatums) + break; + cmpval = partition_bound_cmp(partkey, boundinfo, + maxoff, &arg); + } while (cmpval == 0); + + /* Back up if went too far. */ + if (keys->max_incl) + maxoff -= 1; + } + + /* + * At this point, maxoff gives us the rightmost bound that is + * known to be <= query's maxkey. The bound at maxoff + 1, + * then, would be the upper bound of the rightmost partition + * that needs to be scanned. Although, if the bound is equal + * to maxkeys and the latter is not inclusive, then the bound + * at maxoff itself is the upper bound of the rightmost + * partition that needs to be scanned. + */ + if (!is_equal || keys->max_incl) + maxoff += 1; + + break; + } + } + + /* + * minoff/maxoff set to -1 means none of the datums in PartitionBoundInfo + * satisfies minkeys/maxkeys. If both are set to a valid datum offset, + * that means there exists at least some datums (and hence partitions) + * satisfying both minkeys and maxkeys. + */ + if (minoff >= 0 && maxoff >= 0) + { + bool list_include_def = false, + range_include_def = false; + + switch (partkey->strategy) + { + case PARTITION_STRATEGY_LIST: + /* + * Add to the other_parts, list partition indexes are not + * monotonously increasing like range partitions' are. + */ + for (i = minoff; i <= maxoff; i++) + partset->other_parts = + bms_add_member(partset->other_parts, + boundinfo->indexes[i]); + + /* + * If the query doesn't specify either the lower or the upper + * bound, consider including the default partition in the + * result set, because the existing partitions may not cover + * all of the values that such an unbounded range contains. + * + * Also, if minoff != maxoff, there might be datums in that + * range that don't have a non-default partition assigned. + */ + if (keys->n_minkeys == 0 || keys->n_maxkeys == 0 || + minoff != maxoff) + list_include_def = true; + break; + + case PARTITION_STRATEGY_RANGE: + /* + * If the bound at minoff or maxoff looks like it's an upper + * bound of an unassigned range of values, move to the + * adjacent bound which must be the upper bound of the + * leftmost or rightmost partition, respectively, that needs + * to be scanned. + * + * By doing that, we skip over a portion of values that do + * indeed satisfy the query, but don't have a valid partition + * assigned. The default partition would've been included to + * cover those values. Although, if the original bound in + * question is an infinite value, there would not be any + * unassigned range to speak of, because the range is unbounded + * in that direction by definition, so no need to include the + * default. + */ + if (boundinfo->indexes[minoff] < 0) + { + int lastkey = partkey->partnatts - 1; + + if (keys->n_minkeys > 0) + lastkey = keys->n_minkeys - 1; + if (boundinfo->kind[minoff][lastkey] == + PARTITION_RANGE_DATUM_VALUE) + { + range_include_def = true; + } + minoff += 1; + } + + if (maxoff >= 1 && boundinfo->indexes[maxoff] < 0) + { + int lastkey = partkey->partnatts - 1; + + if (keys->n_maxkeys > 0) + lastkey = keys->n_maxkeys - 1; + if (boundinfo->kind[maxoff - 1][lastkey] == + PARTITION_RANGE_DATUM_VALUE) + { + range_include_def = true; + } + maxoff -= 1; + } + + partset->min_part_idx = boundinfo->indexes[minoff]; + partset->max_part_idx = boundinfo->indexes[maxoff]; + /* + * There might exist a range of values unassigned to any + * non-default range partition between the datums at + * minoff and maxoff. + */ + for (i = minoff; i <= maxoff; i++) + { + if (boundinfo->indexes[i] < 0) + { + range_include_def = true; + break; + } + } + break; + } + + if ((list_include_def || range_include_def) && + partition_bound_has_default(boundinfo)) + partset->other_parts = bms_add_member(partset->other_parts, + boundinfo->default_index); + } + else if (partition_bound_has_default(boundinfo)) + partset->other_parts = bms_add_member(partset->other_parts, + boundinfo->default_index); + else + partset->empty = true; + + return partset; } /* diff --git a/src/test/regress/expected/inherit.out b/src/test/regress/expected/inherit.out index c698faff2f..661f137122 100644 --- a/src/test/regress/expected/inherit.out +++ b/src/test/regress/expected/inherit.out @@ -1713,11 +1713,9 @@ explain (costs off) select * from list_parted where a = 'ab' or a in (null, 'cd' Append -> Seq Scan on part_ab_cd Filter: (((a)::text = 'ab'::text) OR ((a)::text = ANY ('{NULL,cd}'::text[]))) - -> Seq Scan on part_ef_gh - Filter: (((a)::text = 'ab'::text) OR ((a)::text = ANY ('{NULL,cd}'::text[]))) -> Seq Scan on part_null_xy Filter: (((a)::text = 'ab'::text) OR ((a)::text = ANY ('{NULL,cd}'::text[]))) -(7 rows) +(5 rows) explain (costs off) select * from list_parted where a = 'ab'; QUERY PLAN diff --git a/src/test/regress/expected/partition.out b/src/test/regress/expected/partition.out index 44f8713319..21267dad98 100644 --- a/src/test/regress/expected/partition.out +++ b/src/test/regress/expected/partition.out @@ -207,16 +207,14 @@ explain (costs off) select * from rlp where 1 > a; /* commutates */ (3 rows) explain (costs off) select * from rlp where a <= 1; - QUERY PLAN ---------------------------------------- + QUERY PLAN +-------------------------- Append -> Seq Scan on rlp1 Filter: (a <= 1) -> Seq Scan on rlp2 Filter: (a <= 1) - -> Seq Scan on rlp_default_default - Filter: (a <= 1) -(7 rows) +(5 rows) explain (costs off) select * from rlp where a = 1; QUERY PLAN @@ -482,15 +480,13 @@ explain (costs off) select * from rlp where a <= 31; Filter: (a <= 31) -> Seq Scan on rlp5_1 Filter: (a <= 31) - -> Seq Scan on rlp5_default - Filter: (a <= 31) -> Seq Scan on rlp_default_10 Filter: (a <= 31) -> Seq Scan on rlp_default_30 Filter: (a <= 31) -> Seq Scan on rlp_default_default Filter: (a <= 31) -(29 rows) +(27 rows) explain (costs off) select * from rlp where a = 1 or a = 7; QUERY PLAN @@ -536,9 +532,7 @@ explain (costs off) select * from rlp where a > 20 and a < 27; Filter: ((a > 20) AND (a < 27)) -> Seq Scan on rlp4_2 Filter: ((a > 20) AND (a < 27)) - -> Seq Scan on rlp4_default - Filter: ((a > 20) AND (a < 27)) -(7 rows) +(5 rows) explain (costs off) select * from rlp where a = 29; QUERY PLAN @@ -612,8 +606,6 @@ explain (costs off) select * from rlp where (a = 1 and a = 3) or (a > 1 and a = QUERY PLAN ------------------------------------------------------------------- Append - -> Seq Scan on rlp2 - Filter: (((a = 1) AND (a = 3)) OR ((a > 1) AND (a = 15))) -> Seq Scan on rlp3abcd Filter: (((a = 1) AND (a = 3)) OR ((a > 1) AND (a = 15))) -> Seq Scan on rlp3efgh @@ -622,7 +614,7 @@ explain (costs off) select * from rlp where (a = 1 and a = 3) or (a > 1 and a = Filter: (((a = 1) AND (a = 3)) OR ((a > 1) AND (a = 15))) -> Seq Scan on rlp3_default Filter: (((a = 1) AND (a = 3)) OR ((a > 1) AND (a = 15))) -(11 rows) +(9 rows) -- multi-column keys create table mc3p (a int, b int, c int) partition by range (a, abs(b), c); @@ -636,16 +628,14 @@ create table mc3p5 partition of mc3p for values from (11, 1, 1) to (20, 10, 10); create table mc3p6 partition of mc3p for values from (20, 10, 10) to (20, 20, 20); create table mc3p7 partition of mc3p for values from (20, 20, 20) to (maxvalue, maxvalue, maxvalue); explain (costs off) select * from mc3p where a = 1; - QUERY PLAN --------------------------------- + QUERY PLAN +------------------------- Append -> Seq Scan on mc3p0 Filter: (a = 1) -> Seq Scan on mc3p1 Filter: (a = 1) - -> Seq Scan on mc3p_default - Filter: (a = 1) -(7 rows) +(5 rows) explain (costs off) select * from mc3p where a = 1 and abs(b) < 1; QUERY PLAN @@ -663,9 +653,7 @@ explain (costs off) select * from mc3p where a = 1 and abs(b) = 1; Filter: ((a = 1) AND (abs(b) = 1)) -> Seq Scan on mc3p1 Filter: ((a = 1) AND (abs(b) = 1)) - -> Seq Scan on mc3p_default - Filter: ((a = 1) AND (abs(b) = 1)) -(7 rows) +(5 rows) explain (costs off) select * from mc3p where a = 1 and abs(b) = 1 and c < 8; QUERY PLAN @@ -675,9 +663,7 @@ explain (costs off) select * from mc3p where a = 1 and abs(b) = 1 and c < 8; Filter: ((c < 8) AND (a = 1) AND (abs(b) = 1)) -> Seq Scan on mc3p1 Filter: ((c < 8) AND (a = 1) AND (abs(b) = 1)) - -> Seq Scan on mc3p_default - Filter: ((c < 8) AND (a = 1) AND (abs(b) = 1)) -(7 rows) +(5 rows) explain (costs off) select * from mc3p where a = 10 and abs(b) between 5 and 35; QUERY PLAN @@ -691,9 +677,7 @@ explain (costs off) select * from mc3p where a = 10 and abs(b) between 5 and 35; Filter: ((a = 10) AND (abs(b) >= 5) AND (abs(b) <= 35)) -> Seq Scan on mc3p4 Filter: ((a = 10) AND (abs(b) >= 5) AND (abs(b) <= 35)) - -> Seq Scan on mc3p_default - Filter: ((a = 10) AND (abs(b) >= 5) AND (abs(b) <= 35)) -(11 rows) +(9 rows) explain (costs off) select * from mc3p where a > 10; QUERY PLAN @@ -732,16 +716,14 @@ explain (costs off) select * from mc3p where a >= 10; (17 rows) explain (costs off) select * from mc3p where a < 10; - QUERY PLAN --------------------------------- + QUERY PLAN +-------------------------- Append -> Seq Scan on mc3p0 Filter: (a < 10) -> Seq Scan on mc3p1 Filter: (a < 10) - -> Seq Scan on mc3p_default - Filter: (a < 10) -(7 rows) +(5 rows) explain (costs off) select * from mc3p where a <= 10 and abs(b) < 10; QUERY PLAN @@ -753,9 +735,7 @@ explain (costs off) select * from mc3p where a <= 10 and abs(b) < 10; Filter: ((a <= 10) AND (abs(b) < 10)) -> Seq Scan on mc3p2 Filter: ((a <= 10) AND (abs(b) < 10)) - -> Seq Scan on mc3p_default - Filter: ((a <= 10) AND (abs(b) < 10)) -(9 rows) +(7 rows) explain (costs off) select * from mc3p where a = 11 and abs(b) = 0; /* empty */ QUERY PLAN @@ -782,8 +762,8 @@ explain (costs off) select * from mc3p where a > 20; (3 rows) explain (costs off) select * from mc3p where a >= 20; - QUERY PLAN --------------------------------- + QUERY PLAN +--------------------------- Append -> Seq Scan on mc3p5 Filter: (a >= 20) @@ -791,9 +771,7 @@ explain (costs off) select * from mc3p where a >= 20; Filter: (a >= 20) -> Seq Scan on mc3p7 Filter: (a >= 20) - -> Seq Scan on mc3p_default - Filter: (a >= 20) -(9 rows) +(7 rows) explain (costs off) select * from mc3p where (a = 1 and abs(b) = 1 and c = 1) or (a = 10 and abs(b) = 5 and c = 10) or (a > 11 and a < 20); QUERY PLAN @@ -833,9 +811,7 @@ explain (costs off) select * from mc3p where (a = 1 and abs(b) = 1 and c = 1) or Filter: (((a = 1) AND (abs(b) = 1) AND (c = 1)) OR ((a = 10) AND (abs(b) = 5) AND (c = 10)) OR ((a > 11) AND (a < 20)) OR (a < 1) OR (a = 1)) -> Seq Scan on mc3p5 Filter: (((a = 1) AND (abs(b) = 1) AND (c = 1)) OR ((a = 10) AND (abs(b) = 5) AND (c = 10)) OR ((a > 11) AND (a < 20)) OR (a < 1) OR (a = 1)) - -> Seq Scan on mc3p_default - Filter: (((a = 1) AND (abs(b) = 1) AND (c = 1)) OR ((a = 10) AND (abs(b) = 5) AND (c = 10)) OR ((a > 11) AND (a < 20)) OR (a < 1) OR (a = 1)) -(11 rows) +(9 rows) explain (costs off) select * from mc3p where a = 1 or abs(b) = 1 or c = 1; QUERY PLAN @@ -873,9 +849,7 @@ explain (costs off) select * from mc3p where (a = 1 and abs(b) = 1) or (a = 10 a Filter: (((a = 1) AND (abs(b) = 1)) OR ((a = 10) AND (abs(b) = 10))) -> Seq Scan on mc3p4 Filter: (((a = 1) AND (abs(b) = 1)) OR ((a = 10) AND (abs(b) = 10))) - -> Seq Scan on mc3p_default - Filter: (((a = 1) AND (abs(b) = 1)) OR ((a = 10) AND (abs(b) = 10))) -(13 rows) +(11 rows) explain (costs off) select * from mc3p where (a = 1 and abs(b) = 1) or (a = 10 and abs(b) = 9); QUERY PLAN @@ -887,9 +861,7 @@ explain (costs off) select * from mc3p where (a = 1 and abs(b) = 1) or (a = 10 a Filter: (((a = 1) AND (abs(b) = 1)) OR ((a = 10) AND (abs(b) = 9))) -> Seq Scan on mc3p2 Filter: (((a = 1) AND (abs(b) = 1)) OR ((a = 10) AND (abs(b) = 9))) - -> Seq Scan on mc3p_default - Filter: (((a = 1) AND (abs(b) = 1)) OR ((a = 10) AND (abs(b) = 9))) -(9 rows) +(7 rows) -- a simpler multi-column keys case create table mc2p (a int, b int) partition by range (a, b); -- 2.11.0