From c9a097462bc99b2450bbb9886fadbbf47555468f Mon Sep 17 00:00:00 2001 From: amit Date: Wed, 18 Oct 2017 17:14:53 +0900 Subject: [PATCH 5/5] Implement get_partitions_for_keys Disable constraint_exclusion using internal partition constraints. --- src/backend/catalog/partition.c | 365 +++++++++++++++++++++++++++++++- src/test/regress/expected/inherit.out | 4 +- src/test/regress/expected/partition.out | 64 ++---- 3 files changed, 385 insertions(+), 48 deletions(-) diff --git a/src/backend/catalog/partition.c b/src/backend/catalog/partition.c index 857cd5f707..b7cca33f1f 100644 --- a/src/backend/catalog/partition.c +++ b/src/backend/catalog/partition.c @@ -2507,7 +2507,370 @@ partition_cmp_args(Oid partopfamily, Oid partopcintype, static PartitionSet * get_partitions_for_keys(Relation rel, PartitionScanKeyInfo *keys) { - return partset_new(false, true); + PartitionSet *partset; + int i, + eqoff, + minoff, + maxoff; + PartitionKey partkey = RelationGetPartitionKey(rel); + PartitionDesc partdesc = RelationGetPartitionDesc(rel); + PartitionBoundInfo boundinfo = partdesc->boundinfo; + PartitionBoundCmpArg arg; + bool is_equal, + include_default = false; + + /* Quick exit, if no partitions to see. */ + if (partdesc->nparts == 0) + return partset_new(true, false); + + /* + * partset->empty will be set to true if we find out below that that's + * the case. + */ + partset = partset_new(false, false); + + /* + * Check if any of the scan keys are null. If so, return the only + * null-accepting partition if boundinfo says there is one. + */ + for (i = 0; i < partkey->partnatts; i++) + { + if (keys->keynullness[i] == IS_NULL) + { + int other_idx = -1; + + /* + * Note that only one of the two can hold nulls at any given + * time. + */ + if (partition_bound_accepts_nulls(boundinfo)|| + partition_bound_has_default(boundinfo)) + other_idx = partition_bound_accepts_nulls(boundinfo) + ? boundinfo->null_index + : boundinfo->default_index; + if (other_idx >= 0) + partset->other_parts = bms_make_singleton(other_idx); + + return partset; + } + } + + /* + * If there are no datums to compare keys with but there exists a + * partition, the latter must be a partition that accepts only nulls + * or a default partition, or there could be both. We already checked + * above if we should return the null-accepting partition, but being + * here means the query doesn't want nulls. All remaining data must + * be in the default partition if there is one. + */ + if (boundinfo->ndatums == 0) + { + if (partition_bound_has_default(boundinfo)) + partset->other_parts = + bms_make_singleton(boundinfo->default_index); + else + partset->empty = true; + + return partset; + } + + /* + * keys->eqkeys identifies a unique partition only if values for all + * partition keys are provided. If only a prefix of all the partition + * keys is provided, then they identify a sequence of partitions each of + * whose upper bound allows the values in that prefix. + */ + eqoff = -1; + if (keys->n_eqkeys > 0) + { + memset(&arg, 0, sizeof(PartitionBoundCmpArg)); + arg.datums = keys->eqkeys; + arg.ndatums = keys->n_eqkeys; + eqoff = partition_bound_bsearch(partkey, boundinfo, &arg, &is_equal); + + if (eqoff >= 0) + { + switch (partkey->strategy) + { + case PARTITION_STRATEGY_LIST: + /* For list partition, key must exactly match datum. */ + if (!is_equal) + eqoff = -1; + break; + + case PARTITION_STRATEGY_RANGE: + /* + * The bound at offset eqoff is <= eqkeys given how + * partition_bound_bsearch works, so the partition we're + * looking for is the one whose upper bound is at offset + * eqoff + 1. + */ + if (partkey->strategy == PARTITION_STRATEGY_RANGE) + eqoff += 1; + } + } + + /* No minkeys and maxkeys to look at in this case. */ + minoff = maxoff = -1; + goto collect_partitions; + } + + /* + * Using minkeys and maxkeys, identify the offsets of qualifying minimum + * and maximum bounds. + */ + minoff = 0; + if (keys->n_minkeys > 0) + { + memset(&arg, 0, sizeof(PartitionBoundCmpArg)); + arg.datums = keys->minkeys; + arg.ndatums = keys->n_minkeys; + minoff = partition_bound_bsearch(partkey, boundinfo, &arg, &is_equal); + + if (is_equal && arg.ndatums < partkey->partnatts) + { + int32 cmpval; + + is_equal = false; + + do + { + if (keys->min_incl) + minoff -= 1; + else + minoff += 1; + if (minoff < 0 || minoff >= boundinfo->ndatums) + break; + cmpval = partition_bound_cmp(partkey, boundinfo, minoff, + &arg); + } while (cmpval == 0); + + /* Back up if went too far. */ + if (!keys->min_incl) + minoff -= 1; + } + + /* Interpret the result per partition strategy. */ + switch (partkey->strategy) + { + case PARTITION_STRATEGY_LIST: + /* + * Found a bound matching the query specified value, but this + * may be a range query and we may have been asked us to + * exclude the value itself. So, go to the next bound value. + */ + if (is_equal && !keys->min_incl) + minoff++; + break; + + case PARTITION_STRATEGY_RANGE: + /* + * Rows returned by the query will be > the bound value at + * minoff, because query's minkey is known to be >= that bound + * value given how partition_bound_bsearch works. IOW, no + * rows of the partition whose upper bound is the value at + * minoff will be returned, so go to the next bound value. + */ + if (minoff < boundinfo->ndatums - 1) + minoff += 1; + } + } + + /* + * Skip a gap, which might exist in the case of range partitioning, but + * also instruct the later code to consider default partition (if one + * exists) and could potentially contain data that satisfies the keys, + * which it only could if there are still still finite keys left on + * that side. + */ + if (minoff >= 0 && boundinfo->indexes[minoff] < 0) + { + if (keys->n_minkeys > 0 && + boundinfo->kind[minoff][keys->n_minkeys - 1] == + PARTITION_RANGE_DATUM_VALUE) + include_default = true; + minoff += 1; + } + + maxoff = boundinfo->ndatums - 1; + if (keys->n_maxkeys > 0) + { + memset(&arg, 0, sizeof(PartitionBoundCmpArg)); + arg.datums = keys->maxkeys; + arg.ndatums = keys->n_maxkeys; + maxoff = partition_bound_bsearch(partkey, boundinfo, &arg, &is_equal); + if (is_equal && arg.ndatums < partkey->partnatts) + { + int32 cmpval; + + is_equal = false; + + do + { + if (keys->max_incl) + maxoff += 1; + else + maxoff -= 1; + if (maxoff < 0 || maxoff >= boundinfo->ndatums) + break; + cmpval = partition_bound_cmp(partkey, boundinfo, maxoff, + &arg); + } while (cmpval == 0); + + /* Back up if went too far. */ + if (keys->max_incl) + maxoff -= 1; + } + + /* Interpret the result per partition strategy. */ + switch (partkey->strategy) + { + case PARTITION_STRATEGY_LIST: + /* + * Found, but if the query may have asked us to exclude it. + */ + if (is_equal && !keys->max_incl) + maxoff--; + break; + + case PARTITION_STRATEGY_RANGE: + /* + * Because the bound value at maxoff is known to be <= query's + * maxkey, we may need to consider the next partition as well. + * That would be in addition to the partition whose upper + * bound is the value at maxoff and earlier partitions. + */ + if (!is_equal || keys->max_incl) + maxoff += 1; + break; + } + } + + /* + * Skip a gap, which might exist in the case of range partitioning, but + * also instruct the later code to consider default partition (if one + * exists) and could potentially contain data that satisfies the keys, + * which it only could if there are still still finite keys left on + * that side. + */ + if (maxoff >= 0 && boundinfo->indexes[maxoff] < 0) + { + if (keys->n_maxkeys > 0 && + boundinfo->kind[maxoff][keys->n_maxkeys-1] == + PARTITION_RANGE_DATUM_VALUE) + include_default = true; + maxoff -= 1; + } + +collect_partitions: + /* + * eqoff set to a valid bound offset means a unique partition has been + * identified. Otherwise, generate a sequence of partitions corresponding + * to bounds at offsets in range given by minoff and maxoff. + */ + if (eqoff >= 0) + { + if (boundinfo->indexes[eqoff] >= 0) + partset->other_parts = bms_add_member(partset->other_parts, + boundinfo->indexes[eqoff]); + else + include_default = true; + } + else if (minoff >= 0 && maxoff >= 0) + { + switch (partkey->strategy) + { + case PARTITION_STRATEGY_LIST: + /* Add out-of-line partitions to the *other_parts set. */ + for (i = minoff; i <= maxoff; i++) + partset->other_parts = + bms_add_member(partset->other_parts, + boundinfo->indexes[i]); + break; + + case PARTITION_STRATEGY_RANGE: + partset->min_part_idx = boundinfo->indexes[minoff]; + partset->max_part_idx = boundinfo->indexes[maxoff]; + break; + } + } + + /* + * Check if we need to add the null-accepting partition to the set, if + * it's not been already included by virtue of it being in the above + * range of partitions. + * + * There are couple of cases when it will need to be scanned: + * + * 1. If there are no quals, null partition trivially needs to be + * scanned. + * + * 2. If it's not already contained in *other_parts and the query does + * not prevent nulls in the result. If such a partition also accepts + * non-null datums, it would already be in *other_parts if one of those + * datums also qualify for the query. + */ + if (partition_bound_accepts_nulls(boundinfo) && + !bms_is_member(boundinfo->null_index, partset->other_parts) && + keys->n_eqkeys + keys->n_minkeys + keys->n_maxkeys == 0 && + (keys->keynullness[0] == -1 || keys->keynullness[0] != IS_NOT_NULL)) + { + partset->other_parts = bms_add_member(partset->other_parts, + boundinfo->null_index); + } + + /* + * Check if we need to add the default partition to the set. There are + * couple of cases when it will need to be scanned: + * + * 1. If there are no quals, default partitions trivially needs to be + * scanned. + * + * 2. If there exist datums between those at offsets minoff and maxoff + * which don't have a partition that accepts them, then default partition + * would have caught them. With range partitioning, simply check if + * there is a -1 in boundinfo->indexes, which indicates unassigned + * portion of range. With list partitioning, if minoff != maxoff, it + * means there might be datums in that range that don't have a + * non-default partition assigned, whereas minoff == maxoff means only + * the partition containing that datum needs to be scanned. + * containing that datum needs to be scanned. + */ + if (boundinfo->default_index >= 0) + { + if (keys->n_eqkeys + keys->n_minkeys + keys->n_maxkeys == 0) + include_default = true; + else + { + switch (partkey->strategy) + { + case PARTITION_STRATEGY_LIST: + include_default = (minoff != maxoff); + break; + + case PARTITION_STRATEGY_RANGE: + for (i = minoff; i <= maxoff; i++) + { + if (boundinfo->indexes[i] < 0) + { + include_default = true; + break; + } + } + break; + } + } + + if (include_default) + partset->other_parts = bms_add_member(partset->other_parts, + boundinfo->default_index); + } + + if (partset->min_part_idx < 0 && partset->max_part_idx < 0 && + partset->other_parts == NULL) + partset->empty = true; + + return partset; } /* diff --git a/src/test/regress/expected/inherit.out b/src/test/regress/expected/inherit.out index c698faff2f..661f137122 100644 --- a/src/test/regress/expected/inherit.out +++ b/src/test/regress/expected/inherit.out @@ -1713,11 +1713,9 @@ explain (costs off) select * from list_parted where a = 'ab' or a in (null, 'cd' Append -> Seq Scan on part_ab_cd Filter: (((a)::text = 'ab'::text) OR ((a)::text = ANY ('{NULL,cd}'::text[]))) - -> Seq Scan on part_ef_gh - Filter: (((a)::text = 'ab'::text) OR ((a)::text = ANY ('{NULL,cd}'::text[]))) -> Seq Scan on part_null_xy Filter: (((a)::text = 'ab'::text) OR ((a)::text = ANY ('{NULL,cd}'::text[]))) -(7 rows) +(5 rows) explain (costs off) select * from list_parted where a = 'ab'; QUERY PLAN diff --git a/src/test/regress/expected/partition.out b/src/test/regress/expected/partition.out index 61c4596bc7..69a7819171 100644 --- a/src/test/regress/expected/partition.out +++ b/src/test/regress/expected/partition.out @@ -198,16 +198,14 @@ explain (costs off) select * from rlp where 1 > a; /* commutates */ (3 rows) explain (costs off) select * from rlp where a <= 1; - QUERY PLAN -------------------------------- + QUERY PLAN +-------------------------- Append -> Seq Scan on rlp1 Filter: (a <= 1) -> Seq Scan on rlp2 Filter: (a <= 1) - -> Seq Scan on rlp_default - Filter: (a <= 1) -(7 rows) +(5 rows) explain (costs off) select * from rlp where a = 1; QUERY PLAN @@ -453,15 +451,13 @@ explain (costs off) select * from rlp where (a = 1 and a = 3) or (a > 1 and a = QUERY PLAN ------------------------------------------------------------------- Append - -> Seq Scan on rlp2 - Filter: (((a = 1) AND (a = 3)) OR ((a > 1) AND (a = 15))) -> Seq Scan on rlp3abcd Filter: (((a = 1) AND (a = 3)) OR ((a > 1) AND (a = 15))) -> Seq Scan on rlp3efgh Filter: (((a = 1) AND (a = 3)) OR ((a > 1) AND (a = 15))) -> Seq Scan on rlp3nullxy Filter: (((a = 1) AND (a = 3)) OR ((a > 1) AND (a = 15))) -(9 rows) +(7 rows) -- multi-column keys create table mc3p (a int, b int, c int) partition by range (a, abs(b), c); @@ -475,16 +471,14 @@ create table mc3p5 partition of mc3p for values from (11, 1, 1) to (20, 10, 10); create table mc3p6 partition of mc3p for values from (20, 10, 10) to (20, 20, 20); create table mc3p7 partition of mc3p for values from (20, 20, 20) to (maxvalue, maxvalue, maxvalue); explain (costs off) select * from mc3p where a = 1; - QUERY PLAN --------------------------------- + QUERY PLAN +------------------------- Append -> Seq Scan on mc3p0 Filter: (a = 1) -> Seq Scan on mc3p1 Filter: (a = 1) - -> Seq Scan on mc3p_default - Filter: (a = 1) -(7 rows) +(5 rows) explain (costs off) select * from mc3p where a = 1 and abs(b) < 1; QUERY PLAN @@ -502,9 +496,7 @@ explain (costs off) select * from mc3p where a = 1 and abs(b) = 1; Filter: ((a = 1) AND (abs(b) = 1)) -> Seq Scan on mc3p1 Filter: ((a = 1) AND (abs(b) = 1)) - -> Seq Scan on mc3p_default - Filter: ((a = 1) AND (abs(b) = 1)) -(7 rows) +(5 rows) explain (costs off) select * from mc3p where a = 1 and abs(b) = 1 and c < 8; QUERY PLAN @@ -514,9 +506,7 @@ explain (costs off) select * from mc3p where a = 1 and abs(b) = 1 and c < 8; Filter: ((c < 8) AND (a = 1) AND (abs(b) = 1)) -> Seq Scan on mc3p1 Filter: ((c < 8) AND (a = 1) AND (abs(b) = 1)) - -> Seq Scan on mc3p_default - Filter: ((c < 8) AND (a = 1) AND (abs(b) = 1)) -(7 rows) +(5 rows) explain (costs off) select * from mc3p where a = 10 and abs(b) between 5 and 35; QUERY PLAN @@ -530,9 +520,7 @@ explain (costs off) select * from mc3p where a = 10 and abs(b) between 5 and 35; Filter: ((a = 10) AND (abs(b) >= 5) AND (abs(b) <= 35)) -> Seq Scan on mc3p4 Filter: ((a = 10) AND (abs(b) >= 5) AND (abs(b) <= 35)) - -> Seq Scan on mc3p_default - Filter: ((a = 10) AND (abs(b) >= 5) AND (abs(b) <= 35)) -(11 rows) +(9 rows) explain (costs off) select * from mc3p where a > 10; QUERY PLAN @@ -571,16 +559,14 @@ explain (costs off) select * from mc3p where a >= 10; (17 rows) explain (costs off) select * from mc3p where a < 10; - QUERY PLAN --------------------------------- + QUERY PLAN +-------------------------- Append -> Seq Scan on mc3p0 Filter: (a < 10) -> Seq Scan on mc3p1 Filter: (a < 10) - -> Seq Scan on mc3p_default - Filter: (a < 10) -(7 rows) +(5 rows) explain (costs off) select * from mc3p where a <= 10 and abs(b) < 10; QUERY PLAN @@ -592,9 +578,7 @@ explain (costs off) select * from mc3p where a <= 10 and abs(b) < 10; Filter: ((a <= 10) AND (abs(b) < 10)) -> Seq Scan on mc3p2 Filter: ((a <= 10) AND (abs(b) < 10)) - -> Seq Scan on mc3p_default - Filter: ((a <= 10) AND (abs(b) < 10)) -(9 rows) +(7 rows) explain (costs off) select * from mc3p where a = 11 and abs(b) = 0; /* empty */ QUERY PLAN @@ -621,8 +605,8 @@ explain (costs off) select * from mc3p where a > 20; (3 rows) explain (costs off) select * from mc3p where a >= 20; - QUERY PLAN --------------------------------- + QUERY PLAN +--------------------------- Append -> Seq Scan on mc3p5 Filter: (a >= 20) @@ -630,9 +614,7 @@ explain (costs off) select * from mc3p where a >= 20; Filter: (a >= 20) -> Seq Scan on mc3p7 Filter: (a >= 20) - -> Seq Scan on mc3p_default - Filter: (a >= 20) -(9 rows) +(7 rows) explain (costs off) select * from mc3p where (a = 1 and abs(b) = 1 and c = 1) or (a = 10 and abs(b) = 5 and c = 10) or (a > 11 and a < 20); QUERY PLAN @@ -672,9 +654,7 @@ explain (costs off) select * from mc3p where (a = 1 and abs(b) = 1 and c = 1) or Filter: (((a = 1) AND (abs(b) = 1) AND (c = 1)) OR ((a = 10) AND (abs(b) = 5) AND (c = 10)) OR ((a > 11) AND (a < 20)) OR (a < 1) OR (a = 1)) -> Seq Scan on mc3p5 Filter: (((a = 1) AND (abs(b) = 1) AND (c = 1)) OR ((a = 10) AND (abs(b) = 5) AND (c = 10)) OR ((a > 11) AND (a < 20)) OR (a < 1) OR (a = 1)) - -> Seq Scan on mc3p_default - Filter: (((a = 1) AND (abs(b) = 1) AND (c = 1)) OR ((a = 10) AND (abs(b) = 5) AND (c = 10)) OR ((a > 11) AND (a < 20)) OR (a < 1) OR (a = 1)) -(11 rows) +(9 rows) explain (costs off) select * from mc3p where a = 1 or abs(b) = 1 or c = 1; QUERY PLAN @@ -712,9 +692,7 @@ explain (costs off) select * from mc3p where (a = 1 and abs(b) = 1) or (a = 10 a Filter: (((a = 1) AND (abs(b) = 1)) OR ((a = 10) AND (abs(b) = 10))) -> Seq Scan on mc3p4 Filter: (((a = 1) AND (abs(b) = 1)) OR ((a = 10) AND (abs(b) = 10))) - -> Seq Scan on mc3p_default - Filter: (((a = 1) AND (abs(b) = 1)) OR ((a = 10) AND (abs(b) = 10))) -(13 rows) +(11 rows) explain (costs off) select * from mc3p where (a = 1 and abs(b) = 1) or (a = 10 and abs(b) = 9); QUERY PLAN @@ -726,8 +704,6 @@ explain (costs off) select * from mc3p where (a = 1 and abs(b) = 1) or (a = 10 a Filter: (((a = 1) AND (abs(b) = 1)) OR ((a = 10) AND (abs(b) = 9))) -> Seq Scan on mc3p2 Filter: (((a = 1) AND (abs(b) = 1)) OR ((a = 10) AND (abs(b) = 9))) - -> Seq Scan on mc3p_default - Filter: (((a = 1) AND (abs(b) = 1)) OR ((a = 10) AND (abs(b) = 9))) -(9 rows) +(7 rows) drop table lp, coll_pruning, rlp, mc3p; -- 2.11.0