From 7fe537f8e8efeac51c3b0cc91ac51a1aa39399cd Mon Sep 17 00:00:00 2001
From: amit <amitlangote09@gmail.com>
Date: Tue, 10 Jan 2017 17:43:36 +0900
Subject: [PATCH] Fix some wrong thinking in check_new_partition_bound()

Because a given range bound in the PartitionBoundInfo.datums array
is sometimes a lower range bound and at other times an upper range
bound, we must be careful when assuming which, especially when
interpreting the result of partition_bound_bsearch which returns
the index of the greatest bound that is less than or equal to probe.
Due to an error in thinking about the same, the relevant code in
check_new_partition_bound() caused invalid partition (index == -1)
to be chosen as the partition being overlapped.

Also, we need not continue searching for even greater bound in
partition_bound_bsearch() once we find the first bound that is *equal*
to the probe, because we don't have duplicate datums.  That spends
cycles needlessly.  Per suggestion from Amul Sul.

Reported by: Amul Sul
Patch by: Amit Langote
Reports: https://www.postgresql.org/message-id/CAAJ_b94XgbqVoXMyxxs63CaqWoMS1o2gpHiU0F7yGnJBnvDc_A%40mail.gmail.com
---
 src/backend/catalog/partition.c            | 62 ++++++++++++++++++++++--------
 src/test/regress/expected/create_table.out | 10 ++++-
 src/test/regress/sql/create_table.sql      |  4 ++
 3 files changed, 60 insertions(+), 16 deletions(-)

diff --git a/src/backend/catalog/partition.c b/src/backend/catalog/partition.c
index f54e1bdf3f..df5652de4c 100644
--- a/src/backend/catalog/partition.c
+++ b/src/backend/catalog/partition.c
@@ -741,35 +741,64 @@ check_new_partition_bound(char *relname, Relation parent, Node *bound)
 						   boundinfo->strategy == PARTITION_STRATEGY_RANGE);
 
 					/*
-					 * Find the greatest index of a range bound that is less
-					 * than or equal with the new lower bound.
+					 * Firstly, find the greatest range bound that is less
+					 * than or equal to the new lower bound.
 					 */
 					off1 = partition_bound_bsearch(key, boundinfo, lower, true,
 												   &equal);
 
 					/*
-					 * If equal has been set to true, that means the new lower
-					 * bound is found to be equal with the bound at off1,
-					 * which clearly means an overlap with the partition at
-					 * index off1+1).
-					 *
-					 * Otherwise, check if there is a "gap" that could be
-					 * occupied by the new partition.  In case of a gap, the
-					 * new upper bound should not cross past the upper
-					 * boundary of the gap, that is, off2 == off1 should be
-					 * true.
+					 * off1 == -1 means that all existing bounds are greater
+					 * than the new lower bound.  In that case and the case
+					 * where no partition is defined between the bounds at
+					 * off1 and off1 + 1, we have a "gap" in the range that
+					 * could be occupied by the new partition.  We confirm if
+					 * so by checking whether the new upper bound is confined
+					 * within the gap.
 					 */
 					if (!equal && boundinfo->indexes[off1 + 1] < 0)
 					{
 						off2 = partition_bound_bsearch(key, boundinfo, upper,
 													   true, &equal);
 
+						/*
+						 * If the new upper bound is returned to be equal to
+						 * the bound at off2, the latter must be the upper
+						 * bound of some partition with which the new partition
+						 * clearly overlaps.
+						 *
+						 * Also, if bound at off2 is not same as the one
+						 * returned for the new lower bound (IOW,
+						 * off1 != off2), then the new partition overlaps at
+						 * least one partition.
+						 */
 						if (equal || off1 != off2)
 						{
 							overlap = true;
-							with = boundinfo->indexes[off2 + 1];
+							/*
+							 * The bound at off2 could be the lower bound of
+							 * the partition with which the new partition
+							 * overlaps.  In that case, use the upper bound
+							 * (that is, the bound at off2 + 1) to get the
+							 * index of that partition.
+							 */
+							if (boundinfo->indexes[off2] < 0)
+								with = boundinfo->indexes[off2 + 1];
+							else
+								with = boundinfo->indexes[off2];
 						}
 					}
+					/*
+					 * If equal has been set to true or if there is no "gap"
+					 * between the bound at off1 and that at off1 + 1, the new
+					 * partition will overlap some partition.  In the former
+					 * case, the new lower bound is found to be equal to the
+					 * bound at off1, which could only ever be true if the
+					 * latter is the lower bound of some partition.  It's
+					 * clear in such a case that the new partition overlaps
+					 * that partition, whose index we get using its upper
+					 * bound (that is, using the bound at off1 + 1).
+					 */
 					else
 					{
 						overlap = true;
@@ -1956,8 +1985,8 @@ partition_bound_cmp(PartitionKey key, PartitionBoundInfo boundinfo,
 }
 
 /*
- * Binary search on a collection of partition bounds. Returns greatest index
- * of bound in array boundinfo->datums which is less or equal with *probe.
+ * Binary search on a collection of partition bounds. Returns greatest
+ * bound in array boundinfo->datums which is less than or equal to *probe
  * If all bounds in the array are greater than *probe, -1 is returned.
  *
  * *probe could either be a partition bound or a Datum array representing
@@ -1989,6 +2018,9 @@ partition_bound_bsearch(PartitionKey key, PartitionBoundInfo boundinfo,
 		{
 			lo = mid;
 			*is_equal = (cmpval == 0);
+
+			if (*is_equal)
+				break;
 		}
 		else
 			hi = mid - 1;
diff --git a/src/test/regress/expected/create_table.out b/src/test/regress/expected/create_table.out
index 783602314c..f01de7c04c 100644
--- a/src/test/regress/expected/create_table.out
+++ b/src/test/regress/expected/create_table.out
@@ -549,6 +549,12 @@ ERROR:  partition "fail_part" would overlap partition "part0"
 CREATE TABLE part1 PARTITION OF range_parted2 FOR VALUES FROM (1) TO (10);
 CREATE TABLE fail_part PARTITION OF range_parted2 FOR VALUES FROM (9) TO (unbounded);
 ERROR:  partition "fail_part" would overlap partition "part1"
+CREATE TABLE part2 PARTITION OF range_parted2 FOR VALUES FROM (20) TO (30);
+CREATE TABLE part3 PARTITION OF range_parted2 FOR VALUES FROM (30) TO (40);
+CREATE TABLE fail_part PARTITION OF range_parted2 FOR VALUES FROM (10) TO (30);
+ERROR:  partition "fail_part" would overlap partition "part2"
+CREATE TABLE fail_part PARTITION OF range_parted2 FOR VALUES FROM (10) TO (50);
+ERROR:  partition "fail_part" would overlap partition "part3"
 -- now check for multi-column range partition key
 CREATE TABLE range_parted3 (
 	a int,
@@ -651,13 +657,15 @@ table part_c depends on table parted
 table part_c_1_10 depends on table part_c
 HINT:  Use DROP ... CASCADE to drop the dependent objects too.
 DROP TABLE parted, list_parted, range_parted, list_parted2, range_parted2, range_parted3 CASCADE;
-NOTICE:  drop cascades to 14 other objects
+NOTICE:  drop cascades to 16 other objects
 DETAIL:  drop cascades to table part00
 drop cascades to table part10
 drop cascades to table part11
 drop cascades to table part12
 drop cascades to table part0
 drop cascades to table part1
+drop cascades to table part2
+drop cascades to table part3
 drop cascades to table part_null_z
 drop cascades to table part_ab
 drop cascades to table part_1
diff --git a/src/test/regress/sql/create_table.sql b/src/test/regress/sql/create_table.sql
index f1a67fddfa..c773709eaf 100644
--- a/src/test/regress/sql/create_table.sql
+++ b/src/test/regress/sql/create_table.sql
@@ -517,6 +517,10 @@ CREATE TABLE part0 PARTITION OF range_parted2 FOR VALUES FROM (unbounded) TO (1)
 CREATE TABLE fail_part PARTITION OF range_parted2 FOR VALUES FROM (unbounded) TO (2);
 CREATE TABLE part1 PARTITION OF range_parted2 FOR VALUES FROM (1) TO (10);
 CREATE TABLE fail_part PARTITION OF range_parted2 FOR VALUES FROM (9) TO (unbounded);
+CREATE TABLE part2 PARTITION OF range_parted2 FOR VALUES FROM (20) TO (30);
+CREATE TABLE part3 PARTITION OF range_parted2 FOR VALUES FROM (30) TO (40);
+CREATE TABLE fail_part PARTITION OF range_parted2 FOR VALUES FROM (10) TO (30);
+CREATE TABLE fail_part PARTITION OF range_parted2 FOR VALUES FROM (10) TO (50);
 
 -- now check for multi-column range partition key
 CREATE TABLE range_parted3 (
-- 
2.11.0

