From 171c58bc37c211cab11f6b588220e9e773f67e67 Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Mon, 2 Nov 2020 08:24:28 +0100 Subject: [PATCH] Enable hash partitioning of text arrays hash_array_extended() needs to pass PG_GET_COLLATION() to the hash function of the element type. Otherwise, the hash function of a collation-aware data type such as text will error out, since the introduction of nondeterministic collation made hash functions require a collation, too. The consequence of this is that before this change, hash partitioning using an array over text in the partition key would not work. --- src/backend/utils/adt/arrayfuncs.c | 2 +- .../regress/expected/collate.icu.utf8.out | 50 +++++++++++++++++++ src/test/regress/sql/collate.icu.utf8.sql | 26 ++++++++++ 3 files changed, 77 insertions(+), 1 deletion(-) diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c index 392445ea03..a7ea7656c7 100644 --- a/src/backend/utils/adt/arrayfuncs.c +++ b/src/backend/utils/adt/arrayfuncs.c @@ -4071,7 +4071,7 @@ hash_array_extended(PG_FUNCTION_ARGS) typalign = typentry->typalign; InitFunctionCallInfoData(*locfcinfo, &typentry->hash_extended_proc_finfo, 2, - InvalidOid, NULL, NULL); + PG_GET_COLLATION(), NULL, NULL); /* Loop over source data */ nitems = ArrayGetNItems(ndims, dims); diff --git a/src/test/regress/expected/collate.icu.utf8.out b/src/test/regress/expected/collate.icu.utf8.out index 2b86ce9028..70133df804 100644 --- a/src/test/regress/expected/collate.icu.utf8.out +++ b/src/test/regress/expected/collate.icu.utf8.out @@ -1215,6 +1215,30 @@ SELECT * FROM test6 WHERE b = 'äbc' COLLATE ctest_nondet; 2 | äbc (2 rows) +-- same with arrays +CREATE TABLE test6a (a int, b text[]); +INSERT INTO test6a VALUES (1, ARRAY[U&'\00E4bc']); +INSERT INTO test6a VALUES (2, ARRAY[U&'\0061\0308bc']); +SELECT * FROM test6a; + a | b +---+------- + 1 | {äbc} + 2 | {äbc} +(2 rows) + +SELECT * FROM test6a WHERE b = ARRAY['äbc'] COLLATE ctest_det; + a | b +---+------- + 1 | {äbc} +(1 row) + +SELECT * FROM test6a WHERE b = ARRAY['äbc'] COLLATE ctest_nondet; + a | b +---+------- + 1 | {äbc} + 2 | {äbc} +(2 rows) + CREATE COLLATION case_sensitive (provider = icu, locale = ''); CREATE COLLATION case_insensitive (provider = icu, locale = '@colStrength=secondary', deterministic = false); SELECT 'abc' <= 'ABC' COLLATE case_sensitive, 'abc' >= 'ABC' COLLATE case_sensitive; @@ -1842,6 +1866,19 @@ SELECT (SELECT count(*) FROM test22_0) = (SELECT count(*) FROM test22_1); t (1 row) +-- same with arrays +CREATE TABLE test22a (a int, b text[] COLLATE case_sensitive) PARTITION BY HASH (b); +CREATE TABLE test22a_0 PARTITION OF test22a FOR VALUES WITH (MODULUS 2, REMAINDER 0); +CREATE TABLE test22a_1 PARTITION OF test22a FOR VALUES WITH (MODULUS 2, REMAINDER 1); +INSERT INTO test22a VALUES (1, ARRAY['def']); +INSERT INTO test22a VALUES (2, ARRAY['DEF']); +-- they end up in different partitions +SELECT (SELECT count(*) FROM test22a_0) = (SELECT count(*) FROM test22a_1); + ?column? +---------- + t +(1 row) + CREATE TABLE test23 (a int, b text COLLATE case_insensitive) PARTITION BY HASH (b); CREATE TABLE test23_0 PARTITION OF test23 FOR VALUES WITH (MODULUS 2, REMAINDER 0); CREATE TABLE test23_1 PARTITION OF test23 FOR VALUES WITH (MODULUS 2, REMAINDER 1); @@ -1854,6 +1891,19 @@ SELECT (SELECT count(*) FROM test23_0) <> (SELECT count(*) FROM test23_1); t (1 row) +-- same with arrays +CREATE TABLE test23a (a int, b text[] COLLATE case_insensitive) PARTITION BY HASH (b); +CREATE TABLE test23a_0 PARTITION OF test23a FOR VALUES WITH (MODULUS 2, REMAINDER 0); +CREATE TABLE test23a_1 PARTITION OF test23a FOR VALUES WITH (MODULUS 2, REMAINDER 1); +INSERT INTO test23a VALUES (1, ARRAY['def']); +INSERT INTO test23a VALUES (2, ARRAY['DEF']); +-- they end up in the same partition (but it's platform-dependent which one) +SELECT (SELECT count(*) FROM test23a_0) <> (SELECT count(*) FROM test23a_1); + ?column? +---------- + t +(1 row) + CREATE TABLE test30 (a int, b char(3) COLLATE case_insensitive) PARTITION BY LIST (b); CREATE TABLE test30_1 PARTITION OF test30 FOR VALUES IN ('abc'); INSERT INTO test30 VALUES (1, 'abc'); diff --git a/src/test/regress/sql/collate.icu.utf8.sql b/src/test/regress/sql/collate.icu.utf8.sql index 67de7d9794..9cee3d0042 100644 --- a/src/test/regress/sql/collate.icu.utf8.sql +++ b/src/test/regress/sql/collate.icu.utf8.sql @@ -483,6 +483,14 @@ CREATE TABLE test6 (a int, b text); SELECT * FROM test6 WHERE b = 'äbc' COLLATE ctest_det; SELECT * FROM test6 WHERE b = 'äbc' COLLATE ctest_nondet; +-- same with arrays +CREATE TABLE test6a (a int, b text[]); +INSERT INTO test6a VALUES (1, ARRAY[U&'\00E4bc']); +INSERT INTO test6a VALUES (2, ARRAY[U&'\0061\0308bc']); +SELECT * FROM test6a; +SELECT * FROM test6a WHERE b = ARRAY['äbc'] COLLATE ctest_det; +SELECT * FROM test6a WHERE b = ARRAY['äbc'] COLLATE ctest_nondet; + CREATE COLLATION case_sensitive (provider = icu, locale = ''); CREATE COLLATION case_insensitive (provider = icu, locale = '@colStrength=secondary', deterministic = false); @@ -685,6 +693,15 @@ CREATE TABLE test22_1 PARTITION OF test22 FOR VALUES WITH (MODULUS 2, REMAINDER -- they end up in different partitions SELECT (SELECT count(*) FROM test22_0) = (SELECT count(*) FROM test22_1); +-- same with arrays +CREATE TABLE test22a (a int, b text[] COLLATE case_sensitive) PARTITION BY HASH (b); +CREATE TABLE test22a_0 PARTITION OF test22a FOR VALUES WITH (MODULUS 2, REMAINDER 0); +CREATE TABLE test22a_1 PARTITION OF test22a FOR VALUES WITH (MODULUS 2, REMAINDER 1); +INSERT INTO test22a VALUES (1, ARRAY['def']); +INSERT INTO test22a VALUES (2, ARRAY['DEF']); +-- they end up in different partitions +SELECT (SELECT count(*) FROM test22a_0) = (SELECT count(*) FROM test22a_1); + CREATE TABLE test23 (a int, b text COLLATE case_insensitive) PARTITION BY HASH (b); CREATE TABLE test23_0 PARTITION OF test23 FOR VALUES WITH (MODULUS 2, REMAINDER 0); CREATE TABLE test23_1 PARTITION OF test23 FOR VALUES WITH (MODULUS 2, REMAINDER 1); @@ -693,6 +710,15 @@ CREATE TABLE test23_1 PARTITION OF test23 FOR VALUES WITH (MODULUS 2, REMAINDER -- they end up in the same partition (but it's platform-dependent which one) SELECT (SELECT count(*) FROM test23_0) <> (SELECT count(*) FROM test23_1); +-- same with arrays +CREATE TABLE test23a (a int, b text[] COLLATE case_insensitive) PARTITION BY HASH (b); +CREATE TABLE test23a_0 PARTITION OF test23a FOR VALUES WITH (MODULUS 2, REMAINDER 0); +CREATE TABLE test23a_1 PARTITION OF test23a FOR VALUES WITH (MODULUS 2, REMAINDER 1); +INSERT INTO test23a VALUES (1, ARRAY['def']); +INSERT INTO test23a VALUES (2, ARRAY['DEF']); +-- they end up in the same partition (but it's platform-dependent which one) +SELECT (SELECT count(*) FROM test23a_0) <> (SELECT count(*) FROM test23a_1); + CREATE TABLE test30 (a int, b char(3) COLLATE case_insensitive) PARTITION BY LIST (b); CREATE TABLE test30_1 PARTITION OF test30 FOR VALUES IN ('abc'); INSERT INTO test30 VALUES (1, 'abc'); -- 2.28.0