From 48337b4076a7aaf7cc5d020515cf8b1d4e0a4b0c Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Fri, 8 Mar 2019 11:03:54 +0100 Subject: [PATCH v8] Collations with nondeterministic comparison This adds a flag "deterministic" to collations. If that is false, such a collation disables various optimizations that assume that strings are equal only if they are byte-wise equal. That then allows use cases such as case-insensitive or accent-insensitive comparisons or handling of strings with different Unicode normal forms. The term "deterministic comparison" in this context is from Unicode Technical Standard #10 (https://unicode.org/reports/tr10/#Deterministic_Comparison). This patch makes changes in three areas: - CREATE COLLATION DDL changes and system catalog changes to support this new flag. - Many executor nodes and auxiliary code are extended to track collations. Previously, this code would just throw away collation information, because the eventually-called user-defined functions didn't use it since they only cared about equality, which didn't need collation information. - String data type functions that do equality comparisons and hashing are changed to take the (non-)deterministic flag into account. For comparison, this just means skipping various shortcuts and tie breakers that use byte-wise comparison. For hashing, we first need to convert the input string to a canonical "sort key" using the ICU analogue of strxfrm(). 
Discussion: https://www.postgresql.org/message-id/flat/1ccc668f-4cbc-0bef-af67-450b47cdfee7@2ndquadrant.com --- contrib/bloom/bloom.h | 1 + contrib/bloom/blutils.c | 3 +- doc/src/sgml/catalogs.sgml | 7 + doc/src/sgml/charset.sgml | 61 +- doc/src/sgml/citext.sgml | 21 + doc/src/sgml/func.sgml | 6 + doc/src/sgml/ref/create_collation.sgml | 22 + src/backend/access/hash/hashfunc.c | 100 ++- src/backend/access/spgist/spgtextproc.c | 3 +- src/backend/catalog/pg_collation.c | 2 + src/backend/commands/collationcmds.c | 25 +- src/backend/commands/extension.c | 6 +- src/backend/executor/execExpr.c | 4 +- src/backend/executor/execGrouping.c | 14 +- src/backend/executor/execPartition.c | 1 + src/backend/executor/execReplication.c | 5 +- src/backend/executor/nodeAgg.c | 9 +- src/backend/executor/nodeGroup.c | 1 + src/backend/executor/nodeHash.c | 14 +- src/backend/executor/nodeHashjoin.c | 5 + src/backend/executor/nodeRecursiveunion.c | 1 + src/backend/executor/nodeSetOp.c | 2 + src/backend/executor/nodeSubplan.c | 14 +- src/backend/executor/nodeUnique.c | 1 + src/backend/executor/nodeWindowAgg.c | 2 + src/backend/nodes/copyfuncs.c | 7 + src/backend/nodes/outfuncs.c | 7 + src/backend/nodes/readfuncs.c | 7 + src/backend/optimizer/plan/createplan.c | 54 +- src/backend/optimizer/util/tlist.c | 25 + src/backend/partitioning/partbounds.c | 4 +- src/backend/partitioning/partprune.c | 3 +- src/backend/regex/regc_pg_locale.c | 5 + src/backend/utils/adt/arrayfuncs.c | 2 +- src/backend/utils/adt/like.c | 27 +- src/backend/utils/adt/like_support.c | 14 + src/backend/utils/adt/name.c | 32 +- src/backend/utils/adt/orderedsetaggs.c | 3 +- src/backend/utils/adt/pg_locale.c | 1 + src/backend/utils/adt/ri_triggers.c | 33 +- src/backend/utils/adt/varchar.c | 194 ++++- src/backend/utils/adt/varlena.c | 333 +++++--- src/backend/utils/cache/catcache.c | 9 +- src/backend/utils/cache/lsyscache.c | 16 + src/bin/initdb/initdb.c | 4 +- src/bin/pg_dump/pg_dump.c | 39 +- src/bin/psql/describe.c | 17 +- 
src/include/catalog/pg_collation.h | 2 + src/include/executor/executor.h | 4 + src/include/executor/hashjoin.h | 1 + src/include/executor/nodeHash.h | 2 +- src/include/nodes/execnodes.h | 3 + src/include/nodes/plannodes.h | 7 + src/include/optimizer/planmain.h | 2 +- src/include/optimizer/tlist.h | 1 + src/include/partitioning/partbounds.h | 1 + src/include/utils/lsyscache.h | 1 + src/include/utils/pg_locale.h | 1 + .../regress/expected/collate.icu.utf8.out | 710 ++++++++++++++++++ .../regress/expected/collate.linux.utf8.out | 5 + src/test/regress/expected/collate.out | 15 + src/test/regress/expected/subselect.out | 19 + src/test/regress/sql/collate.icu.utf8.sql | 250 ++++++ src/test/regress/sql/collate.linux.utf8.sql | 7 + src/test/regress/sql/collate.sql | 5 + src/test/regress/sql/subselect.sql | 17 + src/test/subscription/Makefile | 2 + src/test/subscription/t/012_collation.pl | 103 +++ 68 files changed, 2086 insertions(+), 238 deletions(-) create mode 100644 src/test/subscription/t/012_collation.pl diff --git a/contrib/bloom/bloom.h b/contrib/bloom/bloom.h index d641361aef..7c18eaa508 100644 --- a/contrib/bloom/bloom.h +++ b/contrib/bloom/bloom.h @@ -137,6 +137,7 @@ typedef struct BloomMetaPageData typedef struct BloomState { FmgrInfo hashFn[INDEX_MAX_KEYS]; + Oid collations[INDEX_MAX_KEYS]; BloomOptions opts; /* copy of options on index's metapage */ int32 nColumns; diff --git a/contrib/bloom/blutils.c b/contrib/bloom/blutils.c index 6458376578..d078dfbd46 100644 --- a/contrib/bloom/blutils.c +++ b/contrib/bloom/blutils.c @@ -163,6 +163,7 @@ initBloomState(BloomState *state, Relation index) fmgr_info_copy(&(state->hashFn[i]), index_getprocinfo(index, i + 1, BLOOM_HASH_PROC), CurrentMemoryContext); + state->collations[i] = index->rd_indcollation[i]; } /* Initialize amcache if needed with options from metapage */ @@ -267,7 +268,7 @@ signValue(BloomState *state, BloomSignatureWord *sign, Datum value, int attno) * different columns will be mapped into different 
bits because of step * above */ - hashVal = DatumGetInt32(FunctionCall1(&state->hashFn[attno], value)); + hashVal = DatumGetInt32(FunctionCall1Coll(&state->hashFn[attno], state->collations[attno], value)); mySrand(hashVal ^ myRand()); for (j = 0; j < state->opts.bitSize[attno]; j++) diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml index 0fd792ff1a..45ed077654 100644 --- a/doc/src/sgml/catalogs.sgml +++ b/doc/src/sgml/catalogs.sgml @@ -2077,6 +2077,13 @@ <structname>pg_collation</structname> Columns default, c = libc, i = icu + + collisdeterministic + bool + + Is the collation deterministic? + + collencoding int4 diff --git a/doc/src/sgml/charset.sgml b/doc/src/sgml/charset.sgml index a6143ef8a7..555d1b4ac6 100644 --- a/doc/src/sgml/charset.sgml +++ b/doc/src/sgml/charset.sgml @@ -847,11 +847,13 @@ ICU collations Note that while this system allows creating collations that ignore - case or ignore accents or similar (using - the ks key), PostgreSQL does not at the moment allow - such collations to act in a truly case- or accent-insensitive manner. Any - strings that compare equal according to the collation but are not - byte-wise equal will be sorted according to their byte values. + case or ignore accents or similar (using the + ks key), in order for such collations to act in a + truly case- or accent-insensitive manner, they also need to be declared as not + deterministic in CREATE COLLATION; + see . + Otherwise, any strings that compare equal according to the collation but + are not byte-wise equal will be sorted according to their byte values. @@ -883,6 +885,55 @@ Copying Collations + + + Nondeterministic Collations + + + A collation is either deterministic or + nondeterministic. A deterministic collation uses + deterministic comparisons, which means that it considers strings to be + equal only if they consist of the same byte sequence. Nondeterministic + comparison may determine strings to be equal even if they consist of + different bytes.
Typical situations include case-insensitive comparison, + accent-insensitive comparison, as well as comparison of strings in + different Unicode normal forms. It is up to the collation provider to + actually implement such insensitive comparisons; the deterministic flag + only determines whether ties are to be broken using byte-wise comparison. + See also Unicode Technical + Standard 10 for more information on the terminology. + + + + To create a nondeterministic collation, specify the property + deterministic = false to CREATE + COLLATION, for example: + +CREATE COLLATION ndcoll (provider = icu, locale = 'und', deterministic = false); + + This example would use the standard Unicode collation in a + nondeterministic way. In particular, this would allow strings in + different normal forms to be compared correctly. More interesting + examples make use of the ICU customization facilities explained above. + For example: + +CREATE COLLATION case_insensitive (provider = icu, locale = 'und-u-ks-level2', deterministic = false); +CREATE COLLATION ignore_accents (provider = icu, locale = 'und-u-ks-level1-kc-true', deterministic = false); + + + + + All standard and predefined collations are deterministic; all + user-defined collations are deterministic by default. While + nondeterministic collations give a more correct behavior, + especially when considering the full power of Unicode and its many + special cases, they also have some drawbacks. Foremost, their use leads + to a performance penalty. Also, certain operations are not possible with + nondeterministic collations, such as pattern matching operations. + Therefore, they should be used only in cases where they are specifically + wanted. + + diff --git a/doc/src/sgml/citext.sgml b/doc/src/sgml/citext.sgml index b1fe7101b2..85aa339d8b 100644 --- a/doc/src/sgml/citext.sgml +++ b/doc/src/sgml/citext.sgml @@ -14,6 +14,16 @@ citext exactly like text.
+ + + Consider using nondeterministic collations (see + ) instead of this module. They + can be used for case-insensitive comparisons, accent-insensitive + comparisons, and other combinations, and they handle more Unicode special + cases correctly. + + + Rationale @@ -246,6 +256,17 @@ Limitations will be invoked instead. + + + + The approach of lower-casing strings for comparison does not handle some + Unicode special cases correctly, for example when one upper-case letter + has two lower-case letter equivalents. Unicode distinguishes between + case mapping and case + folding for this reason. Use nondeterministic collations + instead of citext to handle that correctly. + + diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 03859a78ea..7faec35f91 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -3962,6 +3962,12 @@ Pattern Matching + + The pattern matching operators of all three kinds do not support + nondeterministic collations. If required, apply a different collation to + the expression to work around this limitation. + + <function>LIKE</function> diff --git a/doc/src/sgml/ref/create_collation.sgml b/doc/src/sgml/ref/create_collation.sgml index 038797fce1..def4dda6e8 100644 --- a/doc/src/sgml/ref/create_collation.sgml +++ b/doc/src/sgml/ref/create_collation.sgml @@ -23,6 +23,7 @@ [ LC_COLLATE = lc_collate, ] [ LC_CTYPE = lc_ctype, ] [ PROVIDER = provider, ] + [ DETERMINISTIC = boolean, ] [ VERSION = version ] ) CREATE COLLATION [ IF NOT EXISTS ] name FROM existing_collation @@ -124,6 +125,27 @@ Parameters + + DETERMINISTIC + + + + Specifies whether the collation should use deterministic comparisons. + The default is true. A deterministic comparison considers strings that + are not byte-wise equal to be unequal even if they are considered + logically equal by the comparison. PostgreSQL breaks ties using a + byte-wise comparison. Comparison that is not deterministic can make the + collation be, say, case- or accent-insensitive. 
For that, you need to + choose an appropriate LC_COLLATE setting + and set the collation to not deterministic here. + + + + Nondeterministic collations are only supported with the ICU provider. + + + + version diff --git a/src/backend/access/hash/hashfunc.c b/src/backend/access/hash/hashfunc.c index 63005ddc4d..cc116f5ece 100644 --- a/src/backend/access/hash/hashfunc.c +++ b/src/backend/access/hash/hashfunc.c @@ -27,7 +27,9 @@ #include "postgres.h" #include "access/hash.h" +#include "catalog/pg_collation.h" #include "utils/builtins.h" +#include "utils/pg_locale.h" /* * Datatype-specific hash functions. @@ -242,15 +244,51 @@ Datum hashtext(PG_FUNCTION_ARGS) { text *key = PG_GETARG_TEXT_PP(0); + Oid collid = PG_GET_COLLATION(); + pg_locale_t mylocale = 0; Datum result; - /* - * Note: this is currently identical in behavior to hashvarlena, but keep - * it as a separate function in case we someday want to do something - * different in non-C locales. (See also hashbpchar, if so.) - */ - result = hash_any((unsigned char *) VARDATA_ANY(key), - VARSIZE_ANY_EXHDR(key)); + if (!collid) + ereport(ERROR, + (errcode(ERRCODE_INDETERMINATE_COLLATION), + errmsg("could not determine which collation to use for string hashing"), + errhint("Use the COLLATE clause to set the collation explicitly."))); + + if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID) + mylocale = pg_newlocale_from_collation(collid); + + if (!mylocale || mylocale->deterministic) + { + result = hash_any((unsigned char *) VARDATA_ANY(key), + VARSIZE_ANY_EXHDR(key)); + } + else + { +#ifdef USE_ICU + if (mylocale->provider == COLLPROVIDER_ICU) + { + int32_t ulen = -1; + UChar *uchar = NULL; + Size bsize; + uint8_t *buf; + + ulen = icu_to_uchar(&uchar, VARDATA_ANY(key), VARSIZE_ANY_EXHDR(key)); + + bsize = ucol_getSortKey(mylocale->info.icu.ucol, + uchar, ulen, NULL, 0); + buf = palloc(bsize); + ucol_getSortKey(mylocale->info.icu.ucol, + uchar, ulen, buf, bsize); + + result = hash_any(buf, bsize); + + 
pfree(buf); + } + else +#endif + /* shouldn't happen */ + elog(ERROR, "unsupported collprovider: %c", mylocale->provider); + } /* Avoid leaking memory for toasted inputs */ PG_FREE_IF_COPY(key, 0); @@ -262,12 +300,52 @@ Datum hashtextextended(PG_FUNCTION_ARGS) { text *key = PG_GETARG_TEXT_PP(0); + Oid collid = PG_GET_COLLATION(); + pg_locale_t mylocale = 0; Datum result; - /* Same approach as hashtext */ - result = hash_any_extended((unsigned char *) VARDATA_ANY(key), - VARSIZE_ANY_EXHDR(key), - PG_GETARG_INT64(1)); + if (!collid) + ereport(ERROR, + (errcode(ERRCODE_INDETERMINATE_COLLATION), + errmsg("could not determine which collation to use for string hashing"), + errhint("Use the COLLATE clause to set the collation explicitly."))); + + if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID) + mylocale = pg_newlocale_from_collation(collid); + + if (!mylocale || mylocale->deterministic) + { + result = hash_any_extended((unsigned char *) VARDATA_ANY(key), + VARSIZE_ANY_EXHDR(key), + PG_GETARG_INT64(1)); + } + else + { +#ifdef USE_ICU + if (mylocale->provider == COLLPROVIDER_ICU) + { + int32_t ulen = -1; + UChar *uchar = NULL; + Size bsize; + uint8_t *buf; + + ulen = icu_to_uchar(&uchar, VARDATA_ANY(key), VARSIZE_ANY_EXHDR(key)); + + bsize = ucol_getSortKey(mylocale->info.icu.ucol, + uchar, ulen, NULL, 0); + buf = palloc(bsize); + ucol_getSortKey(mylocale->info.icu.ucol, + uchar, ulen, buf, bsize); + + result = hash_any_extended(buf, bsize, PG_GETARG_INT64(1)); + + pfree(buf); + } + else +#endif + /* shouldn't happen */ + elog(ERROR, "unsupported collprovider: %c", mylocale->provider); + } PG_FREE_IF_COPY(key, 0); diff --git a/src/backend/access/spgist/spgtextproc.c b/src/backend/access/spgist/spgtextproc.c index 39cd391529..d22998c54b 100644 --- a/src/backend/access/spgist/spgtextproc.c +++ b/src/backend/access/spgist/spgtextproc.c @@ -630,7 +630,8 @@ spg_text_leaf_consistent(PG_FUNCTION_ARGS) * query (prefix) string, so we don't need to check it again. 
*/ res = (level >= queryLen) || - DatumGetBool(DirectFunctionCall2(text_starts_with, + DatumGetBool(DirectFunctionCall2Coll(text_starts_with, + PG_GET_COLLATION(), out->leafValue, PointerGetDatum(query))); diff --git a/src/backend/catalog/pg_collation.c b/src/backend/catalog/pg_collation.c index 74e1e82cb9..dd99d53547 100644 --- a/src/backend/catalog/pg_collation.c +++ b/src/backend/catalog/pg_collation.c @@ -46,6 +46,7 @@ Oid CollationCreate(const char *collname, Oid collnamespace, Oid collowner, char collprovider, + bool collisdeterministic, int32 collencoding, const char *collcollate, const char *collctype, const char *collversion, @@ -160,6 +161,7 @@ CollationCreate(const char *collname, Oid collnamespace, values[Anum_pg_collation_collnamespace - 1] = ObjectIdGetDatum(collnamespace); values[Anum_pg_collation_collowner - 1] = ObjectIdGetDatum(collowner); values[Anum_pg_collation_collprovider - 1] = CharGetDatum(collprovider); + values[Anum_pg_collation_collisdeterministic - 1] = BoolGetDatum(collisdeterministic); values[Anum_pg_collation_collencoding - 1] = Int32GetDatum(collencoding); namestrcpy(&name_collate, collcollate); values[Anum_pg_collation_collcollate - 1] = NameGetDatum(&name_collate); diff --git a/src/backend/commands/collationcmds.c b/src/backend/commands/collationcmds.c index ed3f1c12e5..919e092483 100644 --- a/src/backend/commands/collationcmds.c +++ b/src/backend/commands/collationcmds.c @@ -59,10 +59,12 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e DefElem *lccollateEl = NULL; DefElem *lcctypeEl = NULL; DefElem *providerEl = NULL; + DefElem *deterministicEl = NULL; DefElem *versionEl = NULL; char *collcollate = NULL; char *collctype = NULL; char *collproviderstr = NULL; + bool collisdeterministic = true; int collencoding = 0; char collprovider = 0; char *collversion = NULL; @@ -91,6 +93,8 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e defelp = &lcctypeEl; else if 
(strcmp(defel->defname, "provider") == 0) defelp = &providerEl; + else if (strcmp(defel->defname, "deterministic") == 0) + defelp = &deterministicEl; else if (strcmp(defel->defname, "version") == 0) defelp = &versionEl; else @@ -125,6 +129,7 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e collcollate = pstrdup(NameStr(((Form_pg_collation) GETSTRUCT(tp))->collcollate)); collctype = pstrdup(NameStr(((Form_pg_collation) GETSTRUCT(tp))->collctype)); collprovider = ((Form_pg_collation) GETSTRUCT(tp))->collprovider; + collisdeterministic = ((Form_pg_collation) GETSTRUCT(tp))->collisdeterministic; collencoding = ((Form_pg_collation) GETSTRUCT(tp))->collencoding; ReleaseSysCache(tp); @@ -157,6 +162,9 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e if (providerEl) collproviderstr = defGetString(providerEl); + if (deterministicEl) + collisdeterministic = defGetBoolean(deterministicEl); + if (versionEl) collversion = defGetString(versionEl); @@ -185,6 +193,16 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), errmsg("parameter \"lc_ctype\" must be specified"))); + /* + * Nondeterministic collations are currently only supported with ICU + * because that's the only case where it can actually make a difference. + * So we can save writing the code for the other providers. + */ + if (!collisdeterministic && collprovider != COLLPROVIDER_ICU) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("nondeterministic collations not supported with this provider"))); + if (!fromEl) { if (collprovider == COLLPROVIDER_ICU) @@ -203,6 +221,7 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e collNamespace, GetUserId(), collprovider, + collisdeterministic, collencoding, collcollate, collctype, @@ -586,7 +605,7 @@ pg_import_system_collations(PG_FUNCTION_ARGS) * about existing ones. 
*/ collid = CollationCreate(localebuf, nspid, GetUserId(), - COLLPROVIDER_LIBC, enc, + COLLPROVIDER_LIBC, true, enc, localebuf, localebuf, get_collation_actual_version(COLLPROVIDER_LIBC, localebuf), true, true); @@ -647,7 +666,7 @@ pg_import_system_collations(PG_FUNCTION_ARGS) int enc = aliases[i].enc; collid = CollationCreate(alias, nspid, GetUserId(), - COLLPROVIDER_LIBC, enc, + COLLPROVIDER_LIBC, true, enc, locale, locale, get_collation_actual_version(COLLPROVIDER_LIBC, locale), true, true); @@ -709,7 +728,7 @@ pg_import_system_collations(PG_FUNCTION_ARGS) collid = CollationCreate(psprintf("%s-x-icu", langtag), nspid, GetUserId(), - COLLPROVIDER_ICU, -1, + COLLPROVIDER_ICU, true, -1, collcollate, collcollate, get_collation_actual_version(COLLPROVIDER_ICU, collcollate), true, true); diff --git a/src/backend/commands/extension.c b/src/backend/commands/extension.c index daf3f51636..d4723fced8 100644 --- a/src/backend/commands/extension.c +++ b/src/backend/commands/extension.c @@ -901,7 +901,8 @@ execute_extension_script(Oid extensionOid, ExtensionControlFile *control, { const char *qSchemaName = quote_identifier(schemaName); - t_sql = DirectFunctionCall3(replace_text, + t_sql = DirectFunctionCall3Coll(replace_text, + C_COLLATION_OID, t_sql, CStringGetTextDatum("@extschema@"), CStringGetTextDatum(qSchemaName)); @@ -913,7 +914,8 @@ execute_extension_script(Oid extensionOid, ExtensionControlFile *control, */ if (control->module_pathname) { - t_sql = DirectFunctionCall3(replace_text, + t_sql = DirectFunctionCall3Coll(replace_text, + C_COLLATION_OID, t_sql, CStringGetTextDatum("MODULE_PATHNAME"), CStringGetTextDatum(control->module_pathname)); diff --git a/src/backend/executor/execExpr.c b/src/backend/executor/execExpr.c index 7cbf9d3bc1..0fb31f5c3d 100644 --- a/src/backend/executor/execExpr.c +++ b/src/backend/executor/execExpr.c @@ -3317,6 +3317,7 @@ ExecBuildGroupingEqual(TupleDesc ldesc, TupleDesc rdesc, int numCols, const AttrNumber *keyColIdx, const Oid 
*eqfunctions, + const Oid *collations, PlanState *parent) { ExprState *state = makeNode(ExprState); @@ -3377,6 +3378,7 @@ ExecBuildGroupingEqual(TupleDesc ldesc, TupleDesc rdesc, Form_pg_attribute latt = TupleDescAttr(ldesc, attno - 1); Form_pg_attribute ratt = TupleDescAttr(rdesc, attno - 1); Oid foid = eqfunctions[natt]; + Oid collid = collations[natt]; FmgrInfo *finfo; FunctionCallInfo fcinfo; AclResult aclresult; @@ -3394,7 +3396,7 @@ ExecBuildGroupingEqual(TupleDesc ldesc, TupleDesc rdesc, fmgr_info(foid, finfo); fmgr_info_set_expr(NULL, finfo); InitFunctionCallInfoData(*fcinfo, finfo, 2, - InvalidOid, NULL, NULL); + collid, NULL, NULL); /* left arg */ scratch.opcode = EEOP_INNER_VAR; diff --git a/src/backend/executor/execGrouping.c b/src/backend/executor/execGrouping.c index a9d80e692b..870a730a66 100644 --- a/src/backend/executor/execGrouping.c +++ b/src/backend/executor/execGrouping.c @@ -61,6 +61,7 @@ execTuplesMatchPrepare(TupleDesc desc, int numCols, const AttrNumber *keyColIdx, const Oid *eqOperators, + const Oid *collations, PlanState *parent) { Oid *eqFunctions = (Oid *) palloc(numCols * sizeof(Oid)); @@ -76,7 +77,7 @@ execTuplesMatchPrepare(TupleDesc desc, /* build actual expression */ expr = ExecBuildGroupingEqual(desc, desc, NULL, NULL, - numCols, keyColIdx, eqFunctions, + numCols, keyColIdx, eqFunctions, collations, parent); return expr; @@ -156,6 +157,7 @@ BuildTupleHashTableExt(PlanState *parent, int numCols, AttrNumber *keyColIdx, const Oid *eqfuncoids, FmgrInfo *hashfunctions, + Oid *collations, long nbuckets, Size additionalsize, MemoryContext metacxt, MemoryContext tablecxt, @@ -178,6 +180,7 @@ BuildTupleHashTableExt(PlanState *parent, hashtable->numCols = numCols; hashtable->keyColIdx = keyColIdx; hashtable->tab_hash_funcs = hashfunctions; + hashtable->tab_collations = collations; hashtable->tablecxt = tablecxt; hashtable->tempcxt = tempcxt; hashtable->entrysize = entrysize; @@ -213,7 +216,7 @@ BuildTupleHashTableExt(PlanState *parent, 
hashtable->tab_eq_func = ExecBuildGroupingEqual(inputDesc, inputDesc, &TTSOpsMinimalTuple, &TTSOpsMinimalTuple, numCols, - keyColIdx, eqfuncoids, + keyColIdx, eqfuncoids, collations, NULL); /* @@ -241,6 +244,7 @@ BuildTupleHashTable(PlanState *parent, int numCols, AttrNumber *keyColIdx, const Oid *eqfuncoids, FmgrInfo *hashfunctions, + Oid *collations, long nbuckets, Size additionalsize, MemoryContext tablecxt, MemoryContext tempcxt, @@ -251,6 +255,7 @@ BuildTupleHashTable(PlanState *parent, numCols, keyColIdx, eqfuncoids, hashfunctions, + collations, nbuckets, additionalsize, tablecxt, tablecxt, @@ -422,8 +427,9 @@ TupleHashTableHash(struct tuplehash_hash *tb, const MinimalTuple tuple) { uint32 hkey; - hkey = DatumGetUInt32(FunctionCall1(&hashfunctions[i], - attr)); + hkey = DatumGetUInt32(FunctionCall1Coll(&hashfunctions[i], + hashtable->tab_collations[i], + attr)); hashkey ^= hkey; } } diff --git a/src/backend/executor/execPartition.c b/src/backend/executor/execPartition.c index aaa81f0620..7b3ad97ff5 100644 --- a/src/backend/executor/execPartition.c +++ b/src/backend/executor/execPartition.c @@ -1249,6 +1249,7 @@ get_partition_for_tuple(PartitionDispatch pd, Datum *values, bool *isnull) greatest_modulus = get_hash_partition_greatest_modulus(boundinfo); rowHash = compute_partition_hash_value(key->partnatts, key->partsupfunc, + key->partcollation, values, isnull); part_index = boundinfo->indexes[rowHash % greatest_modulus]; diff --git a/src/backend/executor/execReplication.c b/src/backend/executor/execReplication.c index 5c5aa96e7f..a964522fdd 100644 --- a/src/backend/executor/execReplication.c +++ b/src/backend/executor/execReplication.c @@ -95,6 +95,8 @@ build_replindex_scan_key(ScanKey skey, Relation rel, Relation idxrel, regop, searchslot->tts_values[mainattno - 1]); + skey[attoff].sk_collation = idxrel->rd_indcollation[attoff]; + /* Check for null value. 
*/ if (searchslot->tts_isnull[mainattno - 1]) { @@ -261,7 +263,8 @@ tuple_equals_slot(TupleDesc desc, HeapTuple tup, TupleTableSlot *slot) errmsg("could not identify an equality operator for type %s", format_type_be(att->atttypid)))); - if (!DatumGetBool(FunctionCall2(&typentry->eq_opr_finfo, + if (!DatumGetBool(FunctionCall2Coll(&typentry->eq_opr_finfo, + att->attcollation, values[attrnum], slot->tts_values[attrnum]))) return false; diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c index bae7989a42..47161afbd4 100644 --- a/src/backend/executor/nodeAgg.c +++ b/src/backend/executor/nodeAgg.c @@ -746,15 +746,14 @@ process_ordered_aggregate_single(AggState *aggstate, /* * If DISTINCT mode, and not distinct from prior, skip it. - * - * Note: we assume equality functions don't care about collation. */ if (isDistinct && haveOldVal && ((oldIsNull && *isNull) || (!oldIsNull && !*isNull && oldAbbrevVal == newAbbrevVal && - DatumGetBool(FunctionCall2(&pertrans->equalfnOne, + DatumGetBool(FunctionCall2Coll(&pertrans->equalfnOne, + pertrans->aggCollation, oldVal, *newVal))))) { /* equal to prior, so forget this one */ @@ -1287,6 +1286,7 @@ build_hash_table(AggState *aggstate) perhash->hashGrpColIdxHash, perhash->eqfuncoids, perhash->hashfunctions, + perhash->aggnode->grpCollations, perhash->aggnode->numGroups, additionalsize, aggstate->ss.ps.state->es_query_cxt, @@ -2381,6 +2381,7 @@ ExecInitAgg(Agg *node, EState *estate, int eflags) length, aggnode->grpColIdx, aggnode->grpOperators, + aggnode->grpCollations, (PlanState *) aggstate); } @@ -2392,6 +2393,7 @@ ExecInitAgg(Agg *node, EState *estate, int eflags) aggnode->numCols, aggnode->grpColIdx, aggnode->grpOperators, + aggnode->grpCollations, (PlanState *) aggstate); } } @@ -3155,6 +3157,7 @@ build_pertrans_for_aggref(AggStatePerTrans pertrans, numDistinctCols, pertrans->sortColIdx, ops, + pertrans->sortCollations, &aggstate->ss.ps); pfree(ops); } diff --git a/src/backend/executor/nodeGroup.c 
b/src/backend/executor/nodeGroup.c index 655084d7b5..05f1d33150 100644 --- a/src/backend/executor/nodeGroup.c +++ b/src/backend/executor/nodeGroup.c @@ -212,6 +212,7 @@ ExecInitGroup(Group *node, EState *estate, int eflags) node->numCols, node->grpColIdx, node->grpOperators, + node->grpCollations, &grpstate->ss.ps); return grpstate; diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c index 856daf6a7f..64eec91f8b 100644 --- a/src/backend/executor/nodeHash.c +++ b/src/backend/executor/nodeHash.c @@ -425,7 +425,7 @@ ExecEndHash(HashState *node) * ---------------------------------------------------------------- */ HashJoinTable -ExecHashTableCreate(HashState *state, List *hashOperators, bool keepNulls) +ExecHashTableCreate(HashState *state, List *hashOperators, List *hashCollations, bool keepNulls) { Hash *node; HashJoinTable hashtable; @@ -439,6 +439,7 @@ ExecHashTableCreate(HashState *state, List *hashOperators, bool keepNulls) int nkeys; int i; ListCell *ho; + ListCell *hc; MemoryContext oldcxt; /* @@ -541,8 +542,9 @@ ExecHashTableCreate(HashState *state, List *hashOperators, bool keepNulls) hashtable->inner_hashfunctions = (FmgrInfo *) palloc(nkeys * sizeof(FmgrInfo)); hashtable->hashStrict = (bool *) palloc(nkeys * sizeof(bool)); + hashtable->collations = (Oid *) palloc(nkeys * sizeof(Oid)); i = 0; - foreach(ho, hashOperators) + forboth(ho, hashOperators, hc, hashCollations) { Oid hashop = lfirst_oid(ho); Oid left_hashfn; @@ -554,6 +556,7 @@ ExecHashTableCreate(HashState *state, List *hashOperators, bool keepNulls) fmgr_info(left_hashfn, &hashtable->outer_hashfunctions[i]); fmgr_info(right_hashfn, &hashtable->inner_hashfunctions[i]); hashtable->hashStrict[i] = op_strict(hashop); + hashtable->collations[i] = lfirst_oid(hc); i++; } @@ -1847,7 +1850,7 @@ ExecHashGetHashValue(HashJoinTable hashtable, /* Compute the hash function */ uint32 hkey; - hkey = DatumGetUInt32(FunctionCall1(&hashfunctions[i], keyval)); + hkey = 
DatumGetUInt32(FunctionCall1Coll(&hashfunctions[i], hashtable->collations[i], keyval)); hashkey ^= hkey; } @@ -2303,8 +2306,9 @@ ExecHashBuildSkewHash(HashJoinTable hashtable, Hash *node, int mcvsToUse) uint32 hashvalue; int bucket; - hashvalue = DatumGetUInt32(FunctionCall1(&hashfunctions[0], - sslot.values[i])); + hashvalue = DatumGetUInt32(FunctionCall1Coll(&hashfunctions[0], + hashtable->collations[0], + sslot.values[i])); /* * While we have not hit a hole in the hashtable and have not hit diff --git a/src/backend/executor/nodeHashjoin.c b/src/backend/executor/nodeHashjoin.c index 2098708864..aa43296e26 100644 --- a/src/backend/executor/nodeHashjoin.c +++ b/src/backend/executor/nodeHashjoin.c @@ -278,6 +278,7 @@ ExecHashJoinImpl(PlanState *pstate, bool parallel) */ hashtable = ExecHashTableCreate(hashNode, node->hj_HashOperators, + node->hj_Collations, HJ_FILL_INNER(node)); node->hj_HashTable = hashtable; @@ -603,6 +604,7 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags) List *rclauses; List *rhclauses; List *hoperators; + List *hcollations; TupleDesc outerDesc, innerDesc; ListCell *l; @@ -738,6 +740,7 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags) rclauses = NIL; rhclauses = NIL; hoperators = NIL; + hcollations = NIL; foreach(l, node->hashclauses) { OpExpr *hclause = lfirst_node(OpExpr, l); @@ -749,10 +752,12 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags) rhclauses = lappend(rhclauses, ExecInitExpr(lsecond(hclause->args), innerPlanState(hjstate))); hoperators = lappend_oid(hoperators, hclause->opno); + hcollations = lappend_oid(hcollations, hclause->inputcollid); } hjstate->hj_OuterHashKeys = lclauses; hjstate->hj_InnerHashKeys = rclauses; hjstate->hj_HashOperators = hoperators; + hjstate->hj_Collations = hcollations; /* child Hash node needs to evaluate inner hash keys, too */ ((HashState *) innerPlanState(hjstate))->hashkeys = rhclauses; diff --git a/src/backend/executor/nodeRecursiveunion.c 
b/src/backend/executor/nodeRecursiveunion.c index 9b74ed3208..9c5eed7def 100644 --- a/src/backend/executor/nodeRecursiveunion.c +++ b/src/backend/executor/nodeRecursiveunion.c @@ -43,6 +43,7 @@ build_hash_table(RecursiveUnionState *rustate) node->dupColIdx, rustate->eqfuncoids, rustate->hashfunctions, + node->dupCollations, node->numGroups, 0, rustate->ps.state->es_query_cxt, diff --git a/src/backend/executor/nodeSetOp.c b/src/backend/executor/nodeSetOp.c index 26aeaee083..044246aa09 100644 --- a/src/backend/executor/nodeSetOp.c +++ b/src/backend/executor/nodeSetOp.c @@ -132,6 +132,7 @@ build_hash_table(SetOpState *setopstate) node->dupColIdx, setopstate->eqfuncoids, setopstate->hashfunctions, + node->dupCollations, node->numGroups, 0, setopstate->ps.state->es_query_cxt, @@ -554,6 +555,7 @@ ExecInitSetOp(SetOp *node, EState *estate, int eflags) node->numCols, node->dupColIdx, node->dupOperators, + node->dupCollations, &setopstate->ps); if (node->strategy == SETOP_HASHED) diff --git a/src/backend/executor/nodeSubplan.c b/src/backend/executor/nodeSubplan.c index d7d076758c..749b4eced3 100644 --- a/src/backend/executor/nodeSubplan.c +++ b/src/backend/executor/nodeSubplan.c @@ -514,6 +514,7 @@ buildSubPlanHash(SubPlanState *node, ExprContext *econtext) node->keyColIdx, node->tab_eq_funcoids, node->tab_hash_funcs, + node->tab_collations, nbuckets, 0, node->planstate->state->es_query_cxt, @@ -541,6 +542,7 @@ buildSubPlanHash(SubPlanState *node, ExprContext *econtext) node->keyColIdx, node->tab_eq_funcoids, node->tab_hash_funcs, + node->tab_collations, nbuckets, 0, node->planstate->state->es_query_cxt, @@ -642,6 +644,7 @@ execTuplesUnequal(TupleTableSlot *slot1, int numCols, AttrNumber *matchColIdx, FmgrInfo *eqfunctions, + const Oid *collations, MemoryContext evalContext) { MemoryContext oldContext; @@ -679,8 +682,8 @@ execTuplesUnequal(TupleTableSlot *slot1, continue; /* can't prove anything here */ /* Apply the type-specific equality function */ - - if 
(!DatumGetBool(FunctionCall2(&eqfunctions[i], + if (!DatumGetBool(FunctionCall2Coll(&eqfunctions[i], + collations[i], attr1, attr2))) { result = true; /* they are unequal */ @@ -722,6 +725,7 @@ findPartialMatch(TupleHashTable hashtable, TupleTableSlot *slot, if (!execTuplesUnequal(slot, hashtable->tableslot, numCols, keyColIdx, eqfunctions, + hashtable->tab_collations, hashtable->tempcxt)) { TermTupleHashIterator(&hashiter); @@ -817,6 +821,7 @@ ExecInitSubPlan(SubPlan *subplan, PlanState *parent) sstate->tab_eq_funcoids = NULL; sstate->tab_hash_funcs = NULL; sstate->tab_eq_funcs = NULL; + sstate->tab_collations = NULL; sstate->lhs_hash_funcs = NULL; sstate->cur_eq_funcs = NULL; @@ -915,6 +920,7 @@ ExecInitSubPlan(SubPlan *subplan, PlanState *parent) sstate->tab_eq_funcoids = (Oid *) palloc(ncols * sizeof(Oid)); sstate->tab_hash_funcs = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo)); sstate->tab_eq_funcs = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo)); + sstate->tab_collations = (Oid *) palloc(ncols * sizeof(Oid)); sstate->lhs_hash_funcs = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo)); sstate->cur_eq_funcs = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo)); i = 1; @@ -965,6 +971,9 @@ ExecInitSubPlan(SubPlan *subplan, PlanState *parent) fmgr_info(left_hashfn, &sstate->lhs_hash_funcs[i - 1]); fmgr_info(right_hashfn, &sstate->tab_hash_funcs[i - 1]); + /* Set collation */ + sstate->tab_collations[i - 1] = opexpr->inputcollid; + i++; } @@ -1001,6 +1010,7 @@ ExecInitSubPlan(SubPlan *subplan, PlanState *parent) ncols, sstate->keyColIdx, sstate->tab_eq_funcoids, + sstate->tab_collations, parent); } diff --git a/src/backend/executor/nodeUnique.c b/src/backend/executor/nodeUnique.c index ad7039937d..c553f150b8 100644 --- a/src/backend/executor/nodeUnique.c +++ b/src/backend/executor/nodeUnique.c @@ -152,6 +152,7 @@ ExecInitUnique(Unique *node, EState *estate, int eflags) node->numCols, node->uniqColIdx, node->uniqOperators, + node->uniqCollations, &uniquestate->ps); return 
uniquestate; diff --git a/src/backend/executor/nodeWindowAgg.c b/src/backend/executor/nodeWindowAgg.c index 157ac042b8..b090828c01 100644 --- a/src/backend/executor/nodeWindowAgg.c +++ b/src/backend/executor/nodeWindowAgg.c @@ -2370,6 +2370,7 @@ ExecInitWindowAgg(WindowAgg *node, EState *estate, int eflags) node->partNumCols, node->partColIdx, node->partOperators, + node->partCollations, &winstate->ss.ps); if (node->ordNumCols > 0) @@ -2378,6 +2379,7 @@ ExecInitWindowAgg(WindowAgg *node, EState *estate, int eflags) node->ordNumCols, node->ordColIdx, node->ordOperators, + node->ordCollations, &winstate->ss.ps); /* diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index a8a735c247..d6fe35374b 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -297,6 +297,7 @@ _copyRecursiveUnion(const RecursiveUnion *from) { COPY_POINTER_FIELD(dupColIdx, from->numCols * sizeof(AttrNumber)); COPY_POINTER_FIELD(dupOperators, from->numCols * sizeof(Oid)); + COPY_POINTER_FIELD(dupCollations, from->numCols * sizeof(Oid)); } COPY_SCALAR_FIELD(numGroups); @@ -956,6 +957,7 @@ _copyGroup(const Group *from) COPY_SCALAR_FIELD(numCols); COPY_POINTER_FIELD(grpColIdx, from->numCols * sizeof(AttrNumber)); COPY_POINTER_FIELD(grpOperators, from->numCols * sizeof(Oid)); + COPY_POINTER_FIELD(grpCollations, from->numCols * sizeof(Oid)); return newnode; } @@ -977,6 +979,7 @@ _copyAgg(const Agg *from) { COPY_POINTER_FIELD(grpColIdx, from->numCols * sizeof(AttrNumber)); COPY_POINTER_FIELD(grpOperators, from->numCols * sizeof(Oid)); + COPY_POINTER_FIELD(grpCollations, from->numCols * sizeof(Oid)); } COPY_SCALAR_FIELD(numGroups); COPY_BITMAPSET_FIELD(aggParams); @@ -1002,12 +1005,14 @@ _copyWindowAgg(const WindowAgg *from) { COPY_POINTER_FIELD(partColIdx, from->partNumCols * sizeof(AttrNumber)); COPY_POINTER_FIELD(partOperators, from->partNumCols * sizeof(Oid)); + COPY_POINTER_FIELD(partCollations, from->partNumCols * sizeof(Oid)); } 
COPY_SCALAR_FIELD(ordNumCols); if (from->ordNumCols > 0) { COPY_POINTER_FIELD(ordColIdx, from->ordNumCols * sizeof(AttrNumber)); COPY_POINTER_FIELD(ordOperators, from->ordNumCols * sizeof(Oid)); + COPY_POINTER_FIELD(ordCollations, from->ordNumCols * sizeof(Oid)); } COPY_SCALAR_FIELD(frameOptions); COPY_NODE_FIELD(startOffset); @@ -1040,6 +1045,7 @@ _copyUnique(const Unique *from) COPY_SCALAR_FIELD(numCols); COPY_POINTER_FIELD(uniqColIdx, from->numCols * sizeof(AttrNumber)); COPY_POINTER_FIELD(uniqOperators, from->numCols * sizeof(Oid)); + COPY_POINTER_FIELD(uniqCollations, from->numCols * sizeof(Oid)); return newnode; } @@ -1089,6 +1095,7 @@ _copySetOp(const SetOp *from) COPY_SCALAR_FIELD(numCols); COPY_POINTER_FIELD(dupColIdx, from->numCols * sizeof(AttrNumber)); COPY_POINTER_FIELD(dupOperators, from->numCols * sizeof(Oid)); + COPY_POINTER_FIELD(dupCollations, from->numCols * sizeof(Oid)); COPY_SCALAR_FIELD(flagColIdx); COPY_SCALAR_FIELD(firstFlag); COPY_SCALAR_FIELD(numGroups); diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 69179a07c3..910a738c20 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -463,6 +463,7 @@ _outRecursiveUnion(StringInfo str, const RecursiveUnion *node) WRITE_INT_FIELD(numCols); WRITE_ATTRNUMBER_ARRAY(dupColIdx, node->numCols); WRITE_OID_ARRAY(dupOperators, node->numCols); + WRITE_OID_ARRAY(dupCollations, node->numCols); WRITE_LONG_FIELD(numGroups); } @@ -774,6 +775,7 @@ _outAgg(StringInfo str, const Agg *node) WRITE_INT_FIELD(numCols); WRITE_ATTRNUMBER_ARRAY(grpColIdx, node->numCols); WRITE_OID_ARRAY(grpOperators, node->numCols); + WRITE_OID_ARRAY(grpCollations, node->numCols); WRITE_LONG_FIELD(numGroups); WRITE_BITMAPSET_FIELD(aggParams); WRITE_NODE_FIELD(groupingSets); @@ -791,9 +793,11 @@ _outWindowAgg(StringInfo str, const WindowAgg *node) WRITE_INT_FIELD(partNumCols); WRITE_ATTRNUMBER_ARRAY(partColIdx, node->partNumCols); WRITE_OID_ARRAY(partOperators, node->partNumCols); + 
WRITE_OID_ARRAY(partCollations, node->partNumCols); WRITE_INT_FIELD(ordNumCols); WRITE_ATTRNUMBER_ARRAY(ordColIdx, node->ordNumCols); WRITE_OID_ARRAY(ordOperators, node->ordNumCols); + WRITE_OID_ARRAY(ordCollations, node->ordNumCols); WRITE_INT_FIELD(frameOptions); WRITE_NODE_FIELD(startOffset); WRITE_NODE_FIELD(endOffset); @@ -814,6 +818,7 @@ _outGroup(StringInfo str, const Group *node) WRITE_INT_FIELD(numCols); WRITE_ATTRNUMBER_ARRAY(grpColIdx, node->numCols); WRITE_OID_ARRAY(grpOperators, node->numCols); + WRITE_OID_ARRAY(grpCollations, node->numCols); } static void @@ -848,6 +853,7 @@ _outUnique(StringInfo str, const Unique *node) WRITE_INT_FIELD(numCols); WRITE_ATTRNUMBER_ARRAY(uniqColIdx, node->numCols); WRITE_OID_ARRAY(uniqOperators, node->numCols); + WRITE_OID_ARRAY(uniqCollations, node->numCols); } static void @@ -875,6 +881,7 @@ _outSetOp(StringInfo str, const SetOp *node) WRITE_INT_FIELD(numCols); WRITE_ATTRNUMBER_ARRAY(dupColIdx, node->numCols); WRITE_OID_ARRAY(dupOperators, node->numCols); + WRITE_OID_ARRAY(dupCollations, node->numCols); WRITE_INT_FIELD(flagColIdx); WRITE_INT_FIELD(firstFlag); WRITE_LONG_FIELD(numGroups); diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c index 4b845b1bb7..eff98febf1 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -1677,6 +1677,7 @@ _readRecursiveUnion(void) READ_INT_FIELD(numCols); READ_ATTRNUMBER_ARRAY(dupColIdx, local_node->numCols); READ_OID_ARRAY(dupOperators, local_node->numCols); + READ_OID_ARRAY(dupCollations, local_node->numCols); READ_LONG_FIELD(numGroups); READ_DONE(); @@ -2143,6 +2144,7 @@ _readGroup(void) READ_INT_FIELD(numCols); READ_ATTRNUMBER_ARRAY(grpColIdx, local_node->numCols); READ_OID_ARRAY(grpOperators, local_node->numCols); + READ_OID_ARRAY(grpCollations, local_node->numCols); READ_DONE(); } @@ -2162,6 +2164,7 @@ _readAgg(void) READ_INT_FIELD(numCols); READ_ATTRNUMBER_ARRAY(grpColIdx, local_node->numCols); READ_OID_ARRAY(grpOperators, 
local_node->numCols); + READ_OID_ARRAY(grpCollations, local_node->numCols); READ_LONG_FIELD(numGroups); READ_BITMAPSET_FIELD(aggParams); READ_NODE_FIELD(groupingSets); @@ -2184,9 +2187,11 @@ _readWindowAgg(void) READ_INT_FIELD(partNumCols); READ_ATTRNUMBER_ARRAY(partColIdx, local_node->partNumCols); READ_OID_ARRAY(partOperators, local_node->partNumCols); + READ_OID_ARRAY(partCollations, local_node->partNumCols); READ_INT_FIELD(ordNumCols); READ_ATTRNUMBER_ARRAY(ordColIdx, local_node->ordNumCols); READ_OID_ARRAY(ordOperators, local_node->ordNumCols); + READ_OID_ARRAY(ordCollations, local_node->ordNumCols); READ_INT_FIELD(frameOptions); READ_NODE_FIELD(startOffset); READ_NODE_FIELD(endOffset); @@ -2212,6 +2217,7 @@ _readUnique(void) READ_INT_FIELD(numCols); READ_ATTRNUMBER_ARRAY(uniqColIdx, local_node->numCols); READ_OID_ARRAY(uniqOperators, local_node->numCols); + READ_OID_ARRAY(uniqCollations, local_node->numCols); READ_DONE(); } @@ -2290,6 +2296,7 @@ _readSetOp(void) READ_INT_FIELD(numCols); READ_ATTRNUMBER_ARRAY(dupColIdx, local_node->numCols); READ_OID_ARRAY(dupOperators, local_node->numCols); + READ_OID_ARRAY(dupCollations, local_node->numCols); READ_INT_FIELD(flagColIdx); READ_INT_FIELD(firstFlag); READ_LONG_FIELD(numGroups); diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 9fbe5b2a5f..93c56c657c 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -260,14 +260,14 @@ static Sort *make_sort_from_groupcols(List *groupcls, Plan *lefttree); static Material *make_material(Plan *lefttree); static WindowAgg *make_windowagg(List *tlist, Index winref, - int partNumCols, AttrNumber *partColIdx, Oid *partOperators, - int ordNumCols, AttrNumber *ordColIdx, Oid *ordOperators, + int partNumCols, AttrNumber *partColIdx, Oid *partOperators, Oid *partCollations, + int ordNumCols, AttrNumber *ordColIdx, Oid *ordOperators, Oid *ordCollations, int frameOptions, Node *startOffset, 
Node *endOffset, Oid startInRangeFunc, Oid endInRangeFunc, Oid inRangeColl, bool inRangeAsc, bool inRangeNullsFirst, Plan *lefttree); static Group *make_group(List *tlist, List *qual, int numGroupCols, - AttrNumber *grpColIdx, Oid *grpOperators, + AttrNumber *grpColIdx, Oid *grpOperators, Oid *grpCollations, Plan *lefttree); static Unique *make_unique_from_sortclauses(Plan *lefttree, List *distinctList); static Unique *make_unique_from_pathkeys(Plan *lefttree, @@ -1387,6 +1387,7 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path, int flags) bool newitems; int numGroupCols; AttrNumber *groupColIdx; + Oid *groupCollations; int groupColPos; ListCell *l; @@ -1453,6 +1454,7 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path, int flags) newtlist = subplan->targetlist; numGroupCols = list_length(uniq_exprs); groupColIdx = (AttrNumber *) palloc(numGroupCols * sizeof(AttrNumber)); + groupCollations = (Oid *) palloc(numGroupCols * sizeof(Oid)); groupColPos = 0; foreach(l, uniq_exprs) @@ -1463,7 +1465,9 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path, int flags) tle = tlist_member(uniqexpr, newtlist); if (!tle) /* shouldn't happen */ elog(ERROR, "failed to find unique expression in subplan tlist"); - groupColIdx[groupColPos++] = tle->resno; + groupColIdx[groupColPos] = tle->resno; + groupCollations[groupColPos] = exprCollation((Node *) tle->expr); + groupColPos++; } if (best_path->umethod == UNIQUE_PATH_HASH) @@ -1501,6 +1505,7 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path, int flags) numGroupCols, groupColIdx, groupOperators, + groupCollations, NIL, NIL, best_path->path.rows, @@ -1883,6 +1888,8 @@ create_group_plan(PlannerInfo *root, GroupPath *best_path) extract_grouping_cols(best_path->groupClause, subplan->targetlist), extract_grouping_ops(best_path->groupClause), + extract_grouping_collations(best_path->groupClause, + subplan->targetlist), subplan); copy_generic_path_info(&plan->plan, (Path *) best_path); @@ -1949,6 
+1956,8 @@ create_agg_plan(PlannerInfo *root, AggPath *best_path) extract_grouping_cols(best_path->groupClause, subplan->targetlist), extract_grouping_ops(best_path->groupClause), + extract_grouping_collations(best_path->groupClause, + subplan->targetlist), NIL, NIL, best_path->numGroups, @@ -2110,6 +2119,7 @@ create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path) list_length((List *) linitial(rollup->gsets)), new_grpColIdx, extract_grouping_ops(rollup->groupClause), + extract_grouping_collations(rollup->groupClause, subplan->targetlist), rollup->gsets, NIL, rollup->numGroups, @@ -2147,6 +2157,7 @@ create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path) numGroupCols, top_grpColIdx, extract_grouping_ops(rollup->groupClause), + extract_grouping_collations(rollup->groupClause, subplan->targetlist), rollup->gsets, chain, rollup->numGroups, @@ -2246,9 +2257,11 @@ create_windowagg_plan(PlannerInfo *root, WindowAggPath *best_path) int partNumCols; AttrNumber *partColIdx; Oid *partOperators; + Oid *partCollations; int ordNumCols; AttrNumber *ordColIdx; Oid *ordOperators; + Oid *ordCollations; ListCell *lc; /* @@ -2270,6 +2283,7 @@ create_windowagg_plan(PlannerInfo *root, WindowAggPath *best_path) */ partColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numPart); partOperators = (Oid *) palloc(sizeof(Oid) * numPart); + partCollations = (Oid *) palloc(sizeof(Oid) * numPart); partNumCols = 0; foreach(lc, wc->partitionClause) @@ -2280,11 +2294,13 @@ create_windowagg_plan(PlannerInfo *root, WindowAggPath *best_path) Assert(OidIsValid(sgc->eqop)); partColIdx[partNumCols] = tle->resno; partOperators[partNumCols] = sgc->eqop; + partCollations[partNumCols] = exprCollation((Node *) tle->expr); partNumCols++; } ordColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numOrder); ordOperators = (Oid *) palloc(sizeof(Oid) * numOrder); + ordCollations = (Oid *) palloc(sizeof(Oid) * numOrder); ordNumCols = 0; foreach(lc, wc->orderClause) @@ -2295,6 
+2311,7 @@ create_windowagg_plan(PlannerInfo *root, WindowAggPath *best_path) Assert(OidIsValid(sgc->eqop)); ordColIdx[ordNumCols] = tle->resno; ordOperators[ordNumCols] = sgc->eqop; + ordCollations[ordNumCols] = exprCollation((Node *) tle->expr); ordNumCols++; } @@ -2304,9 +2321,11 @@ create_windowagg_plan(PlannerInfo *root, WindowAggPath *best_path) partNumCols, partColIdx, partOperators, + partCollations, ordNumCols, ordColIdx, ordOperators, + ordCollations, wc->frameOptions, wc->startOffset, wc->endOffset, @@ -5326,10 +5345,12 @@ make_recursive_union(List *tlist, int keyno = 0; AttrNumber *dupColIdx; Oid *dupOperators; + Oid *dupCollations; ListCell *slitem; dupColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols); dupOperators = (Oid *) palloc(sizeof(Oid) * numCols); + dupCollations = (Oid *) palloc(sizeof(Oid) * numCols); foreach(slitem, distinctList) { @@ -5339,11 +5360,13 @@ make_recursive_union(List *tlist, dupColIdx[keyno] = tle->resno; dupOperators[keyno] = sortcl->eqop; + dupCollations[keyno] = exprCollation((Node *) tle->expr); Assert(OidIsValid(dupOperators[keyno])); keyno++; } node->dupColIdx = dupColIdx; node->dupOperators = dupOperators; + node->dupCollations = dupCollations; } node->numGroups = numGroups; @@ -6015,7 +6038,7 @@ materialize_finished_plan(Plan *subplan) Agg * make_agg(List *tlist, List *qual, AggStrategy aggstrategy, AggSplit aggsplit, - int numGroupCols, AttrNumber *grpColIdx, Oid *grpOperators, + int numGroupCols, AttrNumber *grpColIdx, Oid *grpOperators, Oid *grpCollations, List *groupingSets, List *chain, double dNumGroups, Plan *lefttree) { @@ -6031,6 +6054,7 @@ make_agg(List *tlist, List *qual, node->numCols = numGroupCols; node->grpColIdx = grpColIdx; node->grpOperators = grpOperators; + node->grpCollations = grpCollations; node->numGroups = numGroups; node->aggParams = NULL; /* SS_finalize_plan() will fill this */ node->groupingSets = groupingSets; @@ -6046,8 +6070,8 @@ make_agg(List *tlist, List *qual, static 
WindowAgg * make_windowagg(List *tlist, Index winref, - int partNumCols, AttrNumber *partColIdx, Oid *partOperators, - int ordNumCols, AttrNumber *ordColIdx, Oid *ordOperators, + int partNumCols, AttrNumber *partColIdx, Oid *partOperators, Oid *partCollations, + int ordNumCols, AttrNumber *ordColIdx, Oid *ordOperators, Oid *ordCollations, int frameOptions, Node *startOffset, Node *endOffset, Oid startInRangeFunc, Oid endInRangeFunc, Oid inRangeColl, bool inRangeAsc, bool inRangeNullsFirst, @@ -6060,9 +6084,11 @@ make_windowagg(List *tlist, Index winref, node->partNumCols = partNumCols; node->partColIdx = partColIdx; node->partOperators = partOperators; + node->partCollations = partCollations; node->ordNumCols = ordNumCols; node->ordColIdx = ordColIdx; node->ordOperators = ordOperators; + node->ordCollations = ordCollations; node->frameOptions = frameOptions; node->startOffset = startOffset; node->endOffset = endOffset; @@ -6087,6 +6113,7 @@ make_group(List *tlist, int numGroupCols, AttrNumber *grpColIdx, Oid *grpOperators, + Oid *grpCollations, Plan *lefttree) { Group *node = makeNode(Group); @@ -6095,6 +6122,7 @@ make_group(List *tlist, node->numCols = numGroupCols; node->grpColIdx = grpColIdx; node->grpOperators = grpOperators; + node->grpCollations = grpCollations; plan->qual = qual; plan->targetlist = tlist; @@ -6118,6 +6146,7 @@ make_unique_from_sortclauses(Plan *lefttree, List *distinctList) int keyno = 0; AttrNumber *uniqColIdx; Oid *uniqOperators; + Oid *uniqCollations; ListCell *slitem; plan->targetlist = lefttree->targetlist; @@ -6132,6 +6161,7 @@ make_unique_from_sortclauses(Plan *lefttree, List *distinctList) Assert(numCols > 0); uniqColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols); uniqOperators = (Oid *) palloc(sizeof(Oid) * numCols); + uniqCollations = (Oid *) palloc(sizeof(Oid) * numCols); foreach(slitem, distinctList) { @@ -6140,6 +6170,7 @@ make_unique_from_sortclauses(Plan *lefttree, List *distinctList) uniqColIdx[keyno] = tle->resno; 
uniqOperators[keyno] = sortcl->eqop; + uniqCollations[keyno] = exprCollation((Node *) tle->expr); Assert(OidIsValid(uniqOperators[keyno])); keyno++; } @@ -6147,6 +6178,7 @@ make_unique_from_sortclauses(Plan *lefttree, List *distinctList) node->numCols = numCols; node->uniqColIdx = uniqColIdx; node->uniqOperators = uniqOperators; + node->uniqCollations = uniqCollations; return node; } @@ -6162,6 +6194,7 @@ make_unique_from_pathkeys(Plan *lefttree, List *pathkeys, int numCols) int keyno = 0; AttrNumber *uniqColIdx; Oid *uniqOperators; + Oid *uniqCollations; ListCell *lc; plan->targetlist = lefttree->targetlist; @@ -6177,6 +6210,7 @@ make_unique_from_pathkeys(Plan *lefttree, List *pathkeys, int numCols) Assert(numCols >= 0 && numCols <= list_length(pathkeys)); uniqColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols); uniqOperators = (Oid *) palloc(sizeof(Oid) * numCols); + uniqCollations = (Oid *) palloc(sizeof(Oid) * numCols); foreach(lc, pathkeys) { @@ -6245,6 +6279,7 @@ make_unique_from_pathkeys(Plan *lefttree, List *pathkeys, int numCols) uniqColIdx[keyno] = tle->resno; uniqOperators[keyno] = eqop; + uniqCollations[keyno] = ec->ec_collation; keyno++; } @@ -6252,6 +6287,7 @@ make_unique_from_pathkeys(Plan *lefttree, List *pathkeys, int numCols) node->numCols = numCols; node->uniqColIdx = uniqColIdx; node->uniqOperators = uniqOperators; + node->uniqCollations = uniqCollations; return node; } @@ -6296,6 +6332,7 @@ make_setop(SetOpCmd cmd, SetOpStrategy strategy, Plan *lefttree, int keyno = 0; AttrNumber *dupColIdx; Oid *dupOperators; + Oid *dupCollations; ListCell *slitem; plan->targetlist = lefttree->targetlist; @@ -6309,6 +6346,7 @@ make_setop(SetOpCmd cmd, SetOpStrategy strategy, Plan *lefttree, */ dupColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols); dupOperators = (Oid *) palloc(sizeof(Oid) * numCols); + dupCollations = (Oid *) palloc(sizeof(Oid) * numCols); foreach(slitem, distinctList) { @@ -6317,6 +6355,7 @@ make_setop(SetOpCmd cmd, 
SetOpStrategy strategy, Plan *lefttree, dupColIdx[keyno] = tle->resno; dupOperators[keyno] = sortcl->eqop; + dupCollations[keyno] = exprCollation((Node *) tle->expr); Assert(OidIsValid(dupOperators[keyno])); keyno++; } @@ -6326,6 +6365,7 @@ make_setop(SetOpCmd cmd, SetOpStrategy strategy, Plan *lefttree, node->numCols = numCols; node->dupColIdx = dupColIdx; node->dupOperators = dupOperators; + node->dupCollations = dupCollations; node->flagColIdx = flagColIdx; node->firstFlag = firstFlag; node->numGroups = numGroups; diff --git a/src/backend/optimizer/util/tlist.c b/src/backend/optimizer/util/tlist.c index 14d1c67a94..bb3b7969f2 100644 --- a/src/backend/optimizer/util/tlist.c +++ b/src/backend/optimizer/util/tlist.c @@ -503,6 +503,31 @@ extract_grouping_ops(List *groupClause) return groupOperators; } +/* + * extract_grouping_collations - make an array of the grouping column collations + * for a SortGroupClause list + */ +Oid * +extract_grouping_collations(List *groupClause, List *tlist) +{ + int numCols = list_length(groupClause); + int colno = 0; + Oid *grpCollations; + ListCell *glitem; + + grpCollations = (Oid *) palloc(sizeof(Oid) * numCols); + + foreach(glitem, groupClause) + { + SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem); + TargetEntry *tle = get_sortgroupclause_tle(groupcl, tlist); + + grpCollations[colno++] = exprCollation((Node *) tle->expr); + } + + return grpCollations; +} + /* * extract_grouping_cols - make an array of the grouping column resnos * for a SortGroupClause list diff --git a/src/backend/partitioning/partbounds.c b/src/backend/partitioning/partbounds.c index e71eb3793b..83abff94bd 100644 --- a/src/backend/partitioning/partbounds.c +++ b/src/backend/partitioning/partbounds.c @@ -2659,7 +2659,7 @@ get_range_nulltest(PartitionKey key) * Compute the hash value for given partition key values. 
*/ uint64 -compute_partition_hash_value(int partnatts, FmgrInfo *partsupfunc, +compute_partition_hash_value(int partnatts, FmgrInfo *partsupfunc, Oid *partcollation, Datum *values, bool *isnull) { int i; @@ -2680,7 +2680,7 @@ compute_partition_hash_value(int partnatts, FmgrInfo *partsupfunc, * datatype-specific hash functions of each partition key * attribute. */ - hash = FunctionCall2(&partsupfunc[i], values[i], seed); + hash = FunctionCall2Coll(&partsupfunc[i], partcollation[i], values[i], seed); /* Form a single 64-bit hash value */ rowHash = hash_combine64(rowHash, DatumGetUInt64(hash)); diff --git a/src/backend/partitioning/partprune.c b/src/backend/partitioning/partprune.c index b5c0889935..31e0164ea9 100644 --- a/src/backend/partitioning/partprune.c +++ b/src/backend/partitioning/partprune.c @@ -2159,6 +2159,7 @@ get_matching_hash_bounds(PartitionPruneContext *context, int i; uint64 rowHash; int greatest_modulus; + Oid *partcollation = context->partcollation; Assert(context->strategy == PARTITION_STRATEGY_HASH); @@ -2179,7 +2180,7 @@ get_matching_hash_bounds(PartitionPruneContext *context, isnull[i] = bms_is_member(i, nullkeys); greatest_modulus = get_hash_partition_greatest_modulus(boundinfo); - rowHash = compute_partition_hash_value(partnatts, partsupfunc, + rowHash = compute_partition_hash_value(partnatts, partsupfunc, partcollation, values, isnull); if (partindices[rowHash % greatest_modulus] >= 0) diff --git a/src/backend/regex/regc_pg_locale.c b/src/backend/regex/regc_pg_locale.c index a8c0b156fa..4a808b7606 100644 --- a/src/backend/regex/regc_pg_locale.c +++ b/src/backend/regex/regc_pg_locale.c @@ -263,6 +263,11 @@ pg_set_regex_collation(Oid collation) errhint("Use the COLLATE clause to set the collation explicitly."))); } + if (pg_regex_locale && !pg_regex_locale->deterministic) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("nondeterministic collations are not supported for regular expressions"))); + #ifdef USE_ICU if 
(pg_regex_locale && pg_regex_locale->provider == COLLPROVIDER_ICU) pg_regex_strategy = PG_REGEX_LOCALE_ICU; diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c index 5b2917d159..61deefbbc8 100644 --- a/src/backend/utils/adt/arrayfuncs.c +++ b/src/backend/utils/adt/arrayfuncs.c @@ -3958,7 +3958,7 @@ hash_array(PG_FUNCTION_ARGS) * apply the hash function to each array element. */ InitFunctionCallInfoData(*locfcinfo, &typentry->hash_proc_finfo, 1, - InvalidOid, NULL, NULL); + PG_GET_COLLATION(), NULL, NULL); /* Loop over source data */ nitems = ArrayGetNItems(ndims, dims); diff --git a/src/backend/utils/adt/like.c b/src/backend/utils/adt/like.c index 853c9c01e9..704e5720cf 100644 --- a/src/backend/utils/adt/like.c +++ b/src/backend/utils/adt/like.c @@ -45,7 +45,7 @@ static int UTF8_MatchText(const char *t, int tlen, const char *p, int plen, static int SB_IMatchText(const char *t, int tlen, const char *p, int plen, pg_locale_t locale, bool locale_is_c); -static int GenericMatchText(const char *s, int slen, const char *p, int plen); +static int GenericMatchText(const char *s, int slen, const char *p, int plen, Oid collation); static int Generic_Text_IC_like(text *str, text *pat, Oid collation); /*-------------------- @@ -148,8 +148,18 @@ SB_lower_char(unsigned char c, pg_locale_t locale, bool locale_is_c) /* Generic for all cases not requiring inline case-folding */ static inline int -GenericMatchText(const char *s, int slen, const char *p, int plen) +GenericMatchText(const char *s, int slen, const char *p, int plen, Oid collation) { + if (collation && !lc_ctype_is_c(collation) && collation != DEFAULT_COLLATION_OID) + { + pg_locale_t locale = pg_newlocale_from_collation(collation); + + if (locale && !locale->deterministic) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("nondeterministic collations are not supported for LIKE"))); + } + if (pg_database_encoding_max_length() == 1) return SB_MatchText(s, slen, p, plen, 
0, true); else if (GetDatabaseEncoding() == PG_UTF8) @@ -184,6 +194,11 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation) errhint("Use the COLLATE clause to set the collation explicitly."))); } locale = pg_newlocale_from_collation(collation); + + if (locale && !locale->deterministic) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("nondeterministic collations are not supported for ILIKE"))); } /* @@ -240,7 +255,7 @@ namelike(PG_FUNCTION_ARGS) p = VARDATA_ANY(pat); plen = VARSIZE_ANY_EXHDR(pat); - result = (GenericMatchText(s, slen, p, plen) == LIKE_TRUE); + result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) == LIKE_TRUE); PG_RETURN_BOOL(result); } @@ -261,7 +276,7 @@ namenlike(PG_FUNCTION_ARGS) p = VARDATA_ANY(pat); plen = VARSIZE_ANY_EXHDR(pat); - result = (GenericMatchText(s, slen, p, plen) != LIKE_TRUE); + result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) != LIKE_TRUE); PG_RETURN_BOOL(result); } @@ -282,7 +297,7 @@ textlike(PG_FUNCTION_ARGS) p = VARDATA_ANY(pat); plen = VARSIZE_ANY_EXHDR(pat); - result = (GenericMatchText(s, slen, p, plen) == LIKE_TRUE); + result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) == LIKE_TRUE); PG_RETURN_BOOL(result); } @@ -303,7 +318,7 @@ textnlike(PG_FUNCTION_ARGS) p = VARDATA_ANY(pat); plen = VARSIZE_ANY_EXHDR(pat); - result = (GenericMatchText(s, slen, p, plen) != LIKE_TRUE); + result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) != LIKE_TRUE); PG_RETURN_BOOL(result); } diff --git a/src/backend/utils/adt/like_support.c b/src/backend/utils/adt/like_support.c index 69509811ef..a65e63736c 100644 --- a/src/backend/utils/adt/like_support.c +++ b/src/backend/utils/adt/like_support.c @@ -257,6 +257,20 @@ match_pattern_prefix(Node *leftop, return NIL; patt = (Const *) rightop; + /* + * Not supported if the expression collation is nondeterministic. 
The + * optimized equality or prefix tests use bytewise comparisons, which is + * not consistent with nondeterministic collations. The actual + * pattern-matching implementation functions will later error out that + * pattern-matching is not supported with nondeterministic collations. + * (We could also error out here, but by doing it later we get more + * precise error messages.) (It should be possible to support at least + * Pattern_Prefix_Exact, but no point as long as the actual + * pattern-matching implementations don't support it.) + */ + if (!get_collation_isdeterministic(expr_coll)) + return NIL; + /* * Try to extract a fixed prefix from the pattern. */ diff --git a/src/backend/utils/adt/name.c b/src/backend/utils/adt/name.c index 3a7887d455..54425925ed 100644 --- a/src/backend/utils/adt/name.c +++ b/src/backend/utils/adt/name.c @@ -131,14 +131,26 @@ namesend(PG_FUNCTION_ARGS) * have a '\0' terminator. Whatever might be past the terminator is not * considered relevant to comparisons.
*/ +static int +namecmp(Name arg1, Name arg2, Oid collid) +{ + /* Fast path for common case used in system catalogs */ + if (collid == C_COLLATION_OID) + return strncmp(NameStr(*arg1), NameStr(*arg2), NAMEDATALEN); + + /* Else rely on the varstr infrastructure */ + return varstr_cmp(NameStr(*arg1), strlen(NameStr(*arg1)), + NameStr(*arg2), strlen(NameStr(*arg2)), + collid); +} + Datum nameeq(PG_FUNCTION_ARGS) { Name arg1 = PG_GETARG_NAME(0); Name arg2 = PG_GETARG_NAME(1); - /* Collation doesn't matter: equal only if bitwise-equal */ - PG_RETURN_BOOL(strncmp(NameStr(*arg1), NameStr(*arg2), NAMEDATALEN) == 0); + PG_RETURN_BOOL(namecmp(arg1, arg2, PG_GET_COLLATION()) == 0); } Datum @@ -147,21 +159,7 @@ namene(PG_FUNCTION_ARGS) Name arg1 = PG_GETARG_NAME(0); Name arg2 = PG_GETARG_NAME(1); - /* Collation doesn't matter: equal only if bitwise-equal */ - PG_RETURN_BOOL(strncmp(NameStr(*arg1), NameStr(*arg2), NAMEDATALEN) != 0); -} - -static int -namecmp(Name arg1, Name arg2, Oid collid) -{ - /* Fast path for common case used in system catalogs */ - if (collid == C_COLLATION_OID) - return strncmp(NameStr(*arg1), NameStr(*arg2), NAMEDATALEN); - - /* Else rely on the varstr infrastructure */ - return varstr_cmp(NameStr(*arg1), strlen(NameStr(*arg1)), - NameStr(*arg2), strlen(NameStr(*arg2)), - collid); + PG_RETURN_BOOL(namecmp(arg1, arg2, PG_GET_COLLATION()) != 0); } Datum diff --git a/src/backend/utils/adt/orderedsetaggs.c b/src/backend/utils/adt/orderedsetaggs.c index 2d384a9944..4db2d0d0e1 100644 --- a/src/backend/utils/adt/orderedsetaggs.c +++ b/src/backend/utils/adt/orderedsetaggs.c @@ -1084,7 +1084,7 @@ mode_final(PG_FUNCTION_ARGS) last_abbrev_val = abbrev_val; } else if (abbrev_val == last_abbrev_val && - DatumGetBool(FunctionCall2(equalfn, val, last_val))) + DatumGetBool(FunctionCall2Coll(equalfn, PG_GET_COLLATION(), val, last_val))) { /* value equal to previous value, count it */ if (last_val_is_mode) @@ -1345,6 +1345,7 @@ 
hypothetical_dense_rank_final(PG_FUNCTION_ARGS) numDistinctCols, sortColIdx, osastate->qstate->eqOperators, + osastate->qstate->sortCollations, NULL); MemoryContextSwitchTo(oldContext); osastate->qstate->compareTuple = compareTuple; diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c index 50b8b31645..1f7fdc0593 100644 --- a/src/backend/utils/adt/pg_locale.c +++ b/src/backend/utils/adt/pg_locale.c @@ -1308,6 +1308,7 @@ pg_newlocale_from_collation(Oid collid) /* We'll fill in the result struct locally before allocating memory */ memset(&result, 0, sizeof(result)); result.provider = collform->collprovider; + result.deterministic = collform->collisdeterministic; if (collform->collprovider == COLLPROVIDER_LIBC) { diff --git a/src/backend/utils/adt/ri_triggers.c b/src/backend/utils/adt/ri_triggers.c index ef04fa5009..f98db135ad 100644 --- a/src/backend/utils/adt/ri_triggers.c +++ b/src/backend/utils/adt/ri_triggers.c @@ -692,6 +692,8 @@ ri_restrict(TriggerData *trigdata, bool is_no_action) { Oid pk_type = RIAttType(pk_rel, riinfo->pk_attnums[i]); Oid fk_type = RIAttType(fk_rel, riinfo->fk_attnums[i]); + Oid pk_coll = RIAttCollation(pk_rel, riinfo->pk_attnums[i]); + Oid fk_coll = RIAttCollation(fk_rel, riinfo->fk_attnums[i]); quoteOneName(attname, RIAttName(fk_rel, riinfo->fk_attnums[i])); @@ -700,6 +702,8 @@ ri_restrict(TriggerData *trigdata, bool is_no_action) paramname, pk_type, riinfo->pf_eq_oprs[i], attname, fk_type); + if (pk_coll != fk_coll && !get_collation_isdeterministic(pk_coll)) + ri_GenerateQualCollation(&querybuf, pk_coll); querysep = "AND"; queryoids[i] = pk_type; } @@ -794,6 +798,8 @@ RI_FKey_cascade_del(PG_FUNCTION_ARGS) { Oid pk_type = RIAttType(pk_rel, riinfo->pk_attnums[i]); Oid fk_type = RIAttType(fk_rel, riinfo->fk_attnums[i]); + Oid pk_coll = RIAttCollation(pk_rel, riinfo->pk_attnums[i]); + Oid fk_coll = RIAttCollation(fk_rel, riinfo->fk_attnums[i]); quoteOneName(attname, RIAttName(fk_rel, riinfo->fk_attnums[i])); @@ 
-802,6 +808,8 @@ RI_FKey_cascade_del(PG_FUNCTION_ARGS) paramname, pk_type, riinfo->pf_eq_oprs[i], attname, fk_type); + if (pk_coll != fk_coll && !get_collation_isdeterministic(pk_coll)) + ri_GenerateQualCollation(&querybuf, pk_coll); querysep = "AND"; queryoids[i] = pk_type; } @@ -906,6 +914,8 @@ RI_FKey_cascade_upd(PG_FUNCTION_ARGS) { Oid pk_type = RIAttType(pk_rel, riinfo->pk_attnums[i]); Oid fk_type = RIAttType(fk_rel, riinfo->fk_attnums[i]); + Oid pk_coll = RIAttCollation(pk_rel, riinfo->pk_attnums[i]); + Oid fk_coll = RIAttCollation(fk_rel, riinfo->fk_attnums[i]); quoteOneName(attname, RIAttName(fk_rel, riinfo->fk_attnums[i])); @@ -917,6 +927,8 @@ RI_FKey_cascade_upd(PG_FUNCTION_ARGS) paramname, pk_type, riinfo->pf_eq_oprs[i], attname, fk_type); + if (pk_coll != fk_coll && !get_collation_isdeterministic(pk_coll)) + ri_GenerateQualCollation(&querybuf, pk_coll); querysep = ","; qualsep = "AND"; queryoids[i] = pk_type; @@ -1081,6 +1093,8 @@ ri_set(TriggerData *trigdata, bool is_set_null) { Oid pk_type = RIAttType(pk_rel, riinfo->pk_attnums[i]); Oid fk_type = RIAttType(fk_rel, riinfo->fk_attnums[i]); + Oid pk_coll = RIAttCollation(pk_rel, riinfo->pk_attnums[i]); + Oid fk_coll = RIAttCollation(fk_rel, riinfo->fk_attnums[i]); quoteOneName(attname, RIAttName(fk_rel, riinfo->fk_attnums[i])); @@ -1093,6 +1107,8 @@ ri_set(TriggerData *trigdata, bool is_set_null) paramname, pk_type, riinfo->pf_eq_oprs[i], attname, fk_type); + if (pk_coll != fk_coll && !get_collation_isdeterministic(pk_coll)) + ri_GenerateQualCollation(&querybuf, pk_coll); querysep = ","; qualsep = "AND"; queryoids[i] = pk_type; @@ -2492,11 +2508,20 @@ ri_AttributesEqual(Oid eq_opr, Oid typeid, } /* - * Apply the comparison operator. We assume it doesn't care about - * collations. + * Apply the comparison operator. 
+ * + * Note: This function is part of a call stack that determines whether an + * update to a row is significant enough that it needs checking or action + * on the other side of a foreign-key constraint. Therefore, the + * comparison here would need to be done with the collation of the *other* + * table. For simplicity (e.g., we might not even have the other table + * open), we'll just use the default collation here, which could lead to + * some false negatives. All this would break if we ever allow + * database-wide collations to be nondeterministic. */ - return DatumGetBool(FunctionCall2(&entry->eq_opr_finfo, - oldvalue, newvalue)); + return DatumGetBool(FunctionCall2Coll(&entry->eq_opr_finfo, + DEFAULT_COLLATION_OID, + oldvalue, newvalue)); } /* diff --git a/src/backend/utils/adt/varchar.c b/src/backend/utils/adt/varchar.c index c866af022f..447d57b3a1 100644 --- a/src/backend/utils/adt/varchar.c +++ b/src/backend/utils/adt/varchar.c @@ -24,6 +24,8 @@ #include "nodes/supportnodes.h" #include "utils/array.h" #include "utils/builtins.h" +#include "utils/lsyscache.h" +#include "utils/pg_locale.h" #include "utils/varlena.h" #include "mb/pg_wchar.h" @@ -718,6 +720,22 @@ bpcharoctetlen(PG_FUNCTION_ARGS) * need to be so careful. *****************************************************************************/ +static void +check_collation_set(Oid collid) +{ + if (!OidIsValid(collid)) + { + /* + * This typically means that the parser could not resolve a conflict + * of implicit collations, so report it that way. 
+ */ + ereport(ERROR, + (errcode(ERRCODE_INDETERMINATE_COLLATION), + errmsg("could not determine which collation to use for string comparison"), + errhint("Use the COLLATE clause to set the collation explicitly."))); + } +} + Datum bpchareq(PG_FUNCTION_ARGS) { @@ -726,18 +744,31 @@ bpchareq(PG_FUNCTION_ARGS) int len1, len2; bool result; + Oid collid = PG_GET_COLLATION(); + + check_collation_set(collid); len1 = bcTruelen(arg1); len2 = bcTruelen(arg2); - /* - * Since we only care about equality or not-equality, we can avoid all the - * expense of strcoll() here, and just do bitwise comparison. - */ - if (len1 != len2) - result = false; + if (lc_collate_is_c(collid) || + collid == DEFAULT_COLLATION_OID || + pg_newlocale_from_collation(collid)->deterministic) + { + /* + * Since we only care about equality or not-equality, we can avoid all the + * expense of strcoll() here, and just do bitwise comparison. + */ + if (len1 != len2) + result = false; + else + result = (memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), len1) == 0); + } else - result = (memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), len1) == 0); + { + result = (varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2, + collid) == 0); + } PG_FREE_IF_COPY(arg1, 0); PG_FREE_IF_COPY(arg2, 1); @@ -753,18 +784,29 @@ bpcharne(PG_FUNCTION_ARGS) int len1, len2; bool result; + Oid collid = PG_GET_COLLATION(); len1 = bcTruelen(arg1); len2 = bcTruelen(arg2); - /* - * Since we only care about equality or not-equality, we can avoid all the - * expense of strcoll() here, and just do bitwise comparison. - */ - if (len1 != len2) - result = true; + if (lc_collate_is_c(collid) || + collid == DEFAULT_COLLATION_OID || + pg_newlocale_from_collation(collid)->deterministic) + { + /* + * Since we only care about equality or not-equality, we can avoid all the + * expense of strcoll() here, and just do bitwise comparison. 
+ */ + if (len1 != len2) + result = true; + else + result = (memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), len1) != 0); + } else - result = (memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), len1) != 0); + { + result = (varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2, + collid) != 0); + } PG_FREE_IF_COPY(arg1, 0); PG_FREE_IF_COPY(arg2, 1); @@ -934,23 +976,60 @@ bpchar_smaller(PG_FUNCTION_ARGS) /* * bpchar needs a specialized hash function because we want to ignore * trailing blanks in comparisons. - * - * Note: currently there is no need for locale-specific behavior here, - * but if we ever change the semantics of bpchar comparison to trust - * strcoll() completely, we'd need to do something different in non-C locales. */ Datum hashbpchar(PG_FUNCTION_ARGS) { BpChar *key = PG_GETARG_BPCHAR_PP(0); + Oid collid = PG_GET_COLLATION(); char *keydata; int keylen; + pg_locale_t mylocale = 0; Datum result; + if (!collid) + ereport(ERROR, + (errcode(ERRCODE_INDETERMINATE_COLLATION), + errmsg("could not determine which collation to use for string hashing"), + errhint("Use the COLLATE clause to set the collation explicitly."))); + keydata = VARDATA_ANY(key); keylen = bcTruelen(key); - result = hash_any((unsigned char *) keydata, keylen); + if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID) + mylocale = pg_newlocale_from_collation(collid); + + if (!mylocale || mylocale->deterministic) + { + result = hash_any((unsigned char *) keydata, keylen); + } + else + { +#ifdef USE_ICU + if (mylocale->provider == COLLPROVIDER_ICU) + { + int32_t ulen = -1; + UChar *uchar = NULL; + Size bsize; + uint8_t *buf; + + ulen = icu_to_uchar(&uchar, keydata, keylen); + + bsize = ucol_getSortKey(mylocale->info.icu.ucol, + uchar, ulen, NULL, 0); + buf = palloc(bsize); + ucol_getSortKey(mylocale->info.icu.ucol, + uchar, ulen, buf, bsize); + + result = hash_any(buf, bsize); + + pfree(buf); + } + else +#endif + /* shouldn't happen */ + elog(ERROR, "unsupported collprovider: %c", 
mylocale->provider); + } /* Avoid leaking memory for toasted inputs */ PG_FREE_IF_COPY(key, 0); @@ -962,15 +1041,56 @@ Datum hashbpcharextended(PG_FUNCTION_ARGS) { BpChar *key = PG_GETARG_BPCHAR_PP(0); + Oid collid = PG_GET_COLLATION(); char *keydata; int keylen; + pg_locale_t mylocale = 0; Datum result; + if (!collid) + ereport(ERROR, + (errcode(ERRCODE_INDETERMINATE_COLLATION), + errmsg("could not determine which collation to use for string hashing"), + errhint("Use the COLLATE clause to set the collation explicitly."))); + keydata = VARDATA_ANY(key); keylen = bcTruelen(key); - result = hash_any_extended((unsigned char *) keydata, keylen, - PG_GETARG_INT64(1)); + if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID) + mylocale = pg_newlocale_from_collation(collid); + + if (!mylocale || mylocale->deterministic) + { + result = hash_any_extended((unsigned char *) keydata, keylen, + PG_GETARG_INT64(1)); + } + else + { +#ifdef USE_ICU + if (mylocale->provider == COLLPROVIDER_ICU) + { + int32_t ulen = -1; + UChar *uchar = NULL; + Size bsize; + uint8_t *buf; + + ulen = icu_to_uchar(&uchar, VARDATA_ANY(key), VARSIZE_ANY_EXHDR(key)); + + bsize = ucol_getSortKey(mylocale->info.icu.ucol, + uchar, ulen, NULL, 0); + buf = palloc(bsize); + ucol_getSortKey(mylocale->info.icu.ucol, + uchar, ulen, buf, bsize); + + result = hash_any_extended(buf, bsize, PG_GETARG_INT64(1)); + + pfree(buf); + } + else +#endif + /* shouldn't happen */ + elog(ERROR, "unsupported collprovider: %c", mylocale->provider); + } PG_FREE_IF_COPY(key, 0); @@ -986,12 +1106,23 @@ hashbpcharextended(PG_FUNCTION_ARGS) */ static int -internal_bpchar_pattern_compare(BpChar *arg1, BpChar *arg2) +internal_bpchar_pattern_compare(BpChar *arg1, BpChar *arg2, Oid collid) { int result; int len1, len2; + check_collation_set(collid); + + /* + * see internal_text_pattern_compare() + */ + if (!get_collation_isdeterministic(collid)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + 
errmsg("nondeterministic collations are not supported for operator class \"%s\"", + "bpchar_pattern_ops"))); + len1 = bcTruelen(arg1); len2 = bcTruelen(arg2); @@ -1014,7 +1145,7 @@ bpchar_pattern_lt(PG_FUNCTION_ARGS) BpChar *arg2 = PG_GETARG_BPCHAR_PP(1); int result; - result = internal_bpchar_pattern_compare(arg1, arg2); + result = internal_bpchar_pattern_compare(arg1, arg2, PG_GET_COLLATION()); PG_FREE_IF_COPY(arg1, 0); PG_FREE_IF_COPY(arg2, 1); @@ -1030,7 +1161,7 @@ bpchar_pattern_le(PG_FUNCTION_ARGS) BpChar *arg2 = PG_GETARG_BPCHAR_PP(1); int result; - result = internal_bpchar_pattern_compare(arg1, arg2); + result = internal_bpchar_pattern_compare(arg1, arg2, PG_GET_COLLATION()); PG_FREE_IF_COPY(arg1, 0); PG_FREE_IF_COPY(arg2, 1); @@ -1046,7 +1177,7 @@ bpchar_pattern_ge(PG_FUNCTION_ARGS) BpChar *arg2 = PG_GETARG_BPCHAR_PP(1); int result; - result = internal_bpchar_pattern_compare(arg1, arg2); + result = internal_bpchar_pattern_compare(arg1, arg2, PG_GET_COLLATION()); PG_FREE_IF_COPY(arg1, 0); PG_FREE_IF_COPY(arg2, 1); @@ -1062,7 +1193,7 @@ bpchar_pattern_gt(PG_FUNCTION_ARGS) BpChar *arg2 = PG_GETARG_BPCHAR_PP(1); int result; - result = internal_bpchar_pattern_compare(arg1, arg2); + result = internal_bpchar_pattern_compare(arg1, arg2, PG_GET_COLLATION()); PG_FREE_IF_COPY(arg1, 0); PG_FREE_IF_COPY(arg2, 1); @@ -1078,7 +1209,7 @@ btbpchar_pattern_cmp(PG_FUNCTION_ARGS) BpChar *arg2 = PG_GETARG_BPCHAR_PP(1); int result; - result = internal_bpchar_pattern_compare(arg1, arg2); + result = internal_bpchar_pattern_compare(arg1, arg2, PG_GET_COLLATION()); PG_FREE_IF_COPY(arg1, 0); PG_FREE_IF_COPY(arg2, 1); @@ -1091,8 +1222,17 @@ Datum btbpchar_pattern_sortsupport(PG_FUNCTION_ARGS) { SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0); + Oid collid = ssup->ssup_collation; MemoryContext oldcontext; + check_collation_set(collid); + + if (!get_collation_isdeterministic(collid)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("nondeterministic collations 
are not supported for operator class \"%s\"", + "bpchar_pattern_ops"))); + oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt); /* Use generic string SortSupport, forcing "C" collation */ diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c index 693ccc5149..9f810c2fd5 100644 --- a/src/backend/utils/adt/varlena.c +++ b/src/backend/utils/adt/varlena.c @@ -122,13 +122,14 @@ static text *text_substring(Datum str, int32 length, bool length_not_specified); static text *text_overlay(text *t1, text *t2, int sp, int sl); -static int text_position(text *t1, text *t2); -static void text_position_setup(text *t1, text *t2, TextPositionState *state); +static int text_position(text *t1, text *t2, Oid collid); +static void text_position_setup(text *t1, text *t2, Oid collid, TextPositionState *state); static bool text_position_next(TextPositionState *state); static char *text_position_next_internal(char *start_ptr, TextPositionState *state); static char *text_position_get_match_ptr(TextPositionState *state); static int text_position_get_match_pos(TextPositionState *state); static void text_position_cleanup(TextPositionState *state); +static void check_collation_set(Oid collid); static int text_cmp(text *arg1, text *arg2, Oid collid); static bytea *bytea_catenate(bytea *t1, bytea *t2); static bytea *bytea_substring(Datum str, @@ -1094,7 +1095,7 @@ textpos(PG_FUNCTION_ARGS) text *str = PG_GETARG_TEXT_PP(0); text *search_str = PG_GETARG_TEXT_PP(1); - PG_RETURN_INT32((int32) text_position(str, search_str)); + PG_RETURN_INT32((int32) text_position(str, search_str, PG_GET_COLLATION())); } /* @@ -1112,7 +1113,7 @@ textpos(PG_FUNCTION_ARGS) * functions. 
*/ static int -text_position(text *t1, text *t2) +text_position(text *t1, text *t2, Oid collid) { TextPositionState state; int result; @@ -1120,7 +1121,7 @@ text_position(text *t1, text *t2) if (VARSIZE_ANY_EXHDR(t1) < 1 || VARSIZE_ANY_EXHDR(t2) < 1) return 0; - text_position_setup(t1, t2, &state); + text_position_setup(t1, t2, collid, &state); if (!text_position_next(&state)) result = 0; else @@ -1147,10 +1148,21 @@ text_position(text *t1, text *t2) */ static void -text_position_setup(text *t1, text *t2, TextPositionState *state) +text_position_setup(text *t1, text *t2, Oid collid, TextPositionState *state) { int len1 = VARSIZE_ANY_EXHDR(t1); int len2 = VARSIZE_ANY_EXHDR(t2); + pg_locale_t mylocale = 0; + + check_collation_set(collid); + + if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID) + mylocale = pg_newlocale_from_collation(collid); + + if (mylocale && !mylocale->deterministic) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("nondeterministic collations are not supported for substring searches"))); Assert(len1 > 0); Assert(len2 > 0); @@ -1429,6 +1441,22 @@ text_position_cleanup(TextPositionState *state) /* no cleanup needed */ } +static void +check_collation_set(Oid collid) +{ + if (!OidIsValid(collid)) + { + /* + * This typically means that the parser could not resolve a conflict + * of implicit collations, so report it that way. + */ + ereport(ERROR, + (errcode(ERRCODE_INDETERMINATE_COLLATION), + errmsg("could not determine which collation to use for string comparison"), + errhint("Use the COLLATE clause to set the collation explicitly."))); + } +} + /* varstr_cmp() * Comparison function for text strings with given lengths. 
* Includes locale support, but must copy strings to temporary memory @@ -1441,6 +1469,8 @@ varstr_cmp(const char *arg1, int len1, const char *arg2, int len2, Oid collid) { int result; + check_collation_set(collid); + /* * Unfortunately, there is no strncoll(), so in the non-C locale case we * have to do some memory copying. This turns out to be significantly @@ -1462,20 +1492,7 @@ varstr_cmp(const char *arg1, int len1, const char *arg2, int len2, Oid collid) pg_locale_t mylocale = 0; if (collid != DEFAULT_COLLATION_OID) - { - if (!OidIsValid(collid)) - { - /* - * This typically means that the parser could not resolve a - * conflict of implicit collations, so report it that way. - */ - ereport(ERROR, - (errcode(ERRCODE_INDETERMINATE_COLLATION), - errmsg("could not determine which collation to use for string comparison"), - errhint("Use the COLLATE clause to set the collation explicitly."))); - } mylocale = pg_newlocale_from_collation(collid); - } /* * memcmp() can't tell us which of two unequal strings sorts first, @@ -1558,13 +1575,9 @@ varstr_cmp(const char *arg1, int len1, const char *arg2, int len2, Oid collid) ereport(ERROR, (errmsg("could not compare Unicode strings: %m"))); - /* - * In some locales wcscoll() can claim that nonidentical strings - * are equal. Believing that would be bad news for a number of - * reasons, so we follow Perl's lead and sort "equal" strings - * according to strcmp (on the UTF-8 representation). - */ - if (result == 0) + /* Break tie if necessary. */ + if (result == 0 && + (!mylocale || mylocale->deterministic)) { result = memcmp(arg1, arg2, Min(len1, len2)); if ((result == 0) && (len1 != len2)) @@ -1649,13 +1662,9 @@ varstr_cmp(const char *arg1, int len1, const char *arg2, int len2, Oid collid) else result = strcoll(a1p, a2p); - /* - * In some locales strcoll() can claim that nonidentical strings are - * equal. 
Believing that would be bad news for a number of reasons, - * so we follow Perl's lead and sort "equal" strings according to - * strcmp(). - */ - if (result == 0) + /* Break tie if necessary. */ + if (result == 0 && + (!mylocale || mylocale->deterministic)) result = strcmp(a1p, a2p); if (a1p != a1buf) @@ -1699,33 +1708,52 @@ text_cmp(text *arg1, text *arg2, Oid collid) Datum texteq(PG_FUNCTION_ARGS) { - Datum arg1 = PG_GETARG_DATUM(0); - Datum arg2 = PG_GETARG_DATUM(1); + Oid collid = PG_GET_COLLATION(); bool result; - Size len1, - len2; - /* - * Since we only care about equality or not-equality, we can avoid all the - * expense of strcoll() here, and just do bitwise comparison. In fact, we - * don't even have to do a bitwise comparison if we can show the lengths - * of the strings are unequal; which might save us from having to detoast - * one or both values. - */ - len1 = toast_raw_datum_size(arg1); - len2 = toast_raw_datum_size(arg2); - if (len1 != len2) - result = false; + check_collation_set(collid); + + if (lc_collate_is_c(collid) || + collid == DEFAULT_COLLATION_OID || + pg_newlocale_from_collation(collid)->deterministic) + { + Datum arg1 = PG_GETARG_DATUM(0); + Datum arg2 = PG_GETARG_DATUM(1); + Size len1, + len2; + + /* + * Since we only care about equality or not-equality, we can avoid all the + * expense of strcoll() here, and just do bitwise comparison. In fact, we + * don't even have to do a bitwise comparison if we can show the lengths + * of the strings are unequal; which might save us from having to detoast + * one or both values. 
+ */ + len1 = toast_raw_datum_size(arg1); + len2 = toast_raw_datum_size(arg2); + if (len1 != len2) + result = false; + else + { + text *targ1 = DatumGetTextPP(arg1); + text *targ2 = DatumGetTextPP(arg2); + + result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2), + len1 - VARHDRSZ) == 0); + + PG_FREE_IF_COPY(targ1, 0); + PG_FREE_IF_COPY(targ2, 1); + } + } else { - text *targ1 = DatumGetTextPP(arg1); - text *targ2 = DatumGetTextPP(arg2); + text *arg1 = PG_GETARG_TEXT_PP(0); + text *arg2 = PG_GETARG_TEXT_PP(1); - result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2), - len1 - VARHDRSZ) == 0); + result = (text_cmp(arg1, arg2, collid) == 0); - PG_FREE_IF_COPY(targ1, 0); - PG_FREE_IF_COPY(targ2, 1); + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); } PG_RETURN_BOOL(result); @@ -1734,27 +1762,46 @@ texteq(PG_FUNCTION_ARGS) Datum textne(PG_FUNCTION_ARGS) { - Datum arg1 = PG_GETARG_DATUM(0); - Datum arg2 = PG_GETARG_DATUM(1); + Oid collid = PG_GET_COLLATION(); bool result; - Size len1, - len2; - /* See comment in texteq() */ - len1 = toast_raw_datum_size(arg1); - len2 = toast_raw_datum_size(arg2); - if (len1 != len2) - result = true; + check_collation_set(collid); + + if (lc_collate_is_c(collid) || + collid == DEFAULT_COLLATION_OID || + pg_newlocale_from_collation(collid)->deterministic) + { + Datum arg1 = PG_GETARG_DATUM(0); + Datum arg2 = PG_GETARG_DATUM(1); + Size len1, + len2; + + /* See comment in texteq() */ + len1 = toast_raw_datum_size(arg1); + len2 = toast_raw_datum_size(arg2); + if (len1 != len2) + result = true; + else + { + text *targ1 = DatumGetTextPP(arg1); + text *targ2 = DatumGetTextPP(arg2); + + result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2), + len1 - VARHDRSZ) != 0); + + PG_FREE_IF_COPY(targ1, 0); + PG_FREE_IF_COPY(targ2, 1); + } + } else { - text *targ1 = DatumGetTextPP(arg1); - text *targ2 = DatumGetTextPP(arg2); + text *arg1 = PG_GETARG_TEXT_PP(0); + text *arg2 = PG_GETARG_TEXT_PP(1); - result = (memcmp(VARDATA_ANY(targ1), 
VARDATA_ANY(targ2), - len1 - VARHDRSZ) != 0); + result = (text_cmp(arg1, arg2, collid) != 0); - PG_FREE_IF_COPY(targ1, 0); - PG_FREE_IF_COPY(targ2, 1); + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); } PG_RETURN_BOOL(result); @@ -1825,10 +1872,22 @@ text_starts_with(PG_FUNCTION_ARGS) { Datum arg1 = PG_GETARG_DATUM(0); Datum arg2 = PG_GETARG_DATUM(1); + Oid collid = PG_GET_COLLATION(); + pg_locale_t mylocale = 0; bool result; Size len1, len2; + check_collation_set(collid); + + if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID) + mylocale = pg_newlocale_from_collation(collid); + + if (mylocale && !mylocale->deterministic) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("nondeterministic collations are not supported for substring searches"))); + len1 = toast_raw_datum_size(arg1); len2 = toast_raw_datum_size(arg2); if (len2 > len1) @@ -1898,6 +1957,8 @@ varstr_sortsupport(SortSupport ssup, Oid typid, Oid collid) VarStringSortSupport *sss; pg_locale_t locale = 0; + check_collation_set(collid); + /* * If possible, set ssup->comparator to a function which can be used to * directly compare two datums. If we can do this, we'll avoid the @@ -1934,20 +1995,7 @@ varstr_sortsupport(SortSupport ssup, Oid typid, Oid collid) * result. */ if (collid != DEFAULT_COLLATION_OID) - { - if (!OidIsValid(collid)) - { - /* - * This typically means that the parser could not resolve a - * conflict of implicit collations, so report it that way. - */ - ereport(ERROR, - (errcode(ERRCODE_INDETERMINATE_COLLATION), - errmsg("could not determine which collation to use for string comparison"), - errhint("Use the COLLATE clause to set the collation explicitly."))); - } locale = pg_newlocale_from_collation(collid); - } /* * There is a further exception on Windows. 
When the database @@ -2328,12 +2376,9 @@ varstrfastcmp_locale(char *a1p, int len1, char *a2p, int len2, SortSupport ssup) else result = strcoll(sss->buf1, sss->buf2); - /* - * In some locales strcoll() can claim that nonidentical strings are - * equal. Believing that would be bad news for a number of reasons, so we - * follow Perl's lead and sort "equal" strings according to strcmp(). - */ - if (result == 0) + /* Break tie if necessary. */ + if (result == 0 && + (!sss->locale || sss->locale->deterministic)) result = strcmp(sss->buf1, sss->buf2); /* Cache result, perhaps saving an expensive strcoll() call next time */ @@ -2760,10 +2805,18 @@ nameeqtext(PG_FUNCTION_ARGS) text *arg2 = PG_GETARG_TEXT_PP(1); size_t len1 = strlen(NameStr(*arg1)); size_t len2 = VARSIZE_ANY_EXHDR(arg2); + Oid collid = PG_GET_COLLATION(); bool result; - result = (len1 == len2 && - memcmp(NameStr(*arg1), VARDATA_ANY(arg2), len1) == 0); + check_collation_set(collid); + + if (collid == C_COLLATION_OID) + result = (len1 == len2 && + memcmp(NameStr(*arg1), VARDATA_ANY(arg2), len1) == 0); + else + result = (varstr_cmp(NameStr(*arg1), len1, + VARDATA_ANY(arg2), len2, + collid) == 0); PG_FREE_IF_COPY(arg2, 1); @@ -2777,10 +2830,18 @@ texteqname(PG_FUNCTION_ARGS) Name arg2 = PG_GETARG_NAME(1); size_t len1 = VARSIZE_ANY_EXHDR(arg1); size_t len2 = strlen(NameStr(*arg2)); + Oid collid = PG_GET_COLLATION(); bool result; - result = (len1 == len2 && - memcmp(VARDATA_ANY(arg1), NameStr(*arg2), len1) == 0); + check_collation_set(collid); + + if (collid == C_COLLATION_OID) + result = (len1 == len2 && + memcmp(VARDATA_ANY(arg1), NameStr(*arg2), len1) == 0); + else + result = (varstr_cmp(VARDATA_ANY(arg1), len1, + NameStr(*arg2), len2, + collid) == 0); PG_FREE_IF_COPY(arg1, 0); @@ -2794,10 +2855,18 @@ namenetext(PG_FUNCTION_ARGS) text *arg2 = PG_GETARG_TEXT_PP(1); size_t len1 = strlen(NameStr(*arg1)); size_t len2 = VARSIZE_ANY_EXHDR(arg2); + Oid collid = PG_GET_COLLATION(); bool result; - result = !(len1 == 
len2 && - memcmp(NameStr(*arg1), VARDATA_ANY(arg2), len1) == 0); + check_collation_set(collid); + + if (collid == C_COLLATION_OID) + result = !(len1 == len2 && + memcmp(NameStr(*arg1), VARDATA_ANY(arg2), len1) == 0); + else + result = !(varstr_cmp(NameStr(*arg1), len1, + VARDATA_ANY(arg2), len2, + collid) == 0); PG_FREE_IF_COPY(arg2, 1); @@ -2811,10 +2880,18 @@ textnename(PG_FUNCTION_ARGS) Name arg2 = PG_GETARG_NAME(1); size_t len1 = VARSIZE_ANY_EXHDR(arg1); size_t len2 = strlen(NameStr(*arg2)); + Oid collid = PG_GET_COLLATION(); bool result; - result = !(len1 == len2 && - memcmp(VARDATA_ANY(arg1), NameStr(*arg2), len1) == 0); + check_collation_set(collid); + + if (collid == C_COLLATION_OID) + result = !(len1 == len2 && + memcmp(VARDATA_ANY(arg1), NameStr(*arg2), len1) == 0); + else + result = !(varstr_cmp(VARDATA_ANY(arg1), len1, + NameStr(*arg2), len2, + collid) == 0); PG_FREE_IF_COPY(arg1, 0); @@ -2919,12 +2996,34 @@ textgename(PG_FUNCTION_ARGS) */ static int -internal_text_pattern_compare(text *arg1, text *arg2) +internal_text_pattern_compare(text *arg1, text *arg2, Oid collid) { int result; int len1, len2; + check_collation_set(collid); + + /* + * XXX We cannot use a text_pattern_ops index for nondeterministic + * collations, because these operators intentionally ignore the collation. + * However, the planner has no way to know that, so it might choose such + * an index for an "=" clause, which would lead to wrong results. This + * check here doesn't prevent choosing the index, but it will at least + * error out if the index is chosen. A text_pattern_ops index on a column + * with nondeterministic collation is pretty useless anyway, since LIKE + * etc. won't work there either. A future possibility would be to + * annotate the operator class or its members in the catalog to avoid the + * index. Another alternative is to stay away from the *_pattern_ops + * operator classes and prefer creating LIKE-supporting indexes with + * COLLATE "C". 
+ */ + if (!get_collation_isdeterministic(collid)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("nondeterministic collations are not supported for operator class \"%s\"", + "text_pattern_ops"))); + len1 = VARSIZE_ANY_EXHDR(arg1); len2 = VARSIZE_ANY_EXHDR(arg2); @@ -2947,7 +3046,7 @@ text_pattern_lt(PG_FUNCTION_ARGS) text *arg2 = PG_GETARG_TEXT_PP(1); int result; - result = internal_text_pattern_compare(arg1, arg2); + result = internal_text_pattern_compare(arg1, arg2, PG_GET_COLLATION()); PG_FREE_IF_COPY(arg1, 0); PG_FREE_IF_COPY(arg2, 1); @@ -2963,7 +3062,7 @@ text_pattern_le(PG_FUNCTION_ARGS) text *arg2 = PG_GETARG_TEXT_PP(1); int result; - result = internal_text_pattern_compare(arg1, arg2); + result = internal_text_pattern_compare(arg1, arg2, PG_GET_COLLATION()); PG_FREE_IF_COPY(arg1, 0); PG_FREE_IF_COPY(arg2, 1); @@ -2979,7 +3078,7 @@ text_pattern_ge(PG_FUNCTION_ARGS) text *arg2 = PG_GETARG_TEXT_PP(1); int result; - result = internal_text_pattern_compare(arg1, arg2); + result = internal_text_pattern_compare(arg1, arg2, PG_GET_COLLATION()); PG_FREE_IF_COPY(arg1, 0); PG_FREE_IF_COPY(arg2, 1); @@ -2995,7 +3094,7 @@ text_pattern_gt(PG_FUNCTION_ARGS) text *arg2 = PG_GETARG_TEXT_PP(1); int result; - result = internal_text_pattern_compare(arg1, arg2); + result = internal_text_pattern_compare(arg1, arg2, PG_GET_COLLATION()); PG_FREE_IF_COPY(arg1, 0); PG_FREE_IF_COPY(arg2, 1); @@ -3011,7 +3110,7 @@ bttext_pattern_cmp(PG_FUNCTION_ARGS) text *arg2 = PG_GETARG_TEXT_PP(1); int result; - result = internal_text_pattern_compare(arg1, arg2); + result = internal_text_pattern_compare(arg1, arg2, PG_GET_COLLATION()); PG_FREE_IF_COPY(arg1, 0); PG_FREE_IF_COPY(arg2, 1); @@ -3024,8 +3123,17 @@ Datum bttext_pattern_sortsupport(PG_FUNCTION_ARGS) { SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0); + Oid collid = ssup->ssup_collation; MemoryContext oldcontext; + check_collation_set(collid); + + if (!get_collation_isdeterministic(collid)) + ereport(ERROR, + 
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("nondeterministic collations are not supported for operator class \"%s\"", + "text_pattern_ops"))); + oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt); /* Use generic string SortSupport, forcing "C" collation */ @@ -4121,7 +4229,7 @@ replace_text(PG_FUNCTION_ARGS) PG_RETURN_TEXT_P(src_text); } - text_position_setup(src_text, from_sub_text, &state); + text_position_setup(src_text, from_sub_text, PG_GET_COLLATION(), &state); found = text_position_next(&state); @@ -4482,7 +4590,7 @@ split_text(PG_FUNCTION_ARGS) PG_RETURN_TEXT_P(cstring_to_text("")); } - text_position_setup(inputstring, fldsep, &state); + text_position_setup(inputstring, fldsep, PG_GET_COLLATION(), &state); /* identify bounds of first field */ start_ptr = VARDATA_ANY(inputstring); @@ -4538,11 +4646,12 @@ split_text(PG_FUNCTION_ARGS) * Convenience function to return true when two text params are equal. */ static bool -text_isequal(text *txt1, text *txt2) +text_isequal(text *txt1, text *txt2, Oid collid) { - return DatumGetBool(DirectFunctionCall2(texteq, - PointerGetDatum(txt1), - PointerGetDatum(txt2))); + return DatumGetBool(DirectFunctionCall2Coll(texteq, + collid, + PointerGetDatum(txt1), + PointerGetDatum(txt2))); } /* @@ -4633,7 +4742,7 @@ text_to_array_internal(PG_FUNCTION_ARGS) int lbs[1]; /* single element can be a NULL too */ - is_null = null_string ? text_isequal(inputstring, null_string) : false; + is_null = null_string ? 
text_isequal(inputstring, null_string, PG_GET_COLLATION()) : false; elems[0] = PointerGetDatum(inputstring); nulls[0] = is_null; @@ -4645,7 +4754,7 @@ text_to_array_internal(PG_FUNCTION_ARGS) TEXTOID, -1, false, 'i')); } - text_position_setup(inputstring, fldsep, &state); + text_position_setup(inputstring, fldsep, PG_GET_COLLATION(), &state); start_ptr = VARDATA_ANY(inputstring); @@ -4673,7 +4782,7 @@ text_to_array_internal(PG_FUNCTION_ARGS) /* must build a temp text datum to pass to accumArrayResult */ result_text = cstring_to_text_with_len(start_ptr, chunk_len); - is_null = null_string ? text_isequal(result_text, null_string) : false; + is_null = null_string ? text_isequal(result_text, null_string, PG_GET_COLLATION()) : false; /* stash away this field */ astate = accumArrayResult(astate, @@ -4715,7 +4824,7 @@ text_to_array_internal(PG_FUNCTION_ARGS) /* must build a temp text datum to pass to accumArrayResult */ result_text = cstring_to_text_with_len(start_ptr, chunk_len); - is_null = null_string ? text_isequal(result_text, null_string) : false; + is_null = null_string ? text_isequal(result_text, null_string, PG_GET_COLLATION()) : false; /* stash away this field */ astate = accumArrayResult(astate, diff --git a/src/backend/utils/cache/catcache.c b/src/backend/utils/cache/catcache.c index 78dd5714fa..bb9ae8fd0c 100644 --- a/src/backend/utils/cache/catcache.c +++ b/src/backend/utils/cache/catcache.c @@ -171,13 +171,18 @@ int4hashfast(Datum datum) static bool texteqfast(Datum a, Datum b) { - return DatumGetBool(DirectFunctionCall2(texteq, a, b)); + /* + * The use of DEFAULT_COLLATION_OID is fairly arbitrary here. We just + * want to take the fast "deterministic" path in texteq(). 
+ */ + return DatumGetBool(DirectFunctionCall2Coll(texteq, DEFAULT_COLLATION_OID, a, b)); } static uint32 texthashfast(Datum datum) { - return DatumGetInt32(DirectFunctionCall1(hashtext, datum)); + /* analogously here as in texteqfast() */ + return DatumGetInt32(DirectFunctionCall1Coll(hashtext, DEFAULT_COLLATION_OID, datum)); } static bool diff --git a/src/backend/utils/cache/lsyscache.c b/src/backend/utils/cache/lsyscache.c index e88c45d268..59e6bcd856 100644 --- a/src/backend/utils/cache/lsyscache.c +++ b/src/backend/utils/cache/lsyscache.c @@ -908,6 +908,22 @@ get_collation_name(Oid colloid) return NULL; } +bool +get_collation_isdeterministic(Oid colloid) +{ + HeapTuple tp; + Form_pg_collation colltup; + bool result; + + tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(colloid)); + if (!HeapTupleIsValid(tp)) + elog(ERROR, "cache lookup failed for collation %u", colloid); + colltup = (Form_pg_collation) GETSTRUCT(tp); + result = colltup->collisdeterministic; + ReleaseSysCache(tp); + return result; +} + /* ---------- CONSTRAINT CACHE ---------- */ /* diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c index fd50a809ea..4886090132 100644 --- a/src/bin/initdb/initdb.c +++ b/src/bin/initdb/initdb.c @@ -1765,8 +1765,8 @@ setup_collation(FILE *cmdfd) * in pg_collation.h. But add it before reading system collations, so * that it wins if libc defines a locale named ucs_basic. 
*/ - PG_CMD_PRINTF3("INSERT INTO pg_collation (oid, collname, collnamespace, collowner, collprovider, collencoding, collcollate, collctype)" - "VALUES (pg_nextoid('pg_catalog.pg_collation', 'oid', 'pg_catalog.pg_collation_oid_index'), 'ucs_basic', 'pg_catalog'::regnamespace, %u, '%c', %d, 'C', 'C');\n\n", + PG_CMD_PRINTF3("INSERT INTO pg_collation (oid, collname, collnamespace, collowner, collprovider, collisdeterministic, collencoding, collcollate, collctype)" + "VALUES (pg_nextoid('pg_catalog.pg_collation', 'oid', 'pg_catalog.pg_collation_oid_index'), 'ucs_basic', 'pg_catalog'::regnamespace, %u, '%c', true, %d, 'C', 'C');\n\n", BOOTSTRAP_SUPERUSERID, COLLPROVIDER_LIBC, PG_UTF8); /* Now import all collations we can find in the operating system */ diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index e962ae7e91..31ef4eea12 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -13406,6 +13406,7 @@ dumpCollation(Archive *fout, CollInfo *collinfo) char *qcollname; PGresult *res; int i_collprovider; + int i_collisdeterministic; int i_collcollate; int i_collctype; const char *collprovider; @@ -13423,28 +13424,35 @@ dumpCollation(Archive *fout, CollInfo *collinfo) qcollname = pg_strdup(fmtId(collinfo->dobj.name)); /* Get collation-specific details */ + appendPQExpBuffer(query, "SELECT "); + if (fout->remoteVersion >= 100000) - appendPQExpBuffer(query, "SELECT " + appendPQExpBuffer(query, "collprovider, " - "collcollate, " - "collctype, " - "collversion " - "FROM pg_catalog.pg_collation c " - "WHERE c.oid = '%u'::pg_catalog.oid", - collinfo->dobj.catId.oid); + "collversion, "); else - appendPQExpBuffer(query, "SELECT " + appendPQExpBuffer(query, "'c' AS collprovider, " - "collcollate, " - "collctype, " - "NULL AS collversion " - "FROM pg_catalog.pg_collation c " - "WHERE c.oid = '%u'::pg_catalog.oid", - collinfo->dobj.catId.oid); + "NULL AS collversion, "); + + if (fout->remoteVersion >= 120000) + appendPQExpBuffer(query, + 
"collisdeterministic, "); + else + appendPQExpBuffer(query, + "true AS collisdeterministic, "); + + appendPQExpBuffer(query, + "collcollate, " + "collctype " + "FROM pg_catalog.pg_collation c " + "WHERE c.oid = '%u'::pg_catalog.oid", + collinfo->dobj.catId.oid); res = ExecuteSqlQueryForSingleRow(fout, query->data); i_collprovider = PQfnumber(res, "collprovider"); + i_collisdeterministic = PQfnumber(res, "collisdeterministic"); i_collcollate = PQfnumber(res, "collcollate"); i_collctype = PQfnumber(res, "collctype"); @@ -13471,6 +13479,9 @@ dumpCollation(Archive *fout, CollInfo *collinfo) "unrecognized collation provider: %s\n", collprovider); + if (strcmp(PQgetvalue(res, 0, i_collisdeterministic), "f") == 0) + appendPQExpBufferStr(q, ", deterministic = false"); + if (strcmp(collcollate, collctype) == 0) { appendPQExpBufferStr(q, ", locale = "); diff --git a/src/bin/psql/describe.c b/src/bin/psql/describe.c index 779e48437c..fd8ebee8cd 100644 --- a/src/bin/psql/describe.c +++ b/src/bin/psql/describe.c @@ -4106,7 +4106,7 @@ listCollations(const char *pattern, bool verbose, bool showSystem) PQExpBufferData buf; PGresult *res; printQueryOpt myopt = pset.popt; - static const bool translate_columns[] = {false, false, false, false, false, false}; + static const bool translate_columns[] = {false, false, false, false, false, true, false}; if (pset.sversion < 90100) { @@ -4134,6 +4134,21 @@ listCollations(const char *pattern, bool verbose, bool showSystem) appendPQExpBuffer(&buf, ",\n CASE c.collprovider WHEN 'd' THEN 'default' WHEN 'c' THEN 'libc' WHEN 'i' THEN 'icu' END AS \"%s\"", gettext_noop("Provider")); + else + appendPQExpBuffer(&buf, + ",\n 'libc' AS \"%s\"", + gettext_noop("Provider")); + + if (pset.sversion >= 120000) + appendPQExpBuffer(&buf, + ",\n CASE WHEN c.collisdeterministic THEN '%s' ELSE '%s' END AS \"%s\"", + gettext_noop("yes"), gettext_noop("no"), + gettext_noop("Deterministic?")); + else + appendPQExpBuffer(&buf, + ",\n '%s' AS \"%s\"", + 
gettext_noop("yes"), + gettext_noop("Deterministic?")); if (verbose) appendPQExpBuffer(&buf, diff --git a/src/include/catalog/pg_collation.h b/src/include/catalog/pg_collation.h index 10fe711a91..4d2fcb3858 100644 --- a/src/include/catalog/pg_collation.h +++ b/src/include/catalog/pg_collation.h @@ -33,6 +33,7 @@ CATALOG(pg_collation,3456,CollationRelationId) Oid collnamespace; /* OID of namespace containing collation */ Oid collowner; /* owner of collation */ char collprovider; /* see constants below */ + bool collisdeterministic BKI_DEFAULT(t); int32 collencoding; /* encoding for this collation; -1 = "all" */ NameData collcollate; /* LC_COLLATE setting */ NameData collctype; /* LC_CTYPE setting */ @@ -61,6 +62,7 @@ typedef FormData_pg_collation *Form_pg_collation; extern Oid CollationCreate(const char *collname, Oid collnamespace, Oid collowner, char collprovider, + bool collisdeterministic, int32 collencoding, const char *collcollate, const char *collctype, const char *collversion, diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h index 9003f2ce58..0cf7aa3495 100644 --- a/src/include/executor/executor.h +++ b/src/include/executor/executor.h @@ -111,6 +111,7 @@ extern ExprState *execTuplesMatchPrepare(TupleDesc desc, int numCols, const AttrNumber *keyColIdx, const Oid *eqOperators, + const Oid *collations, PlanState *parent); extern void execTuplesHashPrepare(int numCols, const Oid *eqOperators, @@ -121,6 +122,7 @@ extern TupleHashTable BuildTupleHashTable(PlanState *parent, int numCols, AttrNumber *keyColIdx, const Oid *eqfuncoids, FmgrInfo *hashfunctions, + Oid *collations, long nbuckets, Size additionalsize, MemoryContext tablecxt, MemoryContext tempcxt, bool use_variable_hash_iv); @@ -129,6 +131,7 @@ extern TupleHashTable BuildTupleHashTableExt(PlanState *parent, int numCols, AttrNumber *keyColIdx, const Oid *eqfuncoids, FmgrInfo *hashfunctions, + Oid *collations, long nbuckets, Size additionalsize, MemoryContext metacxt, 
MemoryContext tablecxt, @@ -257,6 +260,7 @@ extern ExprState *ExecBuildGroupingEqual(TupleDesc ldesc, TupleDesc rdesc, int numCols, const AttrNumber *keyColIdx, const Oid *eqfunctions, + const Oid *collations, PlanState *parent); extern ProjectionInfo *ExecBuildProjectionInfo(List *targetList, ExprContext *econtext, diff --git a/src/include/executor/hashjoin.h b/src/include/executor/hashjoin.h index e7bf158c1b..2c94b926d3 100644 --- a/src/include/executor/hashjoin.h +++ b/src/include/executor/hashjoin.h @@ -337,6 +337,7 @@ typedef struct HashJoinTableData FmgrInfo *outer_hashfunctions; /* lookup data for hash functions */ FmgrInfo *inner_hashfunctions; /* lookup data for hash functions */ bool *hashStrict; /* is each hash join operator strict? */ + Oid *collations; Size spaceUsed; /* memory space currently used by tuples */ Size spaceAllowed; /* upper limit for space used */ diff --git a/src/include/executor/nodeHash.h b/src/include/executor/nodeHash.h index 1309b32b90..1233766023 100644 --- a/src/include/executor/nodeHash.h +++ b/src/include/executor/nodeHash.h @@ -24,7 +24,7 @@ extern Node *MultiExecHash(HashState *node); extern void ExecEndHash(HashState *node); extern void ExecReScanHash(HashState *node); -extern HashJoinTable ExecHashTableCreate(HashState *state, List *hashOperators, +extern HashJoinTable ExecHashTableCreate(HashState *state, List *hashOperators, List *hashCollations, bool keepNulls); extern void ExecParallelHashTableAlloc(HashJoinTable hashtable, int batchno); diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index fd13c170d7..d3ed5077ec 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -693,6 +693,7 @@ typedef struct TupleHashTableData AttrNumber *keyColIdx; /* attr numbers of key columns */ FmgrInfo *tab_hash_funcs; /* hash functions for table datatype(s) */ ExprState *tab_eq_func; /* comparator for table datatype(s) */ + Oid *tab_collations; /* collations for hash and comparison */ 
MemoryContext tablecxt; /* memory context containing table */ MemoryContext tempcxt; /* context for function evaluations */ Size entrysize; /* actual size to make each hash entry */ @@ -862,6 +863,7 @@ typedef struct SubPlanState AttrNumber *keyColIdx; /* control data for hash tables */ Oid *tab_eq_funcoids; /* equality func oids for table * datatype(s) */ + Oid *tab_collations; /* collations for hash and comparison */ FmgrInfo *tab_hash_funcs; /* hash functions for table datatype(s) */ FmgrInfo *tab_eq_funcs; /* equality functions for table datatype(s) */ FmgrInfo *lhs_hash_funcs; /* hash functions for lefthand datatype(s) */ @@ -1872,6 +1874,7 @@ typedef struct HashJoinState List *hj_OuterHashKeys; /* list of ExprState nodes */ List *hj_InnerHashKeys; /* list of ExprState nodes */ List *hj_HashOperators; /* list of operator OIDs */ + List *hj_Collations; HashJoinTable hj_HashTable; uint32 hj_CurHashValue; int hj_CurBucketNo; diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index d66a187a53..24740c31e3 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -297,6 +297,7 @@ typedef struct RecursiveUnion * duplicate-ness */ AttrNumber *dupColIdx; /* their indexes in the target list */ Oid *dupOperators; /* equality operators to compare with */ + Oid *dupCollations; long numGroups; /* estimated number of groups in input */ } RecursiveUnion; @@ -773,6 +774,7 @@ typedef struct Group int numCols; /* number of grouping columns */ AttrNumber *grpColIdx; /* their indexes in the target list */ Oid *grpOperators; /* equality operators to compare with */ + Oid *grpCollations; } Group; /* --------------- @@ -797,6 +799,7 @@ typedef struct Agg int numCols; /* number of grouping columns */ AttrNumber *grpColIdx; /* their indexes in the target list */ Oid *grpOperators; /* equality operators to compare with */ + Oid *grpCollations; long numGroups; /* estimated number of groups in input */ Bitmapset *aggParams; /* IDs of Params 
used in Aggref inputs */ /* Note: planner provides numGroups & aggParams only in HASHED/MIXED case */ @@ -815,9 +818,11 @@ typedef struct WindowAgg int partNumCols; /* number of columns in partition clause */ AttrNumber *partColIdx; /* their indexes in the target list */ Oid *partOperators; /* equality operators for partition columns */ + Oid *partCollations; /* collations for partition columns */ int ordNumCols; /* number of columns in ordering clause */ AttrNumber *ordColIdx; /* their indexes in the target list */ Oid *ordOperators; /* equality operators for ordering columns */ + Oid *ordCollations; /* collations for ordering columns */ int frameOptions; /* frame_clause options, see WindowDef */ Node *startOffset; /* expression for starting bound, if any */ Node *endOffset; /* expression for ending bound, if any */ @@ -839,6 +844,7 @@ typedef struct Unique int numCols; /* number of columns to check for uniqueness */ AttrNumber *uniqColIdx; /* their indexes in the target list */ Oid *uniqOperators; /* equality operators to compare with */ + Oid *uniqCollations; /* collations for equality comparisons */ } Unique; /* ------------ @@ -913,6 +919,7 @@ typedef struct SetOp * duplicate-ness */ AttrNumber *dupColIdx; /* their indexes in the target list */ Oid *dupOperators; /* equality operators to compare with */ + Oid *dupCollations; AttrNumber flagColIdx; /* where is the flag column, if any */ int firstFlag; /* flag value for first input relation */ long numGroups; /* estimated number of groups in input */ diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h index 3bbdb5e2f7..b093a3c8ac 100644 --- a/src/include/optimizer/planmain.h +++ b/src/include/optimizer/planmain.h @@ -53,7 +53,7 @@ extern bool is_projection_capable_plan(Plan *plan); extern Sort *make_sort_from_sortclauses(List *sortcls, Plan *lefttree); extern Agg *make_agg(List *tlist, List *qual, AggStrategy aggstrategy, AggSplit aggsplit, - int numGroupCols, AttrNumber *grpColIdx, 
Oid *grpOperators, + int numGroupCols, AttrNumber *grpColIdx, Oid *grpOperators, Oid *grpCollations, List *groupingSets, List *chain, double dNumGroups, Plan *lefttree); extern Limit *make_limit(Plan *lefttree, Node *limitOffset, Node *limitCount); diff --git a/src/include/optimizer/tlist.h b/src/include/optimizer/tlist.h index 58db79203b..46d614f4fb 100644 --- a/src/include/optimizer/tlist.h +++ b/src/include/optimizer/tlist.h @@ -32,6 +32,7 @@ extern bool tlist_same_collations(List *tlist, List *colCollations, bool junkOK) extern void apply_tlist_labeling(List *dest_tlist, List *src_tlist); extern Oid *extract_grouping_ops(List *groupClause); +extern Oid *extract_grouping_collations(List *groupClause, List *tlist); extern AttrNumber *extract_grouping_cols(List *groupClause, List *tlist); extern bool grouping_is_sortable(List *groupClause); extern bool grouping_is_hashable(List *groupClause); diff --git a/src/include/partitioning/partbounds.h b/src/include/partitioning/partbounds.h index b1ae39ad63..683e1574ea 100644 --- a/src/include/partitioning/partbounds.h +++ b/src/include/partitioning/partbounds.h @@ -77,6 +77,7 @@ typedef struct PartitionBoundInfoData extern int get_hash_partition_greatest_modulus(PartitionBoundInfo b); extern uint64 compute_partition_hash_value(int partnatts, FmgrInfo *partsupfunc, + Oid *partcollation, Datum *values, bool *isnull); extern List *get_qual_from_partbound(Relation rel, Relation parent, PartitionBoundSpec *spec); diff --git a/src/include/utils/lsyscache.h b/src/include/utils/lsyscache.h index 16b0b1d2dc..b9a9ecb7cc 100644 --- a/src/include/utils/lsyscache.h +++ b/src/include/utils/lsyscache.h @@ -90,6 +90,7 @@ extern Oid get_atttype(Oid relid, AttrNumber attnum); extern void get_atttypetypmodcoll(Oid relid, AttrNumber attnum, Oid *typid, int32 *typmod, Oid *collid); extern char *get_collation_name(Oid colloid); +extern bool get_collation_isdeterministic(Oid colloid); extern char *get_constraint_name(Oid conoid); extern char 
*get_language_name(Oid langoid, bool missing_ok); extern Oid get_opclass_family(Oid opclass); diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h index 606952afd7..a342a62549 100644 --- a/src/include/utils/pg_locale.h +++ b/src/include/utils/pg_locale.h @@ -82,6 +82,7 @@ extern void cache_locale_time(void); struct pg_locale_struct { char provider; + bool deterministic; union { #ifdef HAVE_LOCALE_T diff --git a/src/test/regress/expected/collate.icu.utf8.out b/src/test/regress/expected/collate.icu.utf8.out index f95d165288..fc8f092585 100644 --- a/src/test/regress/expected/collate.icu.utf8.out +++ b/src/test/regress/expected/collate.icu.utf8.out @@ -1100,6 +1100,716 @@ select textrange_en_us('A','Z') @> 'b'::text; drop type textrange_c; drop type textrange_en_us; +-- nondeterministic collations +CREATE COLLATION ctest_det (provider = icu, locale = 'und', deterministic = true); +CREATE COLLATION ctest_nondet (provider = icu, locale = 'und', deterministic = false); +CREATE TABLE test6 (a int, b text); +-- same string in different normal forms +INSERT INTO test6 VALUES (1, U&'\00E4bc'); +INSERT INTO test6 VALUES (2, U&'\0061\0308bc'); +SELECT * FROM test6; + a | b +---+----- + 1 | äbc + 2 | äbc +(2 rows) + +SELECT * FROM test6 WHERE b = 'äbc' COLLATE ctest_det; + a | b +---+----- + 1 | äbc +(1 row) + +SELECT * FROM test6 WHERE b = 'äbc' COLLATE ctest_nondet; + a | b +---+----- + 1 | äbc + 2 | äbc +(2 rows) + +CREATE COLLATION case_sensitive (provider = icu, locale = 'und'); +CREATE COLLATION case_insensitive (provider = icu, locale = 'und-u-ks-level2', deterministic = false); +SELECT 'abc' <= 'ABC' COLLATE case_sensitive, 'abc' >= 'ABC' COLLATE case_sensitive; + ?column? | ?column? +----------+---------- + t | f +(1 row) + +SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_insensitive; + ?column? | ?column? 
+----------+---------- + t | t +(1 row) + +CREATE TABLE test1cs (x text COLLATE case_sensitive); +CREATE TABLE test2cs (x text COLLATE case_sensitive); +CREATE TABLE test3cs (x text COLLATE case_sensitive); +INSERT INTO test1cs VALUES ('abc'), ('def'), ('ghi'); +INSERT INTO test2cs VALUES ('ABC'), ('ghi'); +INSERT INTO test3cs VALUES ('abc'), ('ABC'), ('def'), ('ghi'); +SELECT x FROM test3cs WHERE x = 'abc'; + x +----- + abc +(1 row) + +SELECT x FROM test3cs WHERE x <> 'abc'; + x +----- + ABC + def + ghi +(3 rows) + +SELECT x FROM test3cs WHERE x LIKE 'a%'; + x +----- + abc +(1 row) + +SELECT x FROM test3cs WHERE x ILIKE 'a%'; + x +----- + abc + ABC +(2 rows) + +SELECT x FROM test3cs WHERE x SIMILAR TO 'a%'; + x +----- + abc +(1 row) + +SELECT x FROM test3cs WHERE x ~ 'a'; + x +----- + abc +(1 row) + +SELECT x FROM test1cs UNION SELECT x FROM test2cs ORDER BY x; + x +----- + abc + ABC + def + ghi +(4 rows) + +SELECT x FROM test2cs UNION SELECT x FROM test1cs ORDER BY x; + x +----- + abc + ABC + def + ghi +(4 rows) + +SELECT x FROM test1cs INTERSECT SELECT x FROM test2cs; + x +----- + ghi +(1 row) + +SELECT x FROM test2cs INTERSECT SELECT x FROM test1cs; + x +----- + ghi +(1 row) + +SELECT x FROM test1cs EXCEPT SELECT x FROM test2cs; + x +----- + abc + def +(2 rows) + +SELECT x FROM test2cs EXCEPT SELECT x FROM test1cs; + x +----- + ABC +(1 row) + +SELECT DISTINCT x FROM test3cs ORDER BY x; + x +----- + abc + ABC + def + ghi +(4 rows) + +SELECT count(DISTINCT x) FROM test3cs; + count +------- + 4 +(1 row) + +SELECT x, count(*) FROM test3cs GROUP BY x ORDER BY x; + x | count +-----+------- + abc | 1 + ABC | 1 + def | 1 + ghi | 1 +(4 rows) + +SELECT x, row_number() OVER (ORDER BY x), rank() OVER (ORDER BY x) FROM test3cs ORDER BY x; + x | row_number | rank +-----+------------+------ + abc | 1 | 1 + ABC | 2 | 2 + def | 3 | 3 + ghi | 4 | 4 +(4 rows) + +CREATE UNIQUE INDEX ON test1cs (x); -- ok +INSERT INTO test1cs VALUES ('ABC'); -- ok +CREATE UNIQUE INDEX ON test3cs 
(x); -- ok +SELECT string_to_array('ABC,DEF,GHI' COLLATE case_sensitive, ',', 'abc'); + string_to_array +----------------- + {ABC,DEF,GHI} +(1 row) + +SELECT string_to_array('ABCDEFGHI' COLLATE case_sensitive, NULL, 'b'); + string_to_array +--------------------- + {A,B,C,D,E,F,G,H,I} +(1 row) + +CREATE TABLE test1ci (x text COLLATE case_insensitive); +CREATE TABLE test2ci (x text COLLATE case_insensitive); +CREATE TABLE test3ci (x text COLLATE case_insensitive); +CREATE INDEX ON test3ci (x text_pattern_ops); -- error +ERROR: nondeterministic collations are not supported for operator class "text_pattern_ops" +INSERT INTO test1ci VALUES ('abc'), ('def'), ('ghi'); +INSERT INTO test2ci VALUES ('ABC'), ('ghi'); +INSERT INTO test3ci VALUES ('abc'), ('ABC'), ('def'), ('ghi'); +SELECT x FROM test3ci WHERE x = 'abc'; + x +----- + abc + ABC +(2 rows) + +SELECT x FROM test3ci WHERE x <> 'abc'; + x +----- + def + ghi +(2 rows) + +SELECT x FROM test3ci WHERE x LIKE 'a%'; +ERROR: nondeterministic collations are not supported for LIKE +SELECT x FROM test3ci WHERE x ILIKE 'a%'; +ERROR: nondeterministic collations are not supported for ILIKE +SELECT x FROM test3ci WHERE x SIMILAR TO 'a%'; +ERROR: nondeterministic collations are not supported for regular expressions +SELECT x FROM test3ci WHERE x ~ 'a'; +ERROR: nondeterministic collations are not supported for regular expressions +SELECT x FROM test1ci UNION SELECT x FROM test2ci ORDER BY x; + x +----- + abc + def + ghi +(3 rows) + +SELECT x FROM test2ci UNION SELECT x FROM test1ci ORDER BY x; + x +----- + ABC + def + ghi +(3 rows) + +SELECT x FROM test1ci INTERSECT SELECT x FROM test2ci; + x +----- + ghi + abc +(2 rows) + +SELECT x FROM test2ci INTERSECT SELECT x FROM test1ci; + x +----- + ghi + ABC +(2 rows) + +SELECT x FROM test1ci EXCEPT SELECT x FROM test2ci; + x +----- + def +(1 row) + +SELECT x FROM test2ci EXCEPT SELECT x FROM test1ci; + x +--- +(0 rows) + +SELECT DISTINCT x FROM test3ci ORDER BY x; + x +----- + abc + def + 
ghi +(3 rows) + +SELECT count(DISTINCT x) FROM test3ci; + count +------- + 3 +(1 row) + +SELECT x, count(*) FROM test3ci GROUP BY x ORDER BY x; + x | count +-----+------- + abc | 2 + def | 1 + ghi | 1 +(3 rows) + +SELECT x, row_number() OVER (ORDER BY x), rank() OVER (ORDER BY x) FROM test3ci ORDER BY x; + x | row_number | rank +-----+------------+------ + abc | 1 | 1 + ABC | 2 | 1 + def | 3 | 3 + ghi | 4 | 4 +(4 rows) + +CREATE UNIQUE INDEX ON test1ci (x); -- ok +INSERT INTO test1ci VALUES ('ABC'); -- error +ERROR: duplicate key value violates unique constraint "test1ci_x_idx" +DETAIL: Key (x)=(ABC) already exists. +CREATE UNIQUE INDEX ON test3ci (x); -- error +ERROR: could not create unique index "test3ci_x_idx" +DETAIL: Key (x)=(abc) is duplicated. +SELECT string_to_array('ABC,DEF,GHI' COLLATE case_insensitive, ',', 'abc'); +ERROR: nondeterministic collations are not supported for substring searches +SELECT string_to_array('ABCDEFGHI' COLLATE case_insensitive, NULL, 'b'); + string_to_array +------------------------ + {A,NULL,C,D,E,F,G,H,I} +(1 row) + +-- bpchar +CREATE TABLE test1bpci (x char(3) COLLATE case_insensitive); +CREATE TABLE test2bpci (x char(3) COLLATE case_insensitive); +CREATE TABLE test3bpci (x char(3) COLLATE case_insensitive); +CREATE INDEX ON test3bpci (x bpchar_pattern_ops); -- error +ERROR: nondeterministic collations are not supported for operator class "bpchar_pattern_ops" +INSERT INTO test1bpci VALUES ('abc'), ('def'), ('ghi'); +INSERT INTO test2bpci VALUES ('ABC'), ('ghi'); +INSERT INTO test3bpci VALUES ('abc'), ('ABC'), ('def'), ('ghi'); +SELECT x FROM test3bpci WHERE x = 'abc'; + x +----- + abc + ABC +(2 rows) + +SELECT x FROM test3bpci WHERE x <> 'abc'; + x +----- + def + ghi +(2 rows) + +SELECT x FROM test3bpci WHERE x LIKE 'a%'; +ERROR: nondeterministic collations are not supported for LIKE +SELECT x FROM test3bpci WHERE x ILIKE 'a%'; +ERROR: nondeterministic collations are not supported for ILIKE +SELECT x FROM test3bpci WHERE x 
SIMILAR TO 'a%'; +ERROR: nondeterministic collations are not supported for regular expressions +SELECT x FROM test3bpci WHERE x ~ 'a'; +ERROR: nondeterministic collations are not supported for regular expressions +SELECT x FROM test1bpci UNION SELECT x FROM test2bpci ORDER BY x; + x +----- + abc + def + ghi +(3 rows) + +SELECT x FROM test2bpci UNION SELECT x FROM test1bpci ORDER BY x; + x +----- + ABC + def + ghi +(3 rows) + +SELECT x FROM test1bpci INTERSECT SELECT x FROM test2bpci; + x +----- + ghi + abc +(2 rows) + +SELECT x FROM test2bpci INTERSECT SELECT x FROM test1bpci; + x +----- + ghi + ABC +(2 rows) + +SELECT x FROM test1bpci EXCEPT SELECT x FROM test2bpci; + x +----- + def +(1 row) + +SELECT x FROM test2bpci EXCEPT SELECT x FROM test1bpci; + x +--- +(0 rows) + +SELECT DISTINCT x FROM test3bpci ORDER BY x; + x +----- + abc + def + ghi +(3 rows) + +SELECT count(DISTINCT x) FROM test3bpci; + count +------- + 3 +(1 row) + +SELECT x, count(*) FROM test3bpci GROUP BY x ORDER BY x; + x | count +-----+------- + abc | 2 + def | 1 + ghi | 1 +(3 rows) + +SELECT x, row_number() OVER (ORDER BY x), rank() OVER (ORDER BY x) FROM test3bpci ORDER BY x; + x | row_number | rank +-----+------------+------ + abc | 1 | 1 + ABC | 2 | 1 + def | 3 | 3 + ghi | 4 | 4 +(4 rows) + +CREATE UNIQUE INDEX ON test1bpci (x); -- ok +INSERT INTO test1bpci VALUES ('ABC'); -- error +ERROR: duplicate key value violates unique constraint "test1bpci_x_idx" +DETAIL: Key (x)=(ABC) already exists. +CREATE UNIQUE INDEX ON test3bpci (x); -- error +ERROR: could not create unique index "test3bpci_x_idx" +DETAIL: Key (x)=(abc) is duplicated. 
+SELECT string_to_array('ABC,DEF,GHI'::char(11) COLLATE case_insensitive, ',', 'abc'); +ERROR: nondeterministic collations are not supported for substring searches +SELECT string_to_array('ABCDEFGHI'::char(9) COLLATE case_insensitive, NULL, 'b'); + string_to_array +------------------------ + {A,NULL,C,D,E,F,G,H,I} +(1 row) + +-- This tests the issue described in match_pattern_prefix(). In the +-- absence of that check, the case_insensitive tests below would +-- return no rows where they should logically return one. +CREATE TABLE test4c (x text COLLATE "C"); +INSERT INTO test4c VALUES ('abc'); +CREATE INDEX ON test4c (x); +SET enable_seqscan = off; +SELECT x FROM test4c WHERE x LIKE 'ABC' COLLATE case_sensitive; -- ok, no rows + x +--- +(0 rows) + +SELECT x FROM test4c WHERE x LIKE 'ABC%' COLLATE case_sensitive; -- ok, no rows + x +--- +(0 rows) + +SELECT x FROM test4c WHERE x LIKE 'ABC' COLLATE case_insensitive; -- error +ERROR: nondeterministic collations are not supported for LIKE +SELECT x FROM test4c WHERE x LIKE 'ABC%' COLLATE case_insensitive; -- error +ERROR: nondeterministic collations are not supported for LIKE +RESET enable_seqscan; +-- Unicode special case: different variants of Greek lower case sigma. +-- A naive implementation like citext that just does lower(x) = +-- lower(y) will do the wrong thing here, because lower('Σ') is 'σ' +-- but upper('ς') is 'Σ'. +SELECT 'ὀδυσσεύς' = 'ὈΔΥΣΣΕΎΣ' COLLATE case_sensitive; + ?column? +---------- + f +(1 row) + +SELECT 'ὀδυσσεύς' = 'ὈΔΥΣΣΕΎΣ' COLLATE case_insensitive; + ?column? +---------- + t +(1 row) + +-- name vs. 
text comparison operators +SELECT relname FROM pg_class WHERE relname = 'PG_CLASS'::text COLLATE case_insensitive; + relname +---------- + pg_class +(1 row) + +SELECT relname FROM pg_class WHERE 'PG_CLASS'::text = relname COLLATE case_insensitive; + relname +---------- + pg_class +(1 row) + +SELECT typname FROM pg_type WHERE typname LIKE 'int_' AND typname <> 'INT2'::text COLLATE case_insensitive; + typname +--------- + int4 + int8 +(2 rows) + +SELECT typname FROM pg_type WHERE typname LIKE 'int_' AND 'INT2'::text <> typname COLLATE case_insensitive;; + typname +--------- + int4 + int8 +(2 rows) + +-- test case adapted from subselect.sql +CREATE TEMP TABLE outer_text (f1 text COLLATE case_insensitive, f2 text); +INSERT INTO outer_text VALUES ('a', 'a'); +INSERT INTO outer_text VALUES ('b', 'a'); +INSERT INTO outer_text VALUES ('A', NULL); +INSERT INTO outer_text VALUES ('B', NULL); +CREATE TEMP TABLE inner_text (c1 text COLLATE case_insensitive, c2 text); +INSERT INTO inner_text VALUES ('a', NULL); +SELECT * FROM outer_text WHERE (f1, f2) NOT IN (SELECT * FROM inner_text); + f1 | f2 +----+---- + b | a + B | +(2 rows) + +-- accents +CREATE COLLATION ignore_accents (provider = icu, locale = 'und-u-ks-level1-kc-true', deterministic = false); +CREATE TABLE test4 (a int, b text); +INSERT INTO test4 VALUES (1, 'cote'), (2, 'côte'), (3, 'coté'), (4, 'côté'); +SELECT * FROM test4 WHERE b = 'cote'; + a | b +---+------ + 1 | cote +(1 row) + +SELECT * FROM test4 WHERE b = 'cote' COLLATE ignore_accents; + a | b +---+------ + 1 | cote + 2 | côte + 3 | coté + 4 | côté +(4 rows) + +SELECT * FROM test4 WHERE b = 'Cote' COLLATE ignore_accents; -- still case-sensitive + a | b +---+--- +(0 rows) + +SELECT * FROM test4 WHERE b = 'Cote' COLLATE case_insensitive; + a | b +---+------ + 1 | cote +(1 row) + +-- foreign keys (should use collation of primary key) +-- PK is case-sensitive, FK is case-insensitive +CREATE TABLE test10pk (x text COLLATE case_sensitive PRIMARY KEY); +INSERT INTO 
test10pk VALUES ('abc'), ('def'), ('ghi'); +CREATE TABLE test10fk (x text COLLATE case_insensitive REFERENCES test10pk (x) ON UPDATE CASCADE ON DELETE CASCADE); +INSERT INTO test10fk VALUES ('abc'); -- ok +INSERT INTO test10fk VALUES ('ABC'); -- error +ERROR: insert or update on table "test10fk" violates foreign key constraint "test10fk_x_fkey" +DETAIL: Key (x)=(ABC) is not present in table "test10pk". +INSERT INTO test10fk VALUES ('xyz'); -- error +ERROR: insert or update on table "test10fk" violates foreign key constraint "test10fk_x_fkey" +DETAIL: Key (x)=(xyz) is not present in table "test10pk". +SELECT * FROM test10pk; + x +----- + abc + def + ghi +(3 rows) + +SELECT * FROM test10fk; + x +----- + abc +(1 row) + +-- restrict update even though the values are "equal" in the FK table +UPDATE test10fk SET x = 'ABC' WHERE x = 'abc'; -- error +ERROR: insert or update on table "test10fk" violates foreign key constraint "test10fk_x_fkey" +DETAIL: Key (x)=(ABC) is not present in table "test10pk". +SELECT * FROM test10fk; + x +----- + abc +(1 row) + +DELETE FROM test10pk WHERE x = 'abc'; +SELECT * FROM test10pk; + x +----- + def + ghi +(2 rows) + +SELECT * FROM test10fk; + x +--- +(0 rows) + +-- PK is case-insensitive, FK is case-sensitive +CREATE TABLE test11pk (x text COLLATE case_insensitive PRIMARY KEY); +INSERT INTO test11pk VALUES ('abc'), ('def'), ('ghi'); +CREATE TABLE test11fk (x text COLLATE case_sensitive REFERENCES test11pk (x) ON UPDATE CASCADE ON DELETE CASCADE); +INSERT INTO test11fk VALUES ('abc'); -- ok +INSERT INTO test11fk VALUES ('ABC'); -- ok +INSERT INTO test11fk VALUES ('xyz'); -- error +ERROR: insert or update on table "test11fk" violates foreign key constraint "test11fk_x_fkey" +DETAIL: Key (x)=(xyz) is not present in table "test11pk". 
+SELECT * FROM test11pk; + x +----- + abc + def + ghi +(3 rows) + +SELECT * FROM test11fk; + x +----- + abc + ABC +(2 rows) + +-- cascade update even though the values are "equal" in the PK table +UPDATE test11pk SET x = 'ABC' WHERE x = 'abc'; +SELECT * FROM test11fk; + x +----- + ABC + ABC +(2 rows) + +DELETE FROM test11pk WHERE x = 'abc'; +SELECT * FROM test11pk; + x +----- + def + ghi +(2 rows) + +SELECT * FROM test11fk; + x +--- +(0 rows) + +-- partitioning +CREATE TABLE test20 (a int, b text COLLATE case_insensitive) PARTITION BY LIST (b); +CREATE TABLE test20_1 PARTITION OF test20 FOR VALUES IN ('abc'); +INSERT INTO test20 VALUES (1, 'abc'); +INSERT INTO test20 VALUES (2, 'ABC'); +SELECT * FROM test20_1; + a | b +---+----- + 1 | abc + 2 | ABC +(2 rows) + +CREATE TABLE test21 (a int, b text COLLATE case_insensitive) PARTITION BY RANGE (b); +CREATE TABLE test21_1 PARTITION OF test21 FOR VALUES FROM ('ABC') TO ('DEF'); +INSERT INTO test21 VALUES (1, 'abc'); +INSERT INTO test21 VALUES (2, 'ABC'); +SELECT * FROM test21_1; + a | b +---+----- + 1 | abc + 2 | ABC +(2 rows) + +CREATE TABLE test22 (a int, b text COLLATE case_sensitive) PARTITION BY HASH (b); +CREATE TABLE test22_0 PARTITION OF test22 FOR VALUES WITH (MODULUS 2, REMAINDER 0); +CREATE TABLE test22_1 PARTITION OF test22 FOR VALUES WITH (MODULUS 2, REMAINDER 1); +INSERT INTO test22 VALUES (1, 'def'); +INSERT INTO test22 VALUES (2, 'DEF'); +-- they end up in different partitions +SELECT (SELECT count(*) FROM test22_0) = (SELECT count(*) FROM test22_1); + ?column? 
+---------- + t +(1 row) + +CREATE TABLE test23 (a int, b text COLLATE case_insensitive) PARTITION BY HASH (b); +CREATE TABLE test23_0 PARTITION OF test23 FOR VALUES WITH (MODULUS 2, REMAINDER 0); +CREATE TABLE test23_1 PARTITION OF test23 FOR VALUES WITH (MODULUS 2, REMAINDER 1); +INSERT INTO test23 VALUES (1, 'def'); +INSERT INTO test23 VALUES (2, 'DEF'); +-- they end up in the same partition (but it's platform-dependent which one) +SELECT (SELECT count(*) FROM test23_0) <> (SELECT count(*) FROM test23_1); + ?column? +---------- + t +(1 row) + +CREATE TABLE test30 (a int, b char(3) COLLATE case_insensitive) PARTITION BY LIST (b); +CREATE TABLE test30_1 PARTITION OF test30 FOR VALUES IN ('abc'); +INSERT INTO test30 VALUES (1, 'abc'); +INSERT INTO test30 VALUES (2, 'ABC'); +SELECT * FROM test30_1; + a | b +---+----- + 1 | abc + 2 | ABC +(2 rows) + +CREATE TABLE test31 (a int, b char(3) COLLATE case_insensitive) PARTITION BY RANGE (b); +CREATE TABLE test31_1 PARTITION OF test31 FOR VALUES FROM ('ABC') TO ('DEF'); +INSERT INTO test31 VALUES (1, 'abc'); +INSERT INTO test31 VALUES (2, 'ABC'); +SELECT * FROM test31_1; + a | b +---+----- + 1 | abc + 2 | ABC +(2 rows) + +CREATE TABLE test32 (a int, b char(3) COLLATE case_sensitive) PARTITION BY HASH (b); +CREATE TABLE test32_0 PARTITION OF test32 FOR VALUES WITH (MODULUS 2, REMAINDER 0); +CREATE TABLE test32_1 PARTITION OF test32 FOR VALUES WITH (MODULUS 2, REMAINDER 1); +INSERT INTO test32 VALUES (1, 'def'); +INSERT INTO test32 VALUES (2, 'DEF'); +-- they end up in different partitions +SELECT (SELECT count(*) FROM test32_0) = (SELECT count(*) FROM test32_1); + ?column? 
+---------- + t +(1 row) + +CREATE TABLE test33 (a int, b char(3) COLLATE case_insensitive) PARTITION BY HASH (b); +CREATE TABLE test33_0 PARTITION OF test33 FOR VALUES WITH (MODULUS 2, REMAINDER 0); +CREATE TABLE test33_1 PARTITION OF test33 FOR VALUES WITH (MODULUS 2, REMAINDER 1); +INSERT INTO test33 VALUES (1, 'def'); +INSERT INTO test33 VALUES (2, 'DEF'); +-- they end up in the same partition (but it's platform-dependent which one) +SELECT (SELECT count(*) FROM test33_0) <> (SELECT count(*) FROM test33_1); + ?column? +---------- + t +(1 row) + -- cleanup SET client_min_messages TO warning; DROP SCHEMA collate_tests CASCADE; diff --git a/src/test/regress/expected/collate.linux.utf8.out b/src/test/regress/expected/collate.linux.utf8.out index d33f04a3b5..6b245a7f28 100644 --- a/src/test/regress/expected/collate.linux.utf8.out +++ b/src/test/regress/expected/collate.linux.utf8.out @@ -1117,6 +1117,11 @@ select textrange_en_us('A','Z') @> 'b'::text; drop type textrange_c; drop type textrange_en_us; +-- nondeterministic collations +-- (not supported with libc provider) +CREATE COLLATION ctest_det (locale = 'en_US.utf8', deterministic = true); +CREATE COLLATION ctest_nondet (locale = 'en_US.utf8', deterministic = false); +ERROR: nondeterministic collations not supported with this provider -- cleanup SET client_min_messages TO warning; DROP SCHEMA collate_tests CASCADE; diff --git a/src/test/regress/expected/collate.out b/src/test/regress/expected/collate.out index fcbe3a5cc8..dbfa5c9348 100644 --- a/src/test/regress/expected/collate.out +++ b/src/test/regress/expected/collate.out @@ -498,6 +498,21 @@ SELECT a, b, a < b as lt FROM A | b | t (2 rows) +-- collation mismatch in subselects +SELECT * FROM collate_test10 WHERE (x, y) NOT IN (SELECT y, x FROM collate_test10); +ERROR: could not determine which collation to use for string hashing +HINT: Use the COLLATE clause to set the collation explicitly. 
+-- now it works with overrides +SELECT * FROM collate_test10 WHERE (x COLLATE "POSIX", y COLLATE "C") NOT IN (SELECT y, x FROM collate_test10); + a | x | y +---+---+--- +(0 rows) + +SELECT * FROM collate_test10 WHERE (x, y) NOT IN (SELECT y COLLATE "C", x COLLATE "POSIX" FROM collate_test10); + a | x | y +---+---+--- +(0 rows) + -- casting SELECT CAST('42' AS text COLLATE "C"); ERROR: syntax error at or near "COLLATE" diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out index fe5fc64480..4a54104182 100644 --- a/src/test/regress/expected/subselect.out +++ b/src/test/regress/expected/subselect.out @@ -745,6 +745,25 @@ select * from outer_7597 where (f1, f2) not in (select * from inner_7597); 1 | (2 rows) +-- +-- Similar test case using text that verifies that collation +-- information is passed through by execTuplesEqual() in nodeSubplan.c +-- (otherwise it would error in texteq()) +-- +create temp table outer_text (f1 text, f2 text); +insert into outer_text values ('a', 'a'); +insert into outer_text values ('b', 'a'); +insert into outer_text values ('a', null); +insert into outer_text values ('b', null); +create temp table inner_text (c1 text, c2 text); +insert into inner_text values ('a', null); +select * from outer_text where (f1, f2) not in (select * from inner_text); + f1 | f2 +----+---- + b | a + b | +(2 rows) + -- -- Test case for premature memory release during hashing of subplan output -- diff --git a/src/test/regress/sql/collate.icu.utf8.sql b/src/test/regress/sql/collate.icu.utf8.sql index 0aeba3e202..8de4ad7b5d 100644 --- a/src/test/regress/sql/collate.icu.utf8.sql +++ b/src/test/regress/sql/collate.icu.utf8.sql @@ -425,6 +425,256 @@ CREATE INDEX collate_dep_test4i ON collate_dep_test4t (b COLLATE test0); drop type textrange_en_us; +-- nondeterministic collations + +CREATE COLLATION ctest_det (provider = icu, locale = 'und', deterministic = true); +CREATE COLLATION ctest_nondet (provider = icu, locale = 'und', 
deterministic = false); + +CREATE TABLE test6 (a int, b text); +-- same string in different normal forms +INSERT INTO test6 VALUES (1, U&'\00E4bc'); +INSERT INTO test6 VALUES (2, U&'\0061\0308bc'); +SELECT * FROM test6; +SELECT * FROM test6 WHERE b = 'äbc' COLLATE ctest_det; +SELECT * FROM test6 WHERE b = 'äbc' COLLATE ctest_nondet; + +CREATE COLLATION case_sensitive (provider = icu, locale = 'und'); +CREATE COLLATION case_insensitive (provider = icu, locale = 'und-u-ks-level2', deterministic = false); + +SELECT 'abc' <= 'ABC' COLLATE case_sensitive, 'abc' >= 'ABC' COLLATE case_sensitive; +SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_insensitive; + +CREATE TABLE test1cs (x text COLLATE case_sensitive); +CREATE TABLE test2cs (x text COLLATE case_sensitive); +CREATE TABLE test3cs (x text COLLATE case_sensitive); +INSERT INTO test1cs VALUES ('abc'), ('def'), ('ghi'); +INSERT INTO test2cs VALUES ('ABC'), ('ghi'); +INSERT INTO test3cs VALUES ('abc'), ('ABC'), ('def'), ('ghi'); + +SELECT x FROM test3cs WHERE x = 'abc'; +SELECT x FROM test3cs WHERE x <> 'abc'; +SELECT x FROM test3cs WHERE x LIKE 'a%'; +SELECT x FROM test3cs WHERE x ILIKE 'a%'; +SELECT x FROM test3cs WHERE x SIMILAR TO 'a%'; +SELECT x FROM test3cs WHERE x ~ 'a'; +SELECT x FROM test1cs UNION SELECT x FROM test2cs ORDER BY x; +SELECT x FROM test2cs UNION SELECT x FROM test1cs ORDER BY x; +SELECT x FROM test1cs INTERSECT SELECT x FROM test2cs; +SELECT x FROM test2cs INTERSECT SELECT x FROM test1cs; +SELECT x FROM test1cs EXCEPT SELECT x FROM test2cs; +SELECT x FROM test2cs EXCEPT SELECT x FROM test1cs; +SELECT DISTINCT x FROM test3cs ORDER BY x; +SELECT count(DISTINCT x) FROM test3cs; +SELECT x, count(*) FROM test3cs GROUP BY x ORDER BY x; +SELECT x, row_number() OVER (ORDER BY x), rank() OVER (ORDER BY x) FROM test3cs ORDER BY x; +CREATE UNIQUE INDEX ON test1cs (x); -- ok +INSERT INTO test1cs VALUES ('ABC'); -- ok +CREATE UNIQUE INDEX ON test3cs (x); -- ok +SELECT 
string_to_array('ABC,DEF,GHI' COLLATE case_sensitive, ',', 'abc'); +SELECT string_to_array('ABCDEFGHI' COLLATE case_sensitive, NULL, 'b'); + +CREATE TABLE test1ci (x text COLLATE case_insensitive); +CREATE TABLE test2ci (x text COLLATE case_insensitive); +CREATE TABLE test3ci (x text COLLATE case_insensitive); +CREATE INDEX ON test3ci (x text_pattern_ops); -- error +INSERT INTO test1ci VALUES ('abc'), ('def'), ('ghi'); +INSERT INTO test2ci VALUES ('ABC'), ('ghi'); +INSERT INTO test3ci VALUES ('abc'), ('ABC'), ('def'), ('ghi'); + +SELECT x FROM test3ci WHERE x = 'abc'; +SELECT x FROM test3ci WHERE x <> 'abc'; +SELECT x FROM test3ci WHERE x LIKE 'a%'; +SELECT x FROM test3ci WHERE x ILIKE 'a%'; +SELECT x FROM test3ci WHERE x SIMILAR TO 'a%'; +SELECT x FROM test3ci WHERE x ~ 'a'; +SELECT x FROM test1ci UNION SELECT x FROM test2ci ORDER BY x; +SELECT x FROM test2ci UNION SELECT x FROM test1ci ORDER BY x; +SELECT x FROM test1ci INTERSECT SELECT x FROM test2ci; +SELECT x FROM test2ci INTERSECT SELECT x FROM test1ci; +SELECT x FROM test1ci EXCEPT SELECT x FROM test2ci; +SELECT x FROM test2ci EXCEPT SELECT x FROM test1ci; +SELECT DISTINCT x FROM test3ci ORDER BY x; +SELECT count(DISTINCT x) FROM test3ci; +SELECT x, count(*) FROM test3ci GROUP BY x ORDER BY x; +SELECT x, row_number() OVER (ORDER BY x), rank() OVER (ORDER BY x) FROM test3ci ORDER BY x; +CREATE UNIQUE INDEX ON test1ci (x); -- ok +INSERT INTO test1ci VALUES ('ABC'); -- error +CREATE UNIQUE INDEX ON test3ci (x); -- error +SELECT string_to_array('ABC,DEF,GHI' COLLATE case_insensitive, ',', 'abc'); +SELECT string_to_array('ABCDEFGHI' COLLATE case_insensitive, NULL, 'b'); + +-- bpchar +CREATE TABLE test1bpci (x char(3) COLLATE case_insensitive); +CREATE TABLE test2bpci (x char(3) COLLATE case_insensitive); +CREATE TABLE test3bpci (x char(3) COLLATE case_insensitive); +CREATE INDEX ON test3bpci (x bpchar_pattern_ops); -- error +INSERT INTO test1bpci VALUES ('abc'), ('def'), ('ghi'); +INSERT INTO test2bpci VALUES 
('ABC'), ('ghi'); +INSERT INTO test3bpci VALUES ('abc'), ('ABC'), ('def'), ('ghi'); + +SELECT x FROM test3bpci WHERE x = 'abc'; +SELECT x FROM test3bpci WHERE x <> 'abc'; +SELECT x FROM test3bpci WHERE x LIKE 'a%'; +SELECT x FROM test3bpci WHERE x ILIKE 'a%'; +SELECT x FROM test3bpci WHERE x SIMILAR TO 'a%'; +SELECT x FROM test3bpci WHERE x ~ 'a'; +SELECT x FROM test1bpci UNION SELECT x FROM test2bpci ORDER BY x; +SELECT x FROM test2bpci UNION SELECT x FROM test1bpci ORDER BY x; +SELECT x FROM test1bpci INTERSECT SELECT x FROM test2bpci; +SELECT x FROM test2bpci INTERSECT SELECT x FROM test1bpci; +SELECT x FROM test1bpci EXCEPT SELECT x FROM test2bpci; +SELECT x FROM test2bpci EXCEPT SELECT x FROM test1bpci; +SELECT DISTINCT x FROM test3bpci ORDER BY x; +SELECT count(DISTINCT x) FROM test3bpci; +SELECT x, count(*) FROM test3bpci GROUP BY x ORDER BY x; +SELECT x, row_number() OVER (ORDER BY x), rank() OVER (ORDER BY x) FROM test3bpci ORDER BY x; +CREATE UNIQUE INDEX ON test1bpci (x); -- ok +INSERT INTO test1bpci VALUES ('ABC'); -- error +CREATE UNIQUE INDEX ON test3bpci (x); -- error +SELECT string_to_array('ABC,DEF,GHI'::char(11) COLLATE case_insensitive, ',', 'abc'); +SELECT string_to_array('ABCDEFGHI'::char(9) COLLATE case_insensitive, NULL, 'b'); + +-- This tests the issue described in match_pattern_prefix(). In the +-- absence of that check, the case_insensitive tests below would +-- return no rows where they should logically return one. 
+CREATE TABLE test4c (x text COLLATE "C"); +INSERT INTO test4c VALUES ('abc'); +CREATE INDEX ON test4c (x); +SET enable_seqscan = off; +SELECT x FROM test4c WHERE x LIKE 'ABC' COLLATE case_sensitive; -- ok, no rows +SELECT x FROM test4c WHERE x LIKE 'ABC%' COLLATE case_sensitive; -- ok, no rows +SELECT x FROM test4c WHERE x LIKE 'ABC' COLLATE case_insensitive; -- error +SELECT x FROM test4c WHERE x LIKE 'ABC%' COLLATE case_insensitive; -- error +RESET enable_seqscan; + +-- Unicode special case: different variants of Greek lower case sigma. +-- A naive implementation like citext that just does lower(x) = +-- lower(y) will do the wrong thing here, because lower('Σ') is 'σ' +-- but upper('ς') is 'Σ'. +SELECT 'ὀδυσσεύς' = 'ὈΔΥΣΣΕΎΣ' COLLATE case_sensitive; +SELECT 'ὀδυσσεύς' = 'ὈΔΥΣΣΕΎΣ' COLLATE case_insensitive; + +-- name vs. text comparison operators +SELECT relname FROM pg_class WHERE relname = 'PG_CLASS'::text COLLATE case_insensitive; +SELECT relname FROM pg_class WHERE 'PG_CLASS'::text = relname COLLATE case_insensitive; + +SELECT typname FROM pg_type WHERE typname LIKE 'int_' AND typname <> 'INT2'::text COLLATE case_insensitive; +SELECT typname FROM pg_type WHERE typname LIKE 'int_' AND 'INT2'::text <> typname COLLATE case_insensitive;; + +-- test case adapted from subselect.sql +CREATE TEMP TABLE outer_text (f1 text COLLATE case_insensitive, f2 text); +INSERT INTO outer_text VALUES ('a', 'a'); +INSERT INTO outer_text VALUES ('b', 'a'); +INSERT INTO outer_text VALUES ('A', NULL); +INSERT INTO outer_text VALUES ('B', NULL); + +CREATE TEMP TABLE inner_text (c1 text COLLATE case_insensitive, c2 text); +INSERT INTO inner_text VALUES ('a', NULL); + +SELECT * FROM outer_text WHERE (f1, f2) NOT IN (SELECT * FROM inner_text); + +-- accents +CREATE COLLATION ignore_accents (provider = icu, locale = 'und-u-ks-level1-kc-true', deterministic = false); + +CREATE TABLE test4 (a int, b text); +INSERT INTO test4 VALUES (1, 'cote'), (2, 'côte'), (3, 'coté'), (4, 'côté'); 
+SELECT * FROM test4 WHERE b = 'cote'; +SELECT * FROM test4 WHERE b = 'cote' COLLATE ignore_accents; +SELECT * FROM test4 WHERE b = 'Cote' COLLATE ignore_accents; -- still case-sensitive +SELECT * FROM test4 WHERE b = 'Cote' COLLATE case_insensitive; + +-- foreign keys (should use collation of primary key) + +-- PK is case-sensitive, FK is case-insensitive +CREATE TABLE test10pk (x text COLLATE case_sensitive PRIMARY KEY); +INSERT INTO test10pk VALUES ('abc'), ('def'), ('ghi'); +CREATE TABLE test10fk (x text COLLATE case_insensitive REFERENCES test10pk (x) ON UPDATE CASCADE ON DELETE CASCADE); +INSERT INTO test10fk VALUES ('abc'); -- ok +INSERT INTO test10fk VALUES ('ABC'); -- error +INSERT INTO test10fk VALUES ('xyz'); -- error +SELECT * FROM test10pk; +SELECT * FROM test10fk; +-- restrict update even though the values are "equal" in the FK table +UPDATE test10fk SET x = 'ABC' WHERE x = 'abc'; -- error +SELECT * FROM test10fk; +DELETE FROM test10pk WHERE x = 'abc'; +SELECT * FROM test10pk; +SELECT * FROM test10fk; + +-- PK is case-insensitive, FK is case-sensitive +CREATE TABLE test11pk (x text COLLATE case_insensitive PRIMARY KEY); +INSERT INTO test11pk VALUES ('abc'), ('def'), ('ghi'); +CREATE TABLE test11fk (x text COLLATE case_sensitive REFERENCES test11pk (x) ON UPDATE CASCADE ON DELETE CASCADE); +INSERT INTO test11fk VALUES ('abc'); -- ok +INSERT INTO test11fk VALUES ('ABC'); -- ok +INSERT INTO test11fk VALUES ('xyz'); -- error +SELECT * FROM test11pk; +SELECT * FROM test11fk; +-- cascade update even though the values are "equal" in the PK table +UPDATE test11pk SET x = 'ABC' WHERE x = 'abc'; +SELECT * FROM test11fk; +DELETE FROM test11pk WHERE x = 'abc'; +SELECT * FROM test11pk; +SELECT * FROM test11fk; + +-- partitioning +CREATE TABLE test20 (a int, b text COLLATE case_insensitive) PARTITION BY LIST (b); +CREATE TABLE test20_1 PARTITION OF test20 FOR VALUES IN ('abc'); +INSERT INTO test20 VALUES (1, 'abc'); +INSERT INTO test20 VALUES (2, 'ABC'); +SELECT * 
FROM test20_1; + +CREATE TABLE test21 (a int, b text COLLATE case_insensitive) PARTITION BY RANGE (b); +CREATE TABLE test21_1 PARTITION OF test21 FOR VALUES FROM ('ABC') TO ('DEF'); +INSERT INTO test21 VALUES (1, 'abc'); +INSERT INTO test21 VALUES (2, 'ABC'); +SELECT * FROM test21_1; + +CREATE TABLE test22 (a int, b text COLLATE case_sensitive) PARTITION BY HASH (b); +CREATE TABLE test22_0 PARTITION OF test22 FOR VALUES WITH (MODULUS 2, REMAINDER 0); +CREATE TABLE test22_1 PARTITION OF test22 FOR VALUES WITH (MODULUS 2, REMAINDER 1); +INSERT INTO test22 VALUES (1, 'def'); +INSERT INTO test22 VALUES (2, 'DEF'); +-- they end up in different partitions +SELECT (SELECT count(*) FROM test22_0) = (SELECT count(*) FROM test22_1); + +CREATE TABLE test23 (a int, b text COLLATE case_insensitive) PARTITION BY HASH (b); +CREATE TABLE test23_0 PARTITION OF test23 FOR VALUES WITH (MODULUS 2, REMAINDER 0); +CREATE TABLE test23_1 PARTITION OF test23 FOR VALUES WITH (MODULUS 2, REMAINDER 1); +INSERT INTO test23 VALUES (1, 'def'); +INSERT INTO test23 VALUES (2, 'DEF'); +-- they end up in the same partition (but it's platform-dependent which one) +SELECT (SELECT count(*) FROM test23_0) <> (SELECT count(*) FROM test23_1); + +CREATE TABLE test30 (a int, b char(3) COLLATE case_insensitive) PARTITION BY LIST (b); +CREATE TABLE test30_1 PARTITION OF test30 FOR VALUES IN ('abc'); +INSERT INTO test30 VALUES (1, 'abc'); +INSERT INTO test30 VALUES (2, 'ABC'); +SELECT * FROM test30_1; + +CREATE TABLE test31 (a int, b char(3) COLLATE case_insensitive) PARTITION BY RANGE (b); +CREATE TABLE test31_1 PARTITION OF test31 FOR VALUES FROM ('ABC') TO ('DEF'); +INSERT INTO test31 VALUES (1, 'abc'); +INSERT INTO test31 VALUES (2, 'ABC'); +SELECT * FROM test31_1; + +CREATE TABLE test32 (a int, b char(3) COLLATE case_sensitive) PARTITION BY HASH (b); +CREATE TABLE test32_0 PARTITION OF test32 FOR VALUES WITH (MODULUS 2, REMAINDER 0); +CREATE TABLE test32_1 PARTITION OF test32 FOR VALUES WITH (MODULUS 2, 
REMAINDER 1); +INSERT INTO test32 VALUES (1, 'def'); +INSERT INTO test32 VALUES (2, 'DEF'); +-- they end up in different partitions +SELECT (SELECT count(*) FROM test32_0) = (SELECT count(*) FROM test32_1); + +CREATE TABLE test33 (a int, b char(3) COLLATE case_insensitive) PARTITION BY HASH (b); +CREATE TABLE test33_0 PARTITION OF test33 FOR VALUES WITH (MODULUS 2, REMAINDER 0); +CREATE TABLE test33_1 PARTITION OF test33 FOR VALUES WITH (MODULUS 2, REMAINDER 1); +INSERT INTO test33 VALUES (1, 'def'); +INSERT INTO test33 VALUES (2, 'DEF'); +-- they end up in the same partition (but it's platform-dependent which one) +SELECT (SELECT count(*) FROM test33_0) <> (SELECT count(*) FROM test33_1); + + -- cleanup SET client_min_messages TO warning; DROP SCHEMA collate_tests CASCADE; diff --git a/src/test/regress/sql/collate.linux.utf8.sql b/src/test/regress/sql/collate.linux.utf8.sql index e882153244..4ca02b821d 100644 --- a/src/test/regress/sql/collate.linux.utf8.sql +++ b/src/test/regress/sql/collate.linux.utf8.sql @@ -428,6 +428,13 @@ CREATE INDEX collate_dep_test4i ON collate_dep_test4t (b COLLATE test0); drop type textrange_en_us; +-- nondeterministic collations +-- (not supported with libc provider) + +CREATE COLLATION ctest_det (locale = 'en_US.utf8', deterministic = true); +CREATE COLLATION ctest_nondet (locale = 'en_US.utf8', deterministic = false); + + -- cleanup SET client_min_messages TO warning; DROP SCHEMA collate_tests CASCADE; diff --git a/src/test/regress/sql/collate.sql b/src/test/regress/sql/collate.sql index 4ddde95a5e..cb2bc22155 100644 --- a/src/test/regress/sql/collate.sql +++ b/src/test/regress/sql/collate.sql @@ -163,6 +163,11 @@ CREATE TABLE test_u AS SELECT a, b FROM collate_test1 UNION ALL SELECT a, b FROM SELECT a, b, a < b as lt FROM (VALUES ('a', 'B'), ('A', 'b' COLLATE "C")) v(a,b); +-- collation mismatch in subselects +SELECT * FROM collate_test10 WHERE (x, y) NOT IN (SELECT y, x FROM collate_test10); +-- now it works with overrides +SELECT 
* FROM collate_test10 WHERE (x COLLATE "POSIX", y COLLATE "C") NOT IN (SELECT y, x FROM collate_test10); +SELECT * FROM collate_test10 WHERE (x, y) NOT IN (SELECT y COLLATE "C", x COLLATE "POSIX" FROM collate_test10); -- casting diff --git a/src/test/regress/sql/subselect.sql b/src/test/regress/sql/subselect.sql index b5931ee700..856bbff732 100644 --- a/src/test/regress/sql/subselect.sql +++ b/src/test/regress/sql/subselect.sql @@ -435,6 +435,23 @@ CREATE VIEW orders_view AS select * from outer_7597 where (f1, f2) not in (select * from inner_7597); +-- +-- Similar test case using text that verifies that collation +-- information is passed through by execTuplesEqual() in nodeSubplan.c +-- (otherwise it would error in texteq()) +-- + +create temp table outer_text (f1 text, f2 text); +insert into outer_text values ('a', 'a'); +insert into outer_text values ('b', 'a'); +insert into outer_text values ('a', null); +insert into outer_text values ('b', null); + +create temp table inner_text (c1 text, c2 text); +insert into inner_text values ('a', null); + +select * from outer_text where (f1, f2) not in (select * from inner_text); + -- -- Test case for premature memory release during hashing of subplan output -- diff --git a/src/test/subscription/Makefile b/src/test/subscription/Makefile index e7bbb454c7..4378819530 100644 --- a/src/test/subscription/Makefile +++ b/src/test/subscription/Makefile @@ -15,6 +15,8 @@ include $(top_builddir)/src/Makefile.global EXTRA_INSTALL = contrib/hstore +export with_icu + check: $(prove_check) diff --git a/src/test/subscription/t/012_collation.pl b/src/test/subscription/t/012_collation.pl new file mode 100644 index 0000000000..1c5ebcd0e6 --- /dev/null +++ b/src/test/subscription/t/012_collation.pl @@ -0,0 +1,103 @@ +# Test collations, in particular nondeterministic ones +# (only works with ICU) +use strict; +use warnings; +use PostgresNode; +use TestLib; +use Test::More; + +if ($ENV{with_icu} eq 'yes') +{ + plan tests => 2; +} +else +{ + 
plan skip_all => 'ICU not supported by this build'; +} + +my $node_publisher = get_new_node('publisher'); +$node_publisher->init(allows_streaming => 'logical'); +$node_publisher->start; + +my $node_subscriber = get_new_node('subscriber'); +$node_subscriber->init(allows_streaming => 'logical'); +$node_subscriber->start; + +my $publisher_connstr = $node_publisher->connstr . ' dbname=postgres'; + +# Test plan: Create a table with a nondeterministic collation in the +# primary key column. Pre-insert rows on the publisher and subscriber +# that are collation-wise equal but byte-wise different. (We use a +# string in different normal forms for that.) Set up publisher and +# subscriber. Update the row on the publisher, but don't change the +# primary key column. The subscriber needs to find the row to be +# updated using the nondeterministic collation semantics. We need to +# test for both a replica identity index and for replica identity +# full, since those have different code paths internally. 
+ +$node_subscriber->safe_psql('postgres', + q{CREATE COLLATION ctest_nondet (provider = icu, locale = 'und', deterministic = false)}); + +# table with replica identity index + +$node_publisher->safe_psql('postgres', + q{CREATE TABLE tab1 (a text PRIMARY KEY, b text)}); + +$node_publisher->safe_psql('postgres', + q{INSERT INTO tab1 VALUES (U&'\00E4bc', 'foo')}); + +$node_subscriber->safe_psql('postgres', + q{CREATE TABLE tab1 (a text COLLATE ctest_nondet PRIMARY KEY, b text)}); + +$node_subscriber->safe_psql('postgres', + q{INSERT INTO tab1 VALUES (U&'\0061\0308bc', 'foo')}); + +# table with replica identity full + +$node_publisher->safe_psql('postgres', + q{CREATE TABLE tab2 (a text, b text)}); +$node_publisher->safe_psql('postgres', + q{ALTER TABLE tab2 REPLICA IDENTITY FULL}); + +$node_publisher->safe_psql('postgres', + q{INSERT INTO tab2 VALUES (U&'\00E4bc', 'foo')}); + +$node_subscriber->safe_psql('postgres', + q{CREATE TABLE tab2 (a text COLLATE ctest_nondet, b text)}); +$node_subscriber->safe_psql('postgres', + q{ALTER TABLE tab2 REPLICA IDENTITY FULL}); + +$node_subscriber->safe_psql('postgres', + q{INSERT INTO tab2 VALUES (U&'\0061\0308bc', 'foo')}); + +# set up publication, subscription + +$node_publisher->safe_psql('postgres', + q{CREATE PUBLICATION pub1 FOR ALL TABLES}); + +$node_subscriber->safe_psql('postgres', + qq{CREATE SUBSCRIPTION sub1 CONNECTION '$publisher_connstr application_name=sub1' PUBLICATION pub1 WITH (copy_data = false)}); + +$node_publisher->wait_for_catchup('sub1'); + +# test with replica identity index + +$node_publisher->safe_psql('postgres', + q{UPDATE tab1 SET b = 'bar' WHERE b = 'foo'}); + +$node_publisher->wait_for_catchup('sub1'); + +is($node_subscriber->safe_psql('postgres', q{SELECT b FROM tab1}), + qq(bar), + 'update with primary key with nondeterministic collation'); + +# test with replica identity full + +$node_publisher->safe_psql('postgres', + q{UPDATE tab2 SET b = 'bar' WHERE b = 'foo'}); + 
+$node_publisher->wait_for_catchup('sub1'); + +is($node_subscriber->safe_psql('postgres', q{SELECT b FROM tab2}), + qq(bar), + 'update with replica identity full with nondeterministic collation'); base-commit: e1e0e8d58c5c70da92e36cb9d59c2f7ecf839e00 -- 2.21.0