From 1d675c51fb40a23b355ad5aa3de382e9fd4ffc82 Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Fri, 25 Jan 2019 16:13:00 +0100 Subject: [PATCH v4] Collations with nondeterministic comparison This adds a flag "deterministic" to collations. If that is false, such a collation disables various optimizations that assume that strings are equal only if they are byte-wise equal. That then allows use cases such as case-insensitive or accent-insensitive comparisons or handling of strings with different Unicode normal forms. The term "deterministic comparison" in this context is from Unicode Technical Standard #10 (https://unicode.org/reports/tr10/#Deterministic_Comparison). --- contrib/bloom/bloom.h | 1 + contrib/bloom/blutils.c | 3 +- doc/src/sgml/catalogs.sgml | 7 + doc/src/sgml/charset.sgml | 11 +- doc/src/sgml/func.sgml | 6 + doc/src/sgml/ref/create_collation.sgml | 22 + src/backend/access/hash/hashfunc.c | 86 +++ src/backend/catalog/pg_collation.c | 2 + src/backend/commands/collationcmds.c | 25 +- src/backend/executor/execExpr.c | 4 +- src/backend/executor/execGrouping.c | 12 +- src/backend/executor/execPartition.c | 1 + src/backend/executor/execReplication.c | 5 +- src/backend/executor/nodeAgg.c | 9 +- src/backend/executor/nodeGroup.c | 1 + src/backend/executor/nodeHash.c | 14 +- src/backend/executor/nodeHashjoin.c | 5 + src/backend/executor/nodeRecursiveunion.c | 1 + src/backend/executor/nodeSetOp.c | 2 + src/backend/executor/nodeSubplan.c | 14 +- src/backend/executor/nodeUnique.c | 1 + src/backend/executor/nodeWindowAgg.c | 2 + src/backend/nodes/copyfuncs.c | 7 + src/backend/nodes/outfuncs.c | 7 + src/backend/nodes/readfuncs.c | 7 + src/backend/optimizer/plan/createplan.c | 54 +- src/backend/optimizer/util/tlist.c | 25 + src/backend/partitioning/partbounds.c | 4 +- src/backend/partitioning/partprune.c | 3 +- src/backend/regex/regc_pg_locale.c | 5 + src/backend/utils/adt/arrayfuncs.c | 2 +- src/backend/utils/adt/like.c | 27 +- src/backend/utils/adt/name.c | 32 +- 
src/backend/utils/adt/orderedsetaggs.c | 3 +- src/backend/utils/adt/pg_locale.c | 1 + src/backend/utils/adt/ri_triggers.c | 39 +- src/backend/utils/adt/varchar.c | 117 +++ src/backend/utils/adt/varlena.c | 166 +++-- src/backend/utils/cache/catcache.c | 9 +- src/backend/utils/cache/lsyscache.c | 16 + src/bin/initdb/initdb.c | 4 +- src/bin/pg_dump/pg_dump.c | 39 +- src/bin/psql/describe.c | 17 +- src/include/catalog/pg_collation.h | 2 + src/include/executor/executor.h | 3 + src/include/executor/hashjoin.h | 1 + src/include/executor/nodeHash.h | 2 +- src/include/nodes/execnodes.h | 3 + src/include/nodes/plannodes.h | 7 + src/include/optimizer/planmain.h | 2 +- src/include/optimizer/tlist.h | 1 + src/include/partitioning/partbounds.h | 1 + src/include/utils/lsyscache.h | 1 + src/include/utils/pg_locale.h | 1 + .../regress/expected/collate.icu.utf8.out | 702 +++++++++++++++++- .../regress/expected/collate.linux.utf8.out | 25 +- src/test/regress/expected/collate.out | 15 + src/test/regress/expected/subselect.out | 19 + src/test/regress/sql/collate.icu.utf8.sql | 230 ++++++ src/test/regress/sql/collate.linux.utf8.sql | 8 + src/test/regress/sql/collate.sql | 5 + src/test/regress/sql/subselect.sql | 17 + src/test/subscription/Makefile | 2 + src/test/subscription/t/012_collation.pl | 98 +++ 64 files changed, 1793 insertions(+), 170 deletions(-) create mode 100644 src/test/subscription/t/012_collation.pl diff --git a/contrib/bloom/bloom.h b/contrib/bloom/bloom.h index 24200fb5fa..c4672f6853 100644 --- a/contrib/bloom/bloom.h +++ b/contrib/bloom/bloom.h @@ -137,6 +137,7 @@ typedef struct BloomMetaPageData typedef struct BloomState { FmgrInfo hashFn[INDEX_MAX_KEYS]; + Oid collations[INDEX_MAX_KEYS]; BloomOptions opts; /* copy of options on index's metapage */ int32 nColumns; diff --git a/contrib/bloom/blutils.c b/contrib/bloom/blutils.c index 6458376578..d078dfbd46 100644 --- a/contrib/bloom/blutils.c +++ b/contrib/bloom/blutils.c @@ -163,6 +163,7 @@ initBloomState(BloomState 
*state, Relation index) fmgr_info_copy(&(state->hashFn[i]), index_getprocinfo(index, i + 1, BLOOM_HASH_PROC), CurrentMemoryContext); + state->collations[i] = index->rd_indcollation[i]; } /* Initialize amcache if needed with options from metapage */ @@ -267,7 +268,7 @@ signValue(BloomState *state, BloomSignatureWord *sign, Datum value, int attno) * different columns will be mapped into different bits because of step * above */ - hashVal = DatumGetInt32(FunctionCall1(&state->hashFn[attno], value)); + hashVal = DatumGetInt32(FunctionCall1Coll(&state->hashFn[attno], state->collations[attno], value)); mySrand(hashVal ^ myRand()); for (j = 0; j < state->opts.bitSize[attno]; j++) diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml index af4d0625ea..b087c1ec74 100644 --- a/doc/src/sgml/catalogs.sgml +++ b/doc/src/sgml/catalogs.sgml @@ -2077,6 +2077,13 @@ <structname>pg_collation</structname> Columns default, c = libc, i = icu + + collisdeterministic + bool + + Is the collation deterministic? + + collencoding int4 diff --git a/doc/src/sgml/charset.sgml b/doc/src/sgml/charset.sgml index a6143ef8a7..ec6343fa80 100644 --- a/doc/src/sgml/charset.sgml +++ b/doc/src/sgml/charset.sgml @@ -847,11 +847,12 @@ ICU collations Note that while this system allows creating collations that ignore - case or ignore accents or similar (using - the ks key), PostgreSQL does not at the moment allow - such collations to act in a truly case- or accent-insensitive manner. Any - strings that compare equal according to the collation but are not - byte-wise equal will be sorted according to their byte values. + case or ignore accents or similar (using the + ks key), in order for such collations to act in a + truly case- or accent-insensitive manner, they also need to be declared as not + deterministic in CREATE COLLATION. + Otherwise, any strings that compare equal according to the collation but + are not byte-wise equal will be sorted according to their byte values. 
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 4930ec17f6..ee84e60ca6 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -3962,6 +3962,12 @@ Pattern Matching + + The pattern matching operators of all three kinds do not support + nondeterministic collations. If required, apply a different collation to + the expression to work around this limitation. + + <function>LIKE</function> diff --git a/doc/src/sgml/ref/create_collation.sgml b/doc/src/sgml/ref/create_collation.sgml index 038797fce1..def4dda6e8 100644 --- a/doc/src/sgml/ref/create_collation.sgml +++ b/doc/src/sgml/ref/create_collation.sgml @@ -23,6 +23,7 @@ [ LC_COLLATE = lc_collate, ] [ LC_CTYPE = lc_ctype, ] [ PROVIDER = provider, ] + [ DETERMINISTIC = boolean, ] [ VERSION = version ] ) CREATE COLLATION [ IF NOT EXISTS ] name FROM existing_collation @@ -124,6 +125,27 @@ Parameters + + DETERMINISTIC + + + + Specifies whether the collation should use deterministic comparisons. + The default is true. A deterministic comparison considers strings that + are not byte-wise equal to be unequal even if they are considered + logically equal by the comparison. PostgreSQL breaks ties using a + byte-wise comparison. Comparison that is not deterministic can make the + collation be, say, case- or accent-insensitive. For that, you need to + choose an appropriate LC_COLLATE setting + and set the collation to not deterministic here. + + + + Nondeterministic collations are only supported with the ICU provider. + + + + version diff --git a/src/backend/access/hash/hashfunc.c b/src/backend/access/hash/hashfunc.c index 63005ddc4d..a2ac1f8137 100644 --- a/src/backend/access/hash/hashfunc.c +++ b/src/backend/access/hash/hashfunc.c @@ -27,7 +27,9 @@ #include "postgres.h" #include "access/hash.h" +#include "catalog/pg_collation.h" #include "utils/builtins.h" +#include "utils/pg_locale.h" /* * Datatype-specific hash functions. 
@@ -242,8 +244,50 @@ Datum hashtext(PG_FUNCTION_ARGS) { text *key = PG_GETARG_TEXT_PP(0); + Oid collid = PG_GET_COLLATION(); Datum result; + if (!collid) + ereport(ERROR, + (errcode(ERRCODE_INDETERMINATE_COLLATION), + errmsg("could not determine which collation to use for string hashing"), + errhint("Use the COLLATE clause to set the collation explicitly."))); + + if (collid != DEFAULT_COLLATION_OID) + { + pg_locale_t mylocale = pg_newlocale_from_collation(collid); + + if (mylocale && !mylocale->deterministic) + { + if (mylocale->provider == COLLPROVIDER_ICU) + { +#ifdef USE_ICU + int32_t ulen = -1; + UChar *uchar = NULL; + Size bsize; + uint8_t *buf; + + ulen = icu_to_uchar(&uchar, VARDATA_ANY(key), VARSIZE_ANY_EXHDR(key)); + + bsize = ucol_getSortKey(mylocale->info.icu.ucol, + uchar, ulen, NULL, 0); + buf = palloc(bsize); + ucol_getSortKey(mylocale->info.icu.ucol, + uchar, ulen, buf, bsize); + + result = hash_any(buf, bsize); + + PG_FREE_IF_COPY(key, 0); + + return result; +#else + /* shouldn't happen */ + elog(ERROR, "unsupported collprovider: %c", mylocale->provider); +#endif + } + } + } + /* * Note: this is currently identical in behavior to hashvarlena, but keep * it as a separate function in case we someday want to do something @@ -262,8 +306,50 @@ Datum hashtextextended(PG_FUNCTION_ARGS) { text *key = PG_GETARG_TEXT_PP(0); + Oid collid = PG_GET_COLLATION(); Datum result; + if (!collid) + ereport(ERROR, + (errcode(ERRCODE_INDETERMINATE_COLLATION), + errmsg("could not determine which collation to use for string hashing"), + errhint("Use the COLLATE clause to set the collation explicitly."))); + + if (collid != DEFAULT_COLLATION_OID) + { + pg_locale_t mylocale = pg_newlocale_from_collation(collid); + + if (mylocale && !mylocale->deterministic) + { + if (mylocale->provider == COLLPROVIDER_ICU) + { +#ifdef USE_ICU + int32_t ulen = -1; + UChar *uchar = NULL; + Size bsize; + uint8_t *buf; + + ulen = icu_to_uchar(&uchar, VARDATA_ANY(key), VARSIZE_ANY_EXHDR(key)); + 
+ bsize = ucol_getSortKey(mylocale->info.icu.ucol, + uchar, ulen, NULL, 0); + buf = palloc(bsize); + ucol_getSortKey(mylocale->info.icu.ucol, + uchar, ulen, buf, bsize); + + result = hash_any_extended(buf, bsize, PG_GETARG_INT64(1)); + + PG_FREE_IF_COPY(key, 0); + + return result; +#else + /* shouldn't happen */ + elog(ERROR, "unsupported collprovider: %c", mylocale->provider); +#endif + } + } + } + /* Same approach as hashtext */ result = hash_any_extended((unsigned char *) VARDATA_ANY(key), VARSIZE_ANY_EXHDR(key), diff --git a/src/backend/catalog/pg_collation.c b/src/backend/catalog/pg_collation.c index 74e1e82cb9..dd99d53547 100644 --- a/src/backend/catalog/pg_collation.c +++ b/src/backend/catalog/pg_collation.c @@ -46,6 +46,7 @@ Oid CollationCreate(const char *collname, Oid collnamespace, Oid collowner, char collprovider, + bool collisdeterministic, int32 collencoding, const char *collcollate, const char *collctype, const char *collversion, @@ -160,6 +161,7 @@ CollationCreate(const char *collname, Oid collnamespace, values[Anum_pg_collation_collnamespace - 1] = ObjectIdGetDatum(collnamespace); values[Anum_pg_collation_collowner - 1] = ObjectIdGetDatum(collowner); values[Anum_pg_collation_collprovider - 1] = CharGetDatum(collprovider); + values[Anum_pg_collation_collisdeterministic - 1] = BoolGetDatum(collisdeterministic); values[Anum_pg_collation_collencoding - 1] = Int32GetDatum(collencoding); namestrcpy(&name_collate, collcollate); values[Anum_pg_collation_collcollate - 1] = NameGetDatum(&name_collate); diff --git a/src/backend/commands/collationcmds.c b/src/backend/commands/collationcmds.c index ed3f1c12e5..919e092483 100644 --- a/src/backend/commands/collationcmds.c +++ b/src/backend/commands/collationcmds.c @@ -59,10 +59,12 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e DefElem *lccollateEl = NULL; DefElem *lcctypeEl = NULL; DefElem *providerEl = NULL; + DefElem *deterministicEl = NULL; DefElem *versionEl = NULL; char 
*collcollate = NULL; char *collctype = NULL; char *collproviderstr = NULL; + bool collisdeterministic = true; int collencoding = 0; char collprovider = 0; char *collversion = NULL; @@ -91,6 +93,8 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e defelp = &lcctypeEl; else if (strcmp(defel->defname, "provider") == 0) defelp = &providerEl; + else if (strcmp(defel->defname, "deterministic") == 0) + defelp = &deterministicEl; else if (strcmp(defel->defname, "version") == 0) defelp = &versionEl; else @@ -125,6 +129,7 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e collcollate = pstrdup(NameStr(((Form_pg_collation) GETSTRUCT(tp))->collcollate)); collctype = pstrdup(NameStr(((Form_pg_collation) GETSTRUCT(tp))->collctype)); collprovider = ((Form_pg_collation) GETSTRUCT(tp))->collprovider; + collisdeterministic = ((Form_pg_collation) GETSTRUCT(tp))->collisdeterministic; collencoding = ((Form_pg_collation) GETSTRUCT(tp))->collencoding; ReleaseSysCache(tp); @@ -157,6 +162,9 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e if (providerEl) collproviderstr = defGetString(providerEl); + if (deterministicEl) + collisdeterministic = defGetBoolean(deterministicEl); + if (versionEl) collversion = defGetString(versionEl); @@ -185,6 +193,16 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), errmsg("parameter \"lc_ctype\" must be specified"))); + /* + * Nondeterministic collations are currently only supported with ICU + * because that's the only case where it can actually make a difference. + * So we can save writing the code for the other providers. 
+ */ + if (!collisdeterministic && collprovider != COLLPROVIDER_ICU) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("nondeterministic collations not supported with this provider"))); + if (!fromEl) { if (collprovider == COLLPROVIDER_ICU) @@ -203,6 +221,7 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e collNamespace, GetUserId(), collprovider, + collisdeterministic, collencoding, collcollate, collctype, @@ -586,7 +605,7 @@ pg_import_system_collations(PG_FUNCTION_ARGS) * about existing ones. */ collid = CollationCreate(localebuf, nspid, GetUserId(), - COLLPROVIDER_LIBC, enc, + COLLPROVIDER_LIBC, true, enc, localebuf, localebuf, get_collation_actual_version(COLLPROVIDER_LIBC, localebuf), true, true); @@ -647,7 +666,7 @@ pg_import_system_collations(PG_FUNCTION_ARGS) int enc = aliases[i].enc; collid = CollationCreate(alias, nspid, GetUserId(), - COLLPROVIDER_LIBC, enc, + COLLPROVIDER_LIBC, true, enc, locale, locale, get_collation_actual_version(COLLPROVIDER_LIBC, locale), true, true); @@ -709,7 +728,7 @@ pg_import_system_collations(PG_FUNCTION_ARGS) collid = CollationCreate(psprintf("%s-x-icu", langtag), nspid, GetUserId(), - COLLPROVIDER_ICU, -1, + COLLPROVIDER_ICU, true, -1, collcollate, collcollate, get_collation_actual_version(COLLPROVIDER_ICU, collcollate), true, true); diff --git a/src/backend/executor/execExpr.c b/src/backend/executor/execExpr.c index 4047d24b03..4eada12e70 100644 --- a/src/backend/executor/execExpr.c +++ b/src/backend/executor/execExpr.c @@ -3316,6 +3316,7 @@ ExecBuildGroupingEqual(TupleDesc ldesc, TupleDesc rdesc, int numCols, const AttrNumber *keyColIdx, const Oid *eqfunctions, + const Oid *collations, PlanState *parent) { ExprState *state = makeNode(ExprState); @@ -3376,6 +3377,7 @@ ExecBuildGroupingEqual(TupleDesc ldesc, TupleDesc rdesc, Form_pg_attribute latt = TupleDescAttr(ldesc, attno - 1); Form_pg_attribute ratt = TupleDescAttr(rdesc, attno - 1); Oid foid = eqfunctions[natt]; + 
Oid collid = collations[natt]; FmgrInfo *finfo; FunctionCallInfo fcinfo; AclResult aclresult; @@ -3393,7 +3395,7 @@ ExecBuildGroupingEqual(TupleDesc ldesc, TupleDesc rdesc, fmgr_info(foid, finfo); fmgr_info_set_expr(NULL, finfo); InitFunctionCallInfoData(*fcinfo, finfo, 2, - InvalidOid, NULL, NULL); + collid, NULL, NULL); /* left arg */ scratch.opcode = EEOP_INNER_VAR; diff --git a/src/backend/executor/execGrouping.c b/src/backend/executor/execGrouping.c index 4e7600e4ed..cef6323c63 100644 --- a/src/backend/executor/execGrouping.c +++ b/src/backend/executor/execGrouping.c @@ -61,6 +61,7 @@ execTuplesMatchPrepare(TupleDesc desc, int numCols, const AttrNumber *keyColIdx, const Oid *eqOperators, + const Oid *collations, PlanState *parent) { Oid *eqFunctions = (Oid *) palloc(numCols * sizeof(Oid)); @@ -76,7 +77,7 @@ execTuplesMatchPrepare(TupleDesc desc, /* build actual expression */ expr = ExecBuildGroupingEqual(desc, desc, NULL, NULL, - numCols, keyColIdx, eqFunctions, + numCols, keyColIdx, eqFunctions, collations, parent); return expr; @@ -155,6 +156,7 @@ BuildTupleHashTable(PlanState *parent, int numCols, AttrNumber *keyColIdx, const Oid *eqfuncoids, FmgrInfo *hashfunctions, + Oid *collations, long nbuckets, Size additionalsize, MemoryContext tablecxt, MemoryContext tempcxt, bool use_variable_hash_iv) @@ -174,6 +176,7 @@ BuildTupleHashTable(PlanState *parent, hashtable->numCols = numCols; hashtable->keyColIdx = keyColIdx; hashtable->tab_hash_funcs = hashfunctions; + hashtable->tab_collations = collations; hashtable->tablecxt = tablecxt; hashtable->tempcxt = tempcxt; hashtable->entrysize = entrysize; @@ -211,7 +214,7 @@ BuildTupleHashTable(PlanState *parent, hashtable->tab_eq_func = ExecBuildGroupingEqual(inputDesc, inputDesc, &TTSOpsMinimalTuple, &TTSOpsMinimalTuple, numCols, - keyColIdx, eqfuncoids, + keyColIdx, eqfuncoids, collations, parent); MemoryContextSwitchTo(oldcontext); @@ -374,8 +377,9 @@ TupleHashTableHash(struct tuplehash_hash *tb, const MinimalTuple 
tuple) { uint32 hkey; - hkey = DatumGetUInt32(FunctionCall1(&hashfunctions[i], - attr)); + hkey = DatumGetUInt32(FunctionCall1Coll(&hashfunctions[i], + hashtable->tab_collations[i], + attr)); hashkey ^= hkey; } } diff --git a/src/backend/executor/execPartition.c b/src/backend/executor/execPartition.c index 2a7bc01563..0341250f1d 100644 --- a/src/backend/executor/execPartition.c +++ b/src/backend/executor/execPartition.c @@ -1218,6 +1218,7 @@ get_partition_for_tuple(PartitionDispatch pd, Datum *values, bool *isnull) greatest_modulus = get_hash_partition_greatest_modulus(boundinfo); rowHash = compute_partition_hash_value(key->partnatts, key->partsupfunc, + key->partcollation, values, isnull); part_index = boundinfo->indexes[rowHash % greatest_modulus]; diff --git a/src/backend/executor/execReplication.c b/src/backend/executor/execReplication.c index 589573b879..d9d5a7b78b 100644 --- a/src/backend/executor/execReplication.c +++ b/src/backend/executor/execReplication.c @@ -95,6 +95,8 @@ build_replindex_scan_key(ScanKey skey, Relation rel, Relation idxrel, regop, searchslot->tts_values[mainattno - 1]); + skey[attoff].sk_collation = idxrel->rd_indcollation[attoff]; + /* Check for null value. */ if (searchslot->tts_isnull[mainattno - 1]) { @@ -261,7 +263,8 @@ tuple_equals_slot(TupleDesc desc, HeapTuple tup, TupleTableSlot *slot) errmsg("could not identify an equality operator for type %s", format_type_be(att->atttypid)))); - if (!DatumGetBool(FunctionCall2(&typentry->eq_opr_finfo, + if (!DatumGetBool(FunctionCall2Coll(&typentry->eq_opr_finfo, + att->attcollation, values[attrnum], slot->tts_values[attrnum]))) return false; diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c index 508c919574..1e0b46cd95 100644 --- a/src/backend/executor/nodeAgg.c +++ b/src/backend/executor/nodeAgg.c @@ -747,15 +747,14 @@ process_ordered_aggregate_single(AggState *aggstate, /* * If DISTINCT mode, and not distinct from prior, skip it. 
- * - * Note: we assume equality functions don't care about collation. */ if (isDistinct && haveOldVal && ((oldIsNull && *isNull) || (!oldIsNull && !*isNull && oldAbbrevVal == newAbbrevVal && - DatumGetBool(FunctionCall2(&pertrans->equalfnOne, + DatumGetBool(FunctionCall2Coll(&pertrans->equalfnOne, + pertrans->aggCollation, oldVal, *newVal))))) { /* equal to prior, so forget this one */ @@ -1283,6 +1282,7 @@ build_hash_table(AggState *aggstate) perhash->hashGrpColIdxHash, perhash->eqfuncoids, perhash->hashfunctions, + perhash->aggnode->grpCollations, perhash->aggnode->numGroups, additionalsize, aggstate->hashcontext->ecxt_per_tuple_memory, @@ -2376,6 +2376,7 @@ ExecInitAgg(Agg *node, EState *estate, int eflags) length, aggnode->grpColIdx, aggnode->grpOperators, + aggnode->grpCollations, (PlanState *) aggstate); } @@ -2387,6 +2388,7 @@ ExecInitAgg(Agg *node, EState *estate, int eflags) aggnode->numCols, aggnode->grpColIdx, aggnode->grpOperators, + aggnode->grpCollations, (PlanState *) aggstate); } } @@ -3141,6 +3143,7 @@ build_pertrans_for_aggref(AggStatePerTrans pertrans, numDistinctCols, pertrans->sortColIdx, ops, + pertrans->sortCollations, &aggstate->ss.ps); pfree(ops); } diff --git a/src/backend/executor/nodeGroup.c b/src/backend/executor/nodeGroup.c index 655084d7b5..05f1d33150 100644 --- a/src/backend/executor/nodeGroup.c +++ b/src/backend/executor/nodeGroup.c @@ -212,6 +212,7 @@ ExecInitGroup(Group *node, EState *estate, int eflags) node->numCols, node->grpColIdx, node->grpOperators, + node->grpCollations, &grpstate->ss.ps); return grpstate; diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c index 856daf6a7f..64eec91f8b 100644 --- a/src/backend/executor/nodeHash.c +++ b/src/backend/executor/nodeHash.c @@ -425,7 +425,7 @@ ExecEndHash(HashState *node) * ---------------------------------------------------------------- */ HashJoinTable -ExecHashTableCreate(HashState *state, List *hashOperators, bool keepNulls) 
+ExecHashTableCreate(HashState *state, List *hashOperators, List *hashCollations, bool keepNulls) { Hash *node; HashJoinTable hashtable; @@ -439,6 +439,7 @@ ExecHashTableCreate(HashState *state, List *hashOperators, bool keepNulls) int nkeys; int i; ListCell *ho; + ListCell *hc; MemoryContext oldcxt; /* @@ -541,8 +542,9 @@ ExecHashTableCreate(HashState *state, List *hashOperators, bool keepNulls) hashtable->inner_hashfunctions = (FmgrInfo *) palloc(nkeys * sizeof(FmgrInfo)); hashtable->hashStrict = (bool *) palloc(nkeys * sizeof(bool)); + hashtable->collations = (Oid *) palloc(nkeys * sizeof(Oid)); i = 0; - foreach(ho, hashOperators) + forboth(ho, hashOperators, hc, hashCollations) { Oid hashop = lfirst_oid(ho); Oid left_hashfn; @@ -554,6 +556,7 @@ ExecHashTableCreate(HashState *state, List *hashOperators, bool keepNulls) fmgr_info(left_hashfn, &hashtable->outer_hashfunctions[i]); fmgr_info(right_hashfn, &hashtable->inner_hashfunctions[i]); hashtable->hashStrict[i] = op_strict(hashop); + hashtable->collations[i] = lfirst_oid(hc); i++; } @@ -1847,7 +1850,7 @@ ExecHashGetHashValue(HashJoinTable hashtable, /* Compute the hash function */ uint32 hkey; - hkey = DatumGetUInt32(FunctionCall1(&hashfunctions[i], keyval)); + hkey = DatumGetUInt32(FunctionCall1Coll(&hashfunctions[i], hashtable->collations[i], keyval)); hashkey ^= hkey; } @@ -2303,8 +2306,9 @@ ExecHashBuildSkewHash(HashJoinTable hashtable, Hash *node, int mcvsToUse) uint32 hashvalue; int bucket; - hashvalue = DatumGetUInt32(FunctionCall1(&hashfunctions[0], - sslot.values[i])); + hashvalue = DatumGetUInt32(FunctionCall1Coll(&hashfunctions[0], + hashtable->collations[0], + sslot.values[i])); /* * While we have not hit a hole in the hashtable and have not hit diff --git a/src/backend/executor/nodeHashjoin.c b/src/backend/executor/nodeHashjoin.c index 2098708864..aa43296e26 100644 --- a/src/backend/executor/nodeHashjoin.c +++ b/src/backend/executor/nodeHashjoin.c @@ -278,6 +278,7 @@ ExecHashJoinImpl(PlanState 
*pstate, bool parallel) */ hashtable = ExecHashTableCreate(hashNode, node->hj_HashOperators, + node->hj_Collations, HJ_FILL_INNER(node)); node->hj_HashTable = hashtable; @@ -603,6 +604,7 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags) List *rclauses; List *rhclauses; List *hoperators; + List *hcollations; TupleDesc outerDesc, innerDesc; ListCell *l; @@ -738,6 +740,7 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags) rclauses = NIL; rhclauses = NIL; hoperators = NIL; + hcollations = NIL; foreach(l, node->hashclauses) { OpExpr *hclause = lfirst_node(OpExpr, l); @@ -749,10 +752,12 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags) rhclauses = lappend(rhclauses, ExecInitExpr(lsecond(hclause->args), innerPlanState(hjstate))); hoperators = lappend_oid(hoperators, hclause->opno); + hcollations = lappend_oid(hcollations, hclause->inputcollid); } hjstate->hj_OuterHashKeys = lclauses; hjstate->hj_InnerHashKeys = rclauses; hjstate->hj_HashOperators = hoperators; + hjstate->hj_Collations = hcollations; /* child Hash node needs to evaluate inner hash keys, too */ ((HashState *) innerPlanState(hjstate))->hashkeys = rhclauses; diff --git a/src/backend/executor/nodeRecursiveunion.c b/src/backend/executor/nodeRecursiveunion.c index d2488ad988..bef2c7fbdc 100644 --- a/src/backend/executor/nodeRecursiveunion.c +++ b/src/backend/executor/nodeRecursiveunion.c @@ -43,6 +43,7 @@ build_hash_table(RecursiveUnionState *rustate) node->dupColIdx, rustate->eqfuncoids, rustate->hashfunctions, + node->dupCollations, node->numGroups, 0, rustate->tableContext, diff --git a/src/backend/executor/nodeSetOp.c b/src/backend/executor/nodeSetOp.c index 5d8c8b8b02..5a86bbcc95 100644 --- a/src/backend/executor/nodeSetOp.c +++ b/src/backend/executor/nodeSetOp.c @@ -132,6 +132,7 @@ build_hash_table(SetOpState *setopstate) node->dupColIdx, setopstate->eqfuncoids, setopstate->hashfunctions, + node->dupCollations, node->numGroups, 0, setopstate->tableContext, @@ -553,6 
+554,7 @@ ExecInitSetOp(SetOp *node, EState *estate, int eflags) node->numCols, node->dupColIdx, node->dupOperators, + node->dupCollations, &setopstate->ps); if (node->strategy == SETOP_HASHED) diff --git a/src/backend/executor/nodeSubplan.c b/src/backend/executor/nodeSubplan.c index 574e7bc4fa..6d2a66ec67 100644 --- a/src/backend/executor/nodeSubplan.c +++ b/src/backend/executor/nodeSubplan.c @@ -511,6 +511,7 @@ buildSubPlanHash(SubPlanState *node, ExprContext *econtext) node->keyColIdx, node->tab_eq_funcoids, node->tab_hash_funcs, + node->tab_collations, nbuckets, 0, node->hashtablecxt, @@ -533,6 +534,7 @@ buildSubPlanHash(SubPlanState *node, ExprContext *econtext) node->keyColIdx, node->tab_eq_funcoids, node->tab_hash_funcs, + node->tab_collations, nbuckets, 0, node->hashtablecxt, @@ -633,6 +635,7 @@ execTuplesUnequal(TupleTableSlot *slot1, int numCols, AttrNumber *matchColIdx, FmgrInfo *eqfunctions, + const Oid *collations, MemoryContext evalContext) { MemoryContext oldContext; @@ -670,8 +673,8 @@ execTuplesUnequal(TupleTableSlot *slot1, continue; /* can't prove anything here */ /* Apply the type-specific equality function */ - - if (!DatumGetBool(FunctionCall2(&eqfunctions[i], + if (!DatumGetBool(FunctionCall2Coll(&eqfunctions[i], + collations[i], attr1, attr2))) { result = true; /* they are unequal */ @@ -713,6 +716,7 @@ findPartialMatch(TupleHashTable hashtable, TupleTableSlot *slot, if (!execTuplesUnequal(slot, hashtable->tableslot, numCols, keyColIdx, eqfunctions, + hashtable->tab_collations, hashtable->tempcxt)) { TermTupleHashIterator(&hashiter); @@ -808,6 +812,7 @@ ExecInitSubPlan(SubPlan *subplan, PlanState *parent) sstate->tab_eq_funcoids = NULL; sstate->tab_hash_funcs = NULL; sstate->tab_eq_funcs = NULL; + sstate->tab_collations = NULL; sstate->lhs_hash_funcs = NULL; sstate->cur_eq_funcs = NULL; @@ -906,6 +911,7 @@ ExecInitSubPlan(SubPlan *subplan, PlanState *parent) sstate->tab_eq_funcoids = (Oid *) palloc(ncols * sizeof(Oid)); 
sstate->tab_hash_funcs = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo)); sstate->tab_eq_funcs = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo)); + sstate->tab_collations = (Oid *) palloc(ncols * sizeof(Oid)); sstate->lhs_hash_funcs = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo)); sstate->cur_eq_funcs = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo)); i = 1; @@ -956,6 +962,9 @@ ExecInitSubPlan(SubPlan *subplan, PlanState *parent) fmgr_info(left_hashfn, &sstate->lhs_hash_funcs[i - 1]); fmgr_info(right_hashfn, &sstate->tab_hash_funcs[i - 1]); + /* Set collation */ + sstate->tab_collations[i - 1] = opexpr->inputcollid; + i++; } @@ -992,6 +1001,7 @@ ExecInitSubPlan(SubPlan *subplan, PlanState *parent) ncols, sstate->keyColIdx, sstate->tab_eq_funcoids, + sstate->tab_collations, parent); } diff --git a/src/backend/executor/nodeUnique.c b/src/backend/executor/nodeUnique.c index ad7039937d..c553f150b8 100644 --- a/src/backend/executor/nodeUnique.c +++ b/src/backend/executor/nodeUnique.c @@ -152,6 +152,7 @@ ExecInitUnique(Unique *node, EState *estate, int eflags) node->numCols, node->uniqColIdx, node->uniqOperators, + node->uniqCollations, &uniquestate->ps); return uniquestate; diff --git a/src/backend/executor/nodeWindowAgg.c b/src/backend/executor/nodeWindowAgg.c index 7ae56074ca..4942003e82 100644 --- a/src/backend/executor/nodeWindowAgg.c +++ b/src/backend/executor/nodeWindowAgg.c @@ -2370,6 +2370,7 @@ ExecInitWindowAgg(WindowAgg *node, EState *estate, int eflags) node->partNumCols, node->partColIdx, node->partOperators, + node->partCollations, &winstate->ss.ps); if (node->ordNumCols > 0) @@ -2378,6 +2379,7 @@ ExecInitWindowAgg(WindowAgg *node, EState *estate, int eflags) node->ordNumCols, node->ordColIdx, node->ordOperators, + node->ordCollations, &winstate->ss.ps); /* diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 3eb7e95d64..8296da047c 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -297,6 +297,7 @@ 
_copyRecursiveUnion(const RecursiveUnion *from) { COPY_POINTER_FIELD(dupColIdx, from->numCols * sizeof(AttrNumber)); COPY_POINTER_FIELD(dupOperators, from->numCols * sizeof(Oid)); + COPY_POINTER_FIELD(dupCollations, from->numCols * sizeof(Oid)); } COPY_SCALAR_FIELD(numGroups); @@ -956,6 +957,7 @@ _copyGroup(const Group *from) COPY_SCALAR_FIELD(numCols); COPY_POINTER_FIELD(grpColIdx, from->numCols * sizeof(AttrNumber)); COPY_POINTER_FIELD(grpOperators, from->numCols * sizeof(Oid)); + COPY_POINTER_FIELD(grpCollations, from->numCols * sizeof(Oid)); return newnode; } @@ -977,6 +979,7 @@ _copyAgg(const Agg *from) { COPY_POINTER_FIELD(grpColIdx, from->numCols * sizeof(AttrNumber)); COPY_POINTER_FIELD(grpOperators, from->numCols * sizeof(Oid)); + COPY_POINTER_FIELD(grpCollations, from->numCols * sizeof(Oid)); } COPY_SCALAR_FIELD(numGroups); COPY_BITMAPSET_FIELD(aggParams); @@ -1002,12 +1005,14 @@ _copyWindowAgg(const WindowAgg *from) { COPY_POINTER_FIELD(partColIdx, from->partNumCols * sizeof(AttrNumber)); COPY_POINTER_FIELD(partOperators, from->partNumCols * sizeof(Oid)); + COPY_POINTER_FIELD(partCollations, from->partNumCols * sizeof(Oid)); } COPY_SCALAR_FIELD(ordNumCols); if (from->ordNumCols > 0) { COPY_POINTER_FIELD(ordColIdx, from->ordNumCols * sizeof(AttrNumber)); COPY_POINTER_FIELD(ordOperators, from->ordNumCols * sizeof(Oid)); + COPY_POINTER_FIELD(ordCollations, from->ordNumCols * sizeof(Oid)); } COPY_SCALAR_FIELD(frameOptions); COPY_NODE_FIELD(startOffset); @@ -1040,6 +1045,7 @@ _copyUnique(const Unique *from) COPY_SCALAR_FIELD(numCols); COPY_POINTER_FIELD(uniqColIdx, from->numCols * sizeof(AttrNumber)); COPY_POINTER_FIELD(uniqOperators, from->numCols * sizeof(Oid)); + COPY_POINTER_FIELD(uniqCollations, from->numCols * sizeof(Oid)); return newnode; } @@ -1089,6 +1095,7 @@ _copySetOp(const SetOp *from) COPY_SCALAR_FIELD(numCols); COPY_POINTER_FIELD(dupColIdx, from->numCols * sizeof(AttrNumber)); COPY_POINTER_FIELD(dupOperators, from->numCols * sizeof(Oid)); + 
COPY_POINTER_FIELD(dupCollations, from->numCols * sizeof(Oid)); COPY_SCALAR_FIELD(flagColIdx); COPY_SCALAR_FIELD(firstFlag); COPY_SCALAR_FIELD(numGroups); diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 0fde876c77..0af43aeb47 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -463,6 +463,7 @@ _outRecursiveUnion(StringInfo str, const RecursiveUnion *node) WRITE_INT_FIELD(numCols); WRITE_ATTRNUMBER_ARRAY(dupColIdx, node->numCols); WRITE_OID_ARRAY(dupOperators, node->numCols); + WRITE_OID_ARRAY(dupCollations, node->numCols); WRITE_LONG_FIELD(numGroups); } @@ -774,6 +775,7 @@ _outAgg(StringInfo str, const Agg *node) WRITE_INT_FIELD(numCols); WRITE_ATTRNUMBER_ARRAY(grpColIdx, node->numCols); WRITE_OID_ARRAY(grpOperators, node->numCols); + WRITE_OID_ARRAY(grpCollations, node->numCols); WRITE_LONG_FIELD(numGroups); WRITE_BITMAPSET_FIELD(aggParams); WRITE_NODE_FIELD(groupingSets); @@ -791,9 +793,11 @@ _outWindowAgg(StringInfo str, const WindowAgg *node) WRITE_INT_FIELD(partNumCols); WRITE_ATTRNUMBER_ARRAY(partColIdx, node->partNumCols); WRITE_OID_ARRAY(partOperators, node->partNumCols); + WRITE_OID_ARRAY(partCollations, node->partNumCols); WRITE_INT_FIELD(ordNumCols); WRITE_ATTRNUMBER_ARRAY(ordColIdx, node->ordNumCols); WRITE_OID_ARRAY(ordOperators, node->ordNumCols); + WRITE_OID_ARRAY(ordCollations, node->ordNumCols); WRITE_INT_FIELD(frameOptions); WRITE_NODE_FIELD(startOffset); WRITE_NODE_FIELD(endOffset); @@ -814,6 +818,7 @@ _outGroup(StringInfo str, const Group *node) WRITE_INT_FIELD(numCols); WRITE_ATTRNUMBER_ARRAY(grpColIdx, node->numCols); WRITE_OID_ARRAY(grpOperators, node->numCols); + WRITE_OID_ARRAY(grpCollations, node->numCols); } static void @@ -848,6 +853,7 @@ _outUnique(StringInfo str, const Unique *node) WRITE_INT_FIELD(numCols); WRITE_ATTRNUMBER_ARRAY(uniqColIdx, node->numCols); WRITE_OID_ARRAY(uniqOperators, node->numCols); + WRITE_OID_ARRAY(uniqCollations, node->numCols); } static void @@ -875,6 
+881,7 @@ _outSetOp(StringInfo str, const SetOp *node) WRITE_INT_FIELD(numCols); WRITE_ATTRNUMBER_ARRAY(dupColIdx, node->numCols); WRITE_OID_ARRAY(dupOperators, node->numCols); + WRITE_OID_ARRAY(dupCollations, node->numCols); WRITE_INT_FIELD(flagColIdx); WRITE_INT_FIELD(firstFlag); WRITE_LONG_FIELD(numGroups); diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c index ec6f2569ab..7e40c2990b 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -1672,6 +1672,7 @@ _readRecursiveUnion(void) READ_INT_FIELD(numCols); READ_ATTRNUMBER_ARRAY(dupColIdx, local_node->numCols); READ_OID_ARRAY(dupOperators, local_node->numCols); + READ_OID_ARRAY(dupCollations, local_node->numCols); READ_LONG_FIELD(numGroups); READ_DONE(); @@ -2138,6 +2139,7 @@ _readGroup(void) READ_INT_FIELD(numCols); READ_ATTRNUMBER_ARRAY(grpColIdx, local_node->numCols); READ_OID_ARRAY(grpOperators, local_node->numCols); + READ_OID_ARRAY(grpCollations, local_node->numCols); READ_DONE(); } @@ -2157,6 +2159,7 @@ _readAgg(void) READ_INT_FIELD(numCols); READ_ATTRNUMBER_ARRAY(grpColIdx, local_node->numCols); READ_OID_ARRAY(grpOperators, local_node->numCols); + READ_OID_ARRAY(grpCollations, local_node->numCols); READ_LONG_FIELD(numGroups); READ_BITMAPSET_FIELD(aggParams); READ_NODE_FIELD(groupingSets); @@ -2179,9 +2182,11 @@ _readWindowAgg(void) READ_INT_FIELD(partNumCols); READ_ATTRNUMBER_ARRAY(partColIdx, local_node->partNumCols); READ_OID_ARRAY(partOperators, local_node->partNumCols); + READ_OID_ARRAY(partCollations, local_node->partNumCols); READ_INT_FIELD(ordNumCols); READ_ATTRNUMBER_ARRAY(ordColIdx, local_node->ordNumCols); READ_OID_ARRAY(ordOperators, local_node->ordNumCols); + READ_OID_ARRAY(ordCollations, local_node->ordNumCols); READ_INT_FIELD(frameOptions); READ_NODE_FIELD(startOffset); READ_NODE_FIELD(endOffset); @@ -2207,6 +2212,7 @@ _readUnique(void) READ_INT_FIELD(numCols); READ_ATTRNUMBER_ARRAY(uniqColIdx, local_node->numCols); 
READ_OID_ARRAY(uniqOperators, local_node->numCols); + READ_OID_ARRAY(uniqCollations, local_node->numCols); READ_DONE(); } @@ -2285,6 +2291,7 @@ _readSetOp(void) READ_INT_FIELD(numCols); READ_ATTRNUMBER_ARRAY(dupColIdx, local_node->numCols); READ_OID_ARRAY(dupOperators, local_node->numCols); + READ_OID_ARRAY(dupCollations, local_node->numCols); READ_INT_FIELD(flagColIdx); READ_INT_FIELD(firstFlag); READ_LONG_FIELD(numGroups); diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 97d0c28132..61e9dc5938 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -253,14 +253,14 @@ static Sort *make_sort_from_groupcols(List *groupcls, Plan *lefttree); static Material *make_material(Plan *lefttree); static WindowAgg *make_windowagg(List *tlist, Index winref, - int partNumCols, AttrNumber *partColIdx, Oid *partOperators, - int ordNumCols, AttrNumber *ordColIdx, Oid *ordOperators, + int partNumCols, AttrNumber *partColIdx, Oid *partOperators, Oid *partCollations, + int ordNumCols, AttrNumber *ordColIdx, Oid *ordOperators, Oid *ordCollations, int frameOptions, Node *startOffset, Node *endOffset, Oid startInRangeFunc, Oid endInRangeFunc, Oid inRangeColl, bool inRangeAsc, bool inRangeNullsFirst, Plan *lefttree); static Group *make_group(List *tlist, List *qual, int numGroupCols, - AttrNumber *grpColIdx, Oid *grpOperators, + AttrNumber *grpColIdx, Oid *grpOperators, Oid *grpCollations, Plan *lefttree); static Unique *make_unique_from_sortclauses(Plan *lefttree, List *distinctList); static Unique *make_unique_from_pathkeys(Plan *lefttree, @@ -1352,6 +1352,7 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path, int flags) bool newitems; int numGroupCols; AttrNumber *groupColIdx; + Oid *groupCollations; int groupColPos; ListCell *l; @@ -1418,6 +1419,7 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path, int flags) newtlist = subplan->targetlist; numGroupCols = 
list_length(uniq_exprs); groupColIdx = (AttrNumber *) palloc(numGroupCols * sizeof(AttrNumber)); + groupCollations = (Oid *) palloc(numGroupCols * sizeof(Oid)); groupColPos = 0; foreach(l, uniq_exprs) @@ -1428,7 +1430,9 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path, int flags) tle = tlist_member(uniqexpr, newtlist); if (!tle) /* shouldn't happen */ elog(ERROR, "failed to find unique expression in subplan tlist"); - groupColIdx[groupColPos++] = tle->resno; + groupColIdx[groupColPos] = tle->resno; + groupCollations[groupColPos] = exprCollation((Node *) tle->expr); + groupColPos++; } if (best_path->umethod == UNIQUE_PATH_HASH) @@ -1466,6 +1470,7 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path, int flags) numGroupCols, groupColIdx, groupOperators, + groupCollations, NIL, NIL, best_path->path.rows, @@ -1848,6 +1853,8 @@ create_group_plan(PlannerInfo *root, GroupPath *best_path) extract_grouping_cols(best_path->groupClause, subplan->targetlist), extract_grouping_ops(best_path->groupClause), + extract_grouping_collations(best_path->groupClause, + subplan->targetlist), subplan); copy_generic_path_info(&plan->plan, (Path *) best_path); @@ -1914,6 +1921,8 @@ create_agg_plan(PlannerInfo *root, AggPath *best_path) extract_grouping_cols(best_path->groupClause, subplan->targetlist), extract_grouping_ops(best_path->groupClause), + extract_grouping_collations(best_path->groupClause, + subplan->targetlist), NIL, NIL, best_path->numGroups, @@ -2075,6 +2084,7 @@ create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path) list_length((List *) linitial(rollup->gsets)), new_grpColIdx, extract_grouping_ops(rollup->groupClause), + extract_grouping_collations(rollup->groupClause, subplan->targetlist), rollup->gsets, NIL, rollup->numGroups, @@ -2112,6 +2122,7 @@ create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path) numGroupCols, top_grpColIdx, extract_grouping_ops(rollup->groupClause), + 
extract_grouping_collations(rollup->groupClause, subplan->targetlist), rollup->gsets, chain, rollup->numGroups, @@ -2211,9 +2222,11 @@ create_windowagg_plan(PlannerInfo *root, WindowAggPath *best_path) int partNumCols; AttrNumber *partColIdx; Oid *partOperators; + Oid *partCollations; int ordNumCols; AttrNumber *ordColIdx; Oid *ordOperators; + Oid *ordCollations; ListCell *lc; /* @@ -2235,6 +2248,7 @@ create_windowagg_plan(PlannerInfo *root, WindowAggPath *best_path) */ partColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numPart); partOperators = (Oid *) palloc(sizeof(Oid) * numPart); + partCollations = (Oid *) palloc(sizeof(Oid) * numPart); partNumCols = 0; foreach(lc, wc->partitionClause) @@ -2245,11 +2259,13 @@ create_windowagg_plan(PlannerInfo *root, WindowAggPath *best_path) Assert(OidIsValid(sgc->eqop)); partColIdx[partNumCols] = tle->resno; partOperators[partNumCols] = sgc->eqop; + partCollations[partNumCols] = exprCollation((Node *) tle->expr); partNumCols++; } ordColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numOrder); ordOperators = (Oid *) palloc(sizeof(Oid) * numOrder); + ordCollations = (Oid *) palloc(sizeof(Oid) * numOrder); ordNumCols = 0; foreach(lc, wc->orderClause) @@ -2260,6 +2276,7 @@ create_windowagg_plan(PlannerInfo *root, WindowAggPath *best_path) Assert(OidIsValid(sgc->eqop)); ordColIdx[ordNumCols] = tle->resno; ordOperators[ordNumCols] = sgc->eqop; + ordCollations[ordNumCols] = exprCollation((Node *) tle->expr); ordNumCols++; } @@ -2269,9 +2286,11 @@ create_windowagg_plan(PlannerInfo *root, WindowAggPath *best_path) partNumCols, partColIdx, partOperators, + partCollations, ordNumCols, ordColIdx, ordOperators, + ordCollations, wc->frameOptions, wc->startOffset, wc->endOffset, @@ -5292,10 +5311,12 @@ make_recursive_union(List *tlist, int keyno = 0; AttrNumber *dupColIdx; Oid *dupOperators; + Oid *dupCollations; ListCell *slitem; dupColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols); dupOperators = (Oid *) 
palloc(sizeof(Oid) * numCols); + dupCollations = (Oid *) palloc(sizeof(Oid) * numCols); foreach(slitem, distinctList) { @@ -5305,11 +5326,13 @@ make_recursive_union(List *tlist, dupColIdx[keyno] = tle->resno; dupOperators[keyno] = sortcl->eqop; + dupCollations[keyno] = exprCollation((Node *) tle->expr); Assert(OidIsValid(dupOperators[keyno])); keyno++; } node->dupColIdx = dupColIdx; node->dupOperators = dupOperators; + node->dupCollations = dupCollations; } node->numGroups = numGroups; @@ -5981,7 +6004,7 @@ materialize_finished_plan(Plan *subplan) Agg * make_agg(List *tlist, List *qual, AggStrategy aggstrategy, AggSplit aggsplit, - int numGroupCols, AttrNumber *grpColIdx, Oid *grpOperators, + int numGroupCols, AttrNumber *grpColIdx, Oid *grpOperators, Oid *grpCollations, List *groupingSets, List *chain, double dNumGroups, Plan *lefttree) { @@ -5997,6 +6020,7 @@ make_agg(List *tlist, List *qual, node->numCols = numGroupCols; node->grpColIdx = grpColIdx; node->grpOperators = grpOperators; + node->grpCollations = grpCollations; node->numGroups = numGroups; node->aggParams = NULL; /* SS_finalize_plan() will fill this */ node->groupingSets = groupingSets; @@ -6012,8 +6036,8 @@ make_agg(List *tlist, List *qual, static WindowAgg * make_windowagg(List *tlist, Index winref, - int partNumCols, AttrNumber *partColIdx, Oid *partOperators, - int ordNumCols, AttrNumber *ordColIdx, Oid *ordOperators, + int partNumCols, AttrNumber *partColIdx, Oid *partOperators, Oid *partCollations, + int ordNumCols, AttrNumber *ordColIdx, Oid *ordOperators, Oid *ordCollations, int frameOptions, Node *startOffset, Node *endOffset, Oid startInRangeFunc, Oid endInRangeFunc, Oid inRangeColl, bool inRangeAsc, bool inRangeNullsFirst, @@ -6026,9 +6050,11 @@ make_windowagg(List *tlist, Index winref, node->partNumCols = partNumCols; node->partColIdx = partColIdx; node->partOperators = partOperators; + node->partCollations = partCollations; node->ordNumCols = ordNumCols; node->ordColIdx = ordColIdx; 
node->ordOperators = ordOperators; + node->ordCollations = ordCollations; node->frameOptions = frameOptions; node->startOffset = startOffset; node->endOffset = endOffset; @@ -6053,6 +6079,7 @@ make_group(List *tlist, int numGroupCols, AttrNumber *grpColIdx, Oid *grpOperators, + Oid *grpCollations, Plan *lefttree) { Group *node = makeNode(Group); @@ -6061,6 +6088,7 @@ make_group(List *tlist, node->numCols = numGroupCols; node->grpColIdx = grpColIdx; node->grpOperators = grpOperators; + node->grpCollations = grpCollations; plan->qual = qual; plan->targetlist = tlist; @@ -6084,6 +6112,7 @@ make_unique_from_sortclauses(Plan *lefttree, List *distinctList) int keyno = 0; AttrNumber *uniqColIdx; Oid *uniqOperators; + Oid *uniqCollations; ListCell *slitem; plan->targetlist = lefttree->targetlist; @@ -6098,6 +6127,7 @@ make_unique_from_sortclauses(Plan *lefttree, List *distinctList) Assert(numCols > 0); uniqColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols); uniqOperators = (Oid *) palloc(sizeof(Oid) * numCols); + uniqCollations = (Oid *) palloc(sizeof(Oid) * numCols); foreach(slitem, distinctList) { @@ -6106,6 +6136,7 @@ make_unique_from_sortclauses(Plan *lefttree, List *distinctList) uniqColIdx[keyno] = tle->resno; uniqOperators[keyno] = sortcl->eqop; + uniqCollations[keyno] = exprCollation((Node *) tle->expr); Assert(OidIsValid(uniqOperators[keyno])); keyno++; } @@ -6113,6 +6144,7 @@ make_unique_from_sortclauses(Plan *lefttree, List *distinctList) node->numCols = numCols; node->uniqColIdx = uniqColIdx; node->uniqOperators = uniqOperators; + node->uniqCollations = uniqCollations; return node; } @@ -6128,6 +6160,7 @@ make_unique_from_pathkeys(Plan *lefttree, List *pathkeys, int numCols) int keyno = 0; AttrNumber *uniqColIdx; Oid *uniqOperators; + Oid *uniqCollations; ListCell *lc; plan->targetlist = lefttree->targetlist; @@ -6143,6 +6176,7 @@ make_unique_from_pathkeys(Plan *lefttree, List *pathkeys, int numCols) Assert(numCols >= 0 && numCols <= 
list_length(pathkeys)); uniqColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols); uniqOperators = (Oid *) palloc(sizeof(Oid) * numCols); + uniqCollations = (Oid *) palloc(sizeof(Oid) * numCols); foreach(lc, pathkeys) { @@ -6211,6 +6245,7 @@ make_unique_from_pathkeys(Plan *lefttree, List *pathkeys, int numCols) uniqColIdx[keyno] = tle->resno; uniqOperators[keyno] = eqop; + uniqCollations[keyno] = ec->ec_collation; keyno++; } @@ -6218,6 +6253,7 @@ make_unique_from_pathkeys(Plan *lefttree, List *pathkeys, int numCols) node->numCols = numCols; node->uniqColIdx = uniqColIdx; node->uniqOperators = uniqOperators; + node->uniqCollations = uniqCollations; return node; } @@ -6262,6 +6298,7 @@ make_setop(SetOpCmd cmd, SetOpStrategy strategy, Plan *lefttree, int keyno = 0; AttrNumber *dupColIdx; Oid *dupOperators; + Oid *dupCollations; ListCell *slitem; plan->targetlist = lefttree->targetlist; @@ -6275,6 +6312,7 @@ make_setop(SetOpCmd cmd, SetOpStrategy strategy, Plan *lefttree, */ dupColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols); dupOperators = (Oid *) palloc(sizeof(Oid) * numCols); + dupCollations = (Oid *) palloc(sizeof(Oid) * numCols); foreach(slitem, distinctList) { @@ -6283,6 +6321,7 @@ make_setop(SetOpCmd cmd, SetOpStrategy strategy, Plan *lefttree, dupColIdx[keyno] = tle->resno; dupOperators[keyno] = sortcl->eqop; + dupCollations[keyno] = exprCollation((Node *) tle->expr); Assert(OidIsValid(dupOperators[keyno])); keyno++; } @@ -6292,6 +6331,7 @@ make_setop(SetOpCmd cmd, SetOpStrategy strategy, Plan *lefttree, node->numCols = numCols; node->dupColIdx = dupColIdx; node->dupOperators = dupOperators; + node->dupCollations = dupCollations; node->flagColIdx = flagColIdx; node->firstFlag = firstFlag; node->numGroups = numGroups; diff --git a/src/backend/optimizer/util/tlist.c b/src/backend/optimizer/util/tlist.c index d0cc14f11d..a673819890 100644 --- a/src/backend/optimizer/util/tlist.c +++ b/src/backend/optimizer/util/tlist.c @@ -502,6 +502,31 @@ 
extract_grouping_ops(List *groupClause) return groupOperators; } +/* + * extract_grouping_collations - make an array of the grouping column collations + * for a SortGroupClause list + */ +Oid * +extract_grouping_collations(List *groupClause, List *tlist) +{ + int numCols = list_length(groupClause); + int colno = 0; + Oid *grpCollations; + ListCell *glitem; + + grpCollations = (Oid *) palloc(sizeof(Oid) * numCols); + + foreach(glitem, groupClause) + { + SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem); + TargetEntry *tle = get_sortgroupclause_tle(groupcl, tlist); + + grpCollations[colno++] = exprCollation((Node *) tle->expr); + } + + return grpCollations; +} + /* * extract_grouping_cols - make an array of the grouping column resnos * for a SortGroupClause list diff --git a/src/backend/partitioning/partbounds.c b/src/backend/partitioning/partbounds.c index f21c9b32a6..22d5d7e7a8 100644 --- a/src/backend/partitioning/partbounds.c +++ b/src/backend/partitioning/partbounds.c @@ -2658,7 +2658,7 @@ get_range_nulltest(PartitionKey key) * Compute the hash value for given partition key values. */ uint64 -compute_partition_hash_value(int partnatts, FmgrInfo *partsupfunc, +compute_partition_hash_value(int partnatts, FmgrInfo *partsupfunc, Oid *partcollation, Datum *values, bool *isnull) { int i; @@ -2679,7 +2679,7 @@ compute_partition_hash_value(int partnatts, FmgrInfo *partsupfunc, * datatype-specific hash functions of each partition key * attribute. 
*/ - hash = FunctionCall2(&partsupfunc[i], values[i], seed); + hash = FunctionCall2Coll(&partsupfunc[i], partcollation[i], values[i], seed); /* Form a single 64-bit hash value */ rowHash = hash_combine64(rowHash, DatumGetUInt64(hash)); diff --git a/src/backend/partitioning/partprune.c b/src/backend/partitioning/partprune.c index 901433c68c..ee2cce452e 100644 --- a/src/backend/partitioning/partprune.c +++ b/src/backend/partitioning/partprune.c @@ -2158,6 +2158,7 @@ get_matching_hash_bounds(PartitionPruneContext *context, int i; uint64 rowHash; int greatest_modulus; + Oid *partcollation = context->partcollation; Assert(context->strategy == PARTITION_STRATEGY_HASH); @@ -2178,7 +2179,7 @@ get_matching_hash_bounds(PartitionPruneContext *context, isnull[i] = bms_is_member(i, nullkeys); greatest_modulus = get_hash_partition_greatest_modulus(boundinfo); - rowHash = compute_partition_hash_value(partnatts, partsupfunc, + rowHash = compute_partition_hash_value(partnatts, partsupfunc, partcollation, values, isnull); if (partindices[rowHash % greatest_modulus] >= 0) diff --git a/src/backend/regex/regc_pg_locale.c b/src/backend/regex/regc_pg_locale.c index a8c0b156fa..4a808b7606 100644 --- a/src/backend/regex/regc_pg_locale.c +++ b/src/backend/regex/regc_pg_locale.c @@ -263,6 +263,11 @@ pg_set_regex_collation(Oid collation) errhint("Use the COLLATE clause to set the collation explicitly."))); } + if (pg_regex_locale && !pg_regex_locale->deterministic) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("nondeterministic collations are not supported for regular expressions"))); + #ifdef USE_ICU if (pg_regex_locale && pg_regex_locale->provider == COLLPROVIDER_ICU) pg_regex_strategy = PG_REGEX_LOCALE_ICU; diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c index e457d81f23..f6567075ba 100644 --- a/src/backend/utils/adt/arrayfuncs.c +++ b/src/backend/utils/adt/arrayfuncs.c @@ -3954,7 +3954,7 @@ hash_array(PG_FUNCTION_ARGS) * apply 
the hash function to each array element. */ InitFunctionCallInfoData(locfcinfo, &typentry->hash_proc_finfo, 1, - InvalidOid, NULL, NULL); + PG_GET_COLLATION(), NULL, NULL); /* Loop over source data */ nitems = ArrayGetNItems(ndims, dims); diff --git a/src/backend/utils/adt/like.c b/src/backend/utils/adt/like.c index 46c4fc727c..43060e5288 100644 --- a/src/backend/utils/adt/like.c +++ b/src/backend/utils/adt/like.c @@ -45,7 +45,7 @@ static int UTF8_MatchText(char *t, int tlen, char *p, int plen, static int SB_IMatchText(char *t, int tlen, char *p, int plen, pg_locale_t locale, bool locale_is_c); -static int GenericMatchText(char *s, int slen, char *p, int plen); +static int GenericMatchText(char *s, int slen, char *p, int plen, Oid collation); static int Generic_Text_IC_like(text *str, text *pat, Oid collation); /*-------------------- @@ -148,8 +148,18 @@ SB_lower_char(unsigned char c, pg_locale_t locale, bool locale_is_c) /* Generic for all cases not requiring inline case-folding */ static inline int -GenericMatchText(char *s, int slen, char *p, int plen) +GenericMatchText(char *s, int slen, char *p, int plen, Oid collation) { + if (collation) + { + pg_locale_t locale = pg_newlocale_from_collation(collation); + + if (locale && !locale->deterministic) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("nondeterministic collations are not supported for LIKE"))); + } + if (pg_database_encoding_max_length() == 1) return SB_MatchText(s, slen, p, plen, 0, true); else if (GetDatabaseEncoding() == PG_UTF8) @@ -184,6 +194,11 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation) errhint("Use the COLLATE clause to set the collation explicitly."))); } locale = pg_newlocale_from_collation(collation); + + if (locale && !locale->deterministic) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("nondeterministic collations are not supported for ILIKE"))); } /* @@ -240,7 +255,7 @@ namelike(PG_FUNCTION_ARGS) p = VARDATA_ANY(pat); plen = 
VARSIZE_ANY_EXHDR(pat); - result = (GenericMatchText(s, slen, p, plen) == LIKE_TRUE); + result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) == LIKE_TRUE); PG_RETURN_BOOL(result); } @@ -261,7 +276,7 @@ namenlike(PG_FUNCTION_ARGS) p = VARDATA_ANY(pat); plen = VARSIZE_ANY_EXHDR(pat); - result = (GenericMatchText(s, slen, p, plen) != LIKE_TRUE); + result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) != LIKE_TRUE); PG_RETURN_BOOL(result); } @@ -282,7 +297,7 @@ textlike(PG_FUNCTION_ARGS) p = VARDATA_ANY(pat); plen = VARSIZE_ANY_EXHDR(pat); - result = (GenericMatchText(s, slen, p, plen) == LIKE_TRUE); + result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) == LIKE_TRUE); PG_RETURN_BOOL(result); } @@ -303,7 +318,7 @@ textnlike(PG_FUNCTION_ARGS) p = VARDATA_ANY(pat); plen = VARSIZE_ANY_EXHDR(pat); - result = (GenericMatchText(s, slen, p, plen) != LIKE_TRUE); + result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) != LIKE_TRUE); PG_RETURN_BOOL(result); } diff --git a/src/backend/utils/adt/name.c b/src/backend/utils/adt/name.c index 3a7887d455..54425925ed 100644 --- a/src/backend/utils/adt/name.c +++ b/src/backend/utils/adt/name.c @@ -131,14 +131,26 @@ namesend(PG_FUNCTION_ARGS) * have a '\0' terminator. Whatever might be past the terminator is not * considered relevant to comparisons. 
*/ +static int +namecmp(Name arg1, Name arg2, Oid collid) +{ + /* Fast path for common case used in system catalogs */ + if (collid == C_COLLATION_OID) + return strncmp(NameStr(*arg1), NameStr(*arg2), NAMEDATALEN); + + /* Else rely on the varstr infrastructure */ + return varstr_cmp(NameStr(*arg1), strlen(NameStr(*arg1)), + NameStr(*arg2), strlen(NameStr(*arg2)), + collid); +} + Datum nameeq(PG_FUNCTION_ARGS) { Name arg1 = PG_GETARG_NAME(0); Name arg2 = PG_GETARG_NAME(1); - /* Collation doesn't matter: equal only if bitwise-equal */ - PG_RETURN_BOOL(strncmp(NameStr(*arg1), NameStr(*arg2), NAMEDATALEN) == 0); + PG_RETURN_BOOL(namecmp(arg1, arg2, PG_GET_COLLATION()) == 0); } Datum @@ -147,21 +159,7 @@ namene(PG_FUNCTION_ARGS) Name arg1 = PG_GETARG_NAME(0); Name arg2 = PG_GETARG_NAME(1); - /* Collation doesn't matter: equal only if bitwise-equal */ - PG_RETURN_BOOL(strncmp(NameStr(*arg1), NameStr(*arg2), NAMEDATALEN) != 0); -} - -static int -namecmp(Name arg1, Name arg2, Oid collid) -{ - /* Fast path for common case used in system catalogs */ - if (collid == C_COLLATION_OID) - return strncmp(NameStr(*arg1), NameStr(*arg2), NAMEDATALEN); - - /* Else rely on the varstr infrastructure */ - return varstr_cmp(NameStr(*arg1), strlen(NameStr(*arg1)), - NameStr(*arg2), strlen(NameStr(*arg2)), - collid); + PG_RETURN_BOOL(namecmp(arg1, arg2, PG_GET_COLLATION()) != 0); } Datum diff --git a/src/backend/utils/adt/orderedsetaggs.c b/src/backend/utils/adt/orderedsetaggs.c index 6075bc3db7..0165b23a6e 100644 --- a/src/backend/utils/adt/orderedsetaggs.c +++ b/src/backend/utils/adt/orderedsetaggs.c @@ -1084,7 +1084,7 @@ mode_final(PG_FUNCTION_ARGS) last_abbrev_val = abbrev_val; } else if (abbrev_val == last_abbrev_val && - DatumGetBool(FunctionCall2(equalfn, val, last_val))) + DatumGetBool(FunctionCall2Coll(equalfn, PG_GET_COLLATION(), val, last_val))) { /* value equal to previous value, count it */ if (last_val_is_mode) @@ -1345,6 +1345,7 @@ 
hypothetical_dense_rank_final(PG_FUNCTION_ARGS) numDistinctCols, sortColIdx, osastate->qstate->eqOperators, + osastate->qstate->sortCollations, NULL); MemoryContextSwitchTo(oldContext); osastate->qstate->compareTuple = compareTuple; diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c index 50b8b31645..1f7fdc0593 100644 --- a/src/backend/utils/adt/pg_locale.c +++ b/src/backend/utils/adt/pg_locale.c @@ -1308,6 +1308,7 @@ pg_newlocale_from_collation(Oid collid) /* We'll fill in the result struct locally before allocating memory */ memset(&result, 0, sizeof(result)); result.provider = collform->collprovider; + result.deterministic = collform->collisdeterministic; if (collform->collprovider == COLLPROVIDER_LIBC) { diff --git a/src/backend/utils/adt/ri_triggers.c b/src/backend/utils/adt/ri_triggers.c index e1aa3d0044..55488f1192 100644 --- a/src/backend/utils/adt/ri_triggers.c +++ b/src/backend/utils/adt/ri_triggers.c @@ -778,6 +778,8 @@ ri_restrict(TriggerData *trigdata, bool is_no_action) { Oid pk_type = RIAttType(pk_rel, riinfo->pk_attnums[i]); Oid fk_type = RIAttType(fk_rel, riinfo->fk_attnums[i]); + Oid pk_coll = RIAttCollation(pk_rel, riinfo->pk_attnums[i]); + Oid fk_coll = RIAttCollation(fk_rel, riinfo->fk_attnums[i]); quoteOneName(attname, RIAttName(fk_rel, riinfo->fk_attnums[i])); @@ -786,6 +788,8 @@ ri_restrict(TriggerData *trigdata, bool is_no_action) paramname, pk_type, riinfo->pf_eq_oprs[i], attname, fk_type); + if (pk_coll != fk_coll && !get_collation_isdeterministic(pk_coll)) + ri_GenerateQualCollation(&querybuf, pk_coll); querysep = "AND"; queryoids[i] = pk_type; } @@ -918,6 +922,8 @@ RI_FKey_cascade_del(PG_FUNCTION_ARGS) { Oid pk_type = RIAttType(pk_rel, riinfo->pk_attnums[i]); Oid fk_type = RIAttType(fk_rel, riinfo->fk_attnums[i]); + Oid pk_coll = RIAttCollation(pk_rel, riinfo->pk_attnums[i]); + Oid fk_coll = RIAttCollation(fk_rel, riinfo->fk_attnums[i]); quoteOneName(attname, RIAttName(fk_rel, riinfo->fk_attnums[i])); @@ 
-926,6 +932,8 @@ RI_FKey_cascade_del(PG_FUNCTION_ARGS) paramname, pk_type, riinfo->pf_eq_oprs[i], attname, fk_type); + if (pk_coll != fk_coll && !get_collation_isdeterministic(pk_coll)) + ri_GenerateQualCollation(&querybuf, pk_coll); querysep = "AND"; queryoids[i] = pk_type; } @@ -1069,6 +1077,8 @@ RI_FKey_cascade_upd(PG_FUNCTION_ARGS) { Oid pk_type = RIAttType(pk_rel, riinfo->pk_attnums[i]); Oid fk_type = RIAttType(fk_rel, riinfo->fk_attnums[i]); + Oid pk_coll = RIAttCollation(pk_rel, riinfo->pk_attnums[i]); + Oid fk_coll = RIAttCollation(fk_rel, riinfo->fk_attnums[i]); quoteOneName(attname, RIAttName(fk_rel, riinfo->fk_attnums[i])); @@ -1080,6 +1090,8 @@ RI_FKey_cascade_upd(PG_FUNCTION_ARGS) paramname, pk_type, riinfo->pf_eq_oprs[i], attname, fk_type); + if (pk_coll != fk_coll && !get_collation_isdeterministic(pk_coll)) + ri_GenerateQualCollation(&querybuf, pk_coll); querysep = ","; qualsep = "AND"; queryoids[i] = pk_type; @@ -1257,6 +1269,8 @@ ri_setnull(TriggerData *trigdata) { Oid pk_type = RIAttType(pk_rel, riinfo->pk_attnums[i]); Oid fk_type = RIAttType(fk_rel, riinfo->fk_attnums[i]); + Oid pk_coll = RIAttCollation(pk_rel, riinfo->pk_attnums[i]); + Oid fk_coll = RIAttCollation(fk_rel, riinfo->fk_attnums[i]); quoteOneName(attname, RIAttName(fk_rel, riinfo->fk_attnums[i])); @@ -1268,6 +1282,8 @@ ri_setnull(TriggerData *trigdata) paramname, pk_type, riinfo->pf_eq_oprs[i], attname, fk_type); + if (pk_coll != fk_coll && !get_collation_isdeterministic(pk_coll)) + ri_GenerateQualCollation(&querybuf, pk_coll); querysep = ","; qualsep = "AND"; queryoids[i] = pk_type; @@ -1444,6 +1460,8 @@ ri_setdefault(TriggerData *trigdata) { Oid pk_type = RIAttType(pk_rel, riinfo->pk_attnums[i]); Oid fk_type = RIAttType(fk_rel, riinfo->fk_attnums[i]); + Oid pk_coll = RIAttCollation(pk_rel, riinfo->pk_attnums[i]); + Oid fk_coll = RIAttCollation(fk_rel, riinfo->fk_attnums[i]); quoteOneName(attname, RIAttName(fk_rel, riinfo->fk_attnums[i])); @@ -1455,6 +1473,8 @@ 
ri_setdefault(TriggerData *trigdata) paramname, pk_type, riinfo->pf_eq_oprs[i], attname, fk_type); + if (pk_coll != fk_coll && !get_collation_isdeterministic(pk_coll)) + ri_GenerateQualCollation(&querybuf, pk_coll); querysep = ","; qualsep = "AND"; queryoids[i] = pk_type; @@ -2901,11 +2921,20 @@ ri_AttributesEqual(Oid eq_opr, Oid typeid, } /* - * Apply the comparison operator. We assume it doesn't care about - * collations. - */ - return DatumGetBool(FunctionCall2(&entry->eq_opr_finfo, - oldvalue, newvalue)); + * Apply the comparison operator. + * + * Note: This function is part of a call stack that determines whether an + * update to a row is significant enough that it needs checking or action + * on the other side of a foreign-key constraint. Therefore, the + * comparison here would need to be done with the collation of the *other* + * table. For simplicity (e.g., we might not even have the other table + * open), we'll just use the default collation here, which could lead to + * some false negatives. All this would break if we ever allow + * database-wide collations to be nondeterministic. + */ + return DatumGetBool(FunctionCall2Coll(&entry->eq_opr_finfo, + DEFAULT_COLLATION_OID, + oldvalue, newvalue)); } /* ---------- diff --git a/src/backend/utils/adt/varchar.c b/src/backend/utils/adt/varchar.c index 5cf927e27f..d43fbe9a03 100644 --- a/src/backend/utils/adt/varchar.c +++ b/src/backend/utils/adt/varchar.c @@ -23,6 +23,7 @@ #include "nodes/nodeFuncs.h" #include "utils/array.h" #include "utils/builtins.h" +#include "utils/pg_locale.h" #include "utils/varlena.h" #include "mb/pg_wchar.h" @@ -708,6 +709,22 @@ bpcharoctetlen(PG_FUNCTION_ARGS) * need to be so careful. *****************************************************************************/ +static void +check_collation_set(Oid collid) +{ + if (!OidIsValid(collid)) + { + /* + * This typically means that the parser could not resolve a conflict + * of implicit collations, so report it that way. 
+ */ + ereport(ERROR, + (errcode(ERRCODE_INDETERMINATE_COLLATION), + errmsg("could not determine which collation to use for string comparison"), + errhint("Use the COLLATE clause to set the collation explicitly."))); + } +} + Datum bpchareq(PG_FUNCTION_ARGS) { @@ -716,10 +733,19 @@ bpchareq(PG_FUNCTION_ARGS) int len1, len2; bool result; + Oid collid = PG_GET_COLLATION(); + + check_collation_set(collid); len1 = bcTruelen(arg1); len2 = bcTruelen(arg2); + if (collid != DEFAULT_COLLATION_OID && + !pg_newlocale_from_collation(collid)->deterministic) + result = (varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2, + collid) == 0); + else + { /* * Since we only care about equality or not-equality, we can avoid all the * expense of strcoll() here, and just do bitwise comparison. @@ -728,6 +754,7 @@ bpchareq(PG_FUNCTION_ARGS) result = false; else result = (memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), len1) == 0); + } PG_FREE_IF_COPY(arg1, 0); PG_FREE_IF_COPY(arg2, 1); @@ -743,10 +770,20 @@ bpcharne(PG_FUNCTION_ARGS) int len1, len2; bool result; + Oid collid = PG_GET_COLLATION(); + + /* Report an unresolved collation cleanly before the locale lookup */ + check_collation_set(collid); len1 = bcTruelen(arg1); len2 = bcTruelen(arg2); + if (collid != DEFAULT_COLLATION_OID && + !pg_newlocale_from_collation(collid)->deterministic) + result = (varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2, + collid) != 0); + else + { /* * Since we only care about equality or not-equality, we can avoid all the * expense of strcoll() here, and just do bitwise comparison. 
@@ -755,6 +789,7 @@ bpcharne(PG_FUNCTION_ARGS) result = true; else result = (memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), len1) != 0); + } PG_FREE_IF_COPY(arg1, 0); PG_FREE_IF_COPY(arg2, 1); @@ -933,13 +968,53 @@ Datum hashbpchar(PG_FUNCTION_ARGS) { BpChar *key = PG_GETARG_BPCHAR_PP(0); + Oid collid = PG_GET_COLLATION(); char *keydata; int keylen; Datum result; + if (!collid) + ereport(ERROR, + (errcode(ERRCODE_INDETERMINATE_COLLATION), + errmsg("could not determine which collation to use for string hashing"), + errhint("Use the COLLATE clause to set the collation explicitly."))); + keydata = VARDATA_ANY(key); keylen = bcTruelen(key); + if (collid != DEFAULT_COLLATION_OID) + { + pg_locale_t mylocale = pg_newlocale_from_collation(collid); + + if (mylocale && !mylocale->deterministic) + { + if (mylocale->provider == COLLPROVIDER_ICU) + { +#ifdef USE_ICU + int32_t ulen = -1; + UChar *uchar = NULL; + Size bsize; + uint8_t *buf; + + ulen = icu_to_uchar(&uchar, keydata, keylen); + + bsize = ucol_getSortKey(mylocale->info.icu.ucol, + uchar, ulen, NULL, 0); + buf = palloc(bsize); + ucol_getSortKey(mylocale->info.icu.ucol, + uchar, ulen, buf, bsize); + + result = hash_any(buf, bsize); + + return result; +#else + /* shouldn't happen */ + elog(ERROR, "unsupported collprovider: %c", mylocale->provider); +#endif + } + } + } + result = hash_any((unsigned char *) keydata, keylen); /* Avoid leaking memory for toasted inputs */ @@ -952,13 +1027,55 @@ Datum hashbpcharextended(PG_FUNCTION_ARGS) { BpChar *key = PG_GETARG_BPCHAR_PP(0); + Oid collid = PG_GET_COLLATION(); char *keydata; int keylen; Datum result; + if (!collid) + ereport(ERROR, + (errcode(ERRCODE_INDETERMINATE_COLLATION), + errmsg("could not determine which collation to use for string hashing"), + errhint("Use the COLLATE clause to set the collation explicitly."))); + keydata = VARDATA_ANY(key); keylen = bcTruelen(key); + if (collid != DEFAULT_COLLATION_OID) + { + pg_locale_t mylocale = 
pg_newlocale_from_collation(collid); + + if (mylocale && !mylocale->deterministic) + { + if (mylocale->provider == COLLPROVIDER_ICU) + { +#ifdef USE_ICU + int32_t ulen = -1; + UChar *uchar = NULL; + Size bsize; + uint8_t *buf; + + ulen = icu_to_uchar(&uchar, VARDATA_ANY(key), VARSIZE_ANY_EXHDR(key)); + + bsize = ucol_getSortKey(mylocale->info.icu.ucol, + uchar, ulen, NULL, 0); + buf = palloc(bsize); + ucol_getSortKey(mylocale->info.icu.ucol, + uchar, ulen, buf, bsize); + + result = hash_any_extended(buf, bsize, PG_GETARG_INT64(1)); + + PG_FREE_IF_COPY(key, 0); + + return result; +#else + /* shouldn't happen */ + elog(ERROR, "unsupported collprovider: %c", mylocale->provider); +#endif + } + } + } + result = hash_any_extended((unsigned char *) keydata, keylen, PG_GETARG_INT64(1)); diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c index 801d8c5c29..5bd65e422f 100644 --- a/src/backend/utils/adt/varlena.c +++ b/src/backend/utils/adt/varlena.c @@ -1373,6 +1373,22 @@ text_position_cleanup(TextPositionState *state) } } +static void +check_collation_set(Oid collid) +{ + if (!OidIsValid(collid)) + { + /* + * This typically means that the parser could not resolve a conflict + * of implicit collations, so report it that way. + */ + ereport(ERROR, + (errcode(ERRCODE_INDETERMINATE_COLLATION), + errmsg("could not determine which collation to use for string comparison"), + errhint("Use the COLLATE clause to set the collation explicitly."))); + } +} + /* varstr_cmp() * Comparison function for text strings with given lengths. * Includes locale support, but must copy strings to temporary memory @@ -1385,6 +1401,8 @@ varstr_cmp(const char *arg1, int len1, const char *arg2, int len2, Oid collid) { int result; + check_collation_set(collid); + /* * Unfortunately, there is no strncoll(), so in the non-C locale case we * have to do some memory copying. 
This turns out to be significantly @@ -1406,20 +1424,7 @@ varstr_cmp(const char *arg1, int len1, const char *arg2, int len2, Oid collid) pg_locale_t mylocale = 0; if (collid != DEFAULT_COLLATION_OID) - { - if (!OidIsValid(collid)) - { - /* - * This typically means that the parser could not resolve a - * conflict of implicit collations, so report it that way. - */ - ereport(ERROR, - (errcode(ERRCODE_INDETERMINATE_COLLATION), - errmsg("could not determine which collation to use for string comparison"), - errhint("Use the COLLATE clause to set the collation explicitly."))); - } mylocale = pg_newlocale_from_collation(collid); - } /* * memcmp() can't tell us which of two unequal strings sorts first, @@ -1430,7 +1435,8 @@ varstr_cmp(const char *arg1, int len1, const char *arg2, int len2, Oid collid) * equal strings in the input - then we win big by avoiding expensive * collation-aware comparisons. */ - if (len1 == len2 && memcmp(arg1, arg2, len1) == 0) + if ((!mylocale || (mylocale && mylocale->deterministic)) && + len1 == len2 && memcmp(arg1, arg2, len1) == 0) return 0; #ifdef WIN32 @@ -1508,7 +1514,8 @@ varstr_cmp(const char *arg1, int len1, const char *arg2, int len2, Oid collid) * reasons, so we follow Perl's lead and sort "equal" strings * according to strcmp (on the UTF-8 representation). */ - if (result == 0) + if (result == 0 && + (!mylocale || (mylocale && mylocale->deterministic))) { result = memcmp(arg1, arg2, Min(len1, len2)); if ((result == 0) && (len1 != len2)) @@ -1599,7 +1606,8 @@ varstr_cmp(const char *arg1, int len1, const char *arg2, int len2, Oid collid) * so we follow Perl's lead and sort "equal" strings according to * strcmp(). 
*/ - if (result == 0) + if (result == 0 && + (!mylocale || (mylocale && mylocale->deterministic))) result = strcmp(a1p, a2p); if (a1p != a1buf) @@ -1643,6 +1651,26 @@ text_cmp(text *arg1, text *arg2, Oid collid) Datum texteq(PG_FUNCTION_ARGS) { + Oid collid = PG_GET_COLLATION(); + + check_collation_set(collid); + + if (collid != DEFAULT_COLLATION_OID && + !pg_newlocale_from_collation(collid)->deterministic) + { + text *arg1 = PG_GETARG_TEXT_PP(0); + text *arg2 = PG_GETARG_TEXT_PP(1); + bool result; + + result = (text_cmp(arg1, arg2, collid) == 0); + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_BOOL(result); + } + else + { Datum arg1 = PG_GETARG_DATUM(0); Datum arg2 = PG_GETARG_DATUM(1); bool result; @@ -1673,11 +1701,32 @@ texteq(PG_FUNCTION_ARGS) } PG_RETURN_BOOL(result); + } } Datum textne(PG_FUNCTION_ARGS) { + Oid collid = PG_GET_COLLATION(); + + check_collation_set(collid); + + if (collid != DEFAULT_COLLATION_OID && + !pg_newlocale_from_collation(collid)->deterministic) + { + text *arg1 = PG_GETARG_TEXT_PP(0); + text *arg2 = PG_GETARG_TEXT_PP(1); + bool result; + + result = (text_cmp(arg1, arg2, collid) != 0); + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_BOOL(result); + } + else + { Datum arg1 = PG_GETARG_DATUM(0); Datum arg2 = PG_GETARG_DATUM(1); bool result; @@ -1702,6 +1751,7 @@ textne(PG_FUNCTION_ARGS) } PG_RETURN_BOOL(result); + } } Datum @@ -1842,6 +1892,8 @@ varstr_sortsupport(SortSupport ssup, Oid typid, Oid collid) VarStringSortSupport *sss; pg_locale_t locale = 0; + check_collation_set(collid); + /* * If possible, set ssup->comparator to a function which can be used to * directly compare two datums. If we can do this, we'll avoid the @@ -1878,20 +1930,7 @@ varstr_sortsupport(SortSupport ssup, Oid typid, Oid collid) * result. 
*/ if (collid != DEFAULT_COLLATION_OID) - { - if (!OidIsValid(collid)) - { - /* - * This typically means that the parser could not resolve a - * conflict of implicit collations, so report it that way. - */ - ereport(ERROR, - (errcode(ERRCODE_INDETERMINATE_COLLATION), - errmsg("could not determine which collation to use for string comparison"), - errhint("Use the COLLATE clause to set the collation explicitly."))); - } locale = pg_newlocale_from_collation(collid); - } /* * There is a further exception on Windows. When the database @@ -2142,7 +2181,8 @@ varstrfastcmp_locale(char *a1p, int len1, char *a2p, int len2, SortSupport ssup) bool arg1_match; /* Fast pre-check for equality, as discussed in varstr_cmp() */ - if (len1 == len2 && memcmp(a1p, a2p, len1) == 0) + if ((!sss->locale || (sss->locale && sss->locale->deterministic)) && + len1 == len2 && memcmp(a1p, a2p, len1) == 0) { /* * No change in buf1 or buf2 contents, so avoid changing last_len1 or @@ -2277,7 +2317,8 @@ varstrfastcmp_locale(char *a1p, int len1, char *a2p, int len2, SortSupport ssup) * equal. Believing that would be bad news for a number of reasons, so we * follow Perl's lead and sort "equal" strings according to strcmp(). 
*/ - if (result == 0) + if (result == 0 && + (!sss->locale || (sss->locale && sss->locale->deterministic))) result = strcmp(sss->buf1, sss->buf2); /* Cache result, perhaps saving an expensive strcoll() call next time */ @@ -2704,10 +2745,18 @@ nameeqtext(PG_FUNCTION_ARGS) text *arg2 = PG_GETARG_TEXT_PP(1); size_t len1 = strlen(NameStr(*arg1)); size_t len2 = VARSIZE_ANY_EXHDR(arg2); + Oid collid = PG_GET_COLLATION(); bool result; - result = (len1 == len2 && - memcmp(NameStr(*arg1), VARDATA_ANY(arg2), len1) == 0); + check_collation_set(collid); + + if (collid == C_COLLATION_OID) + result = (len1 == len2 && + memcmp(NameStr(*arg1), VARDATA_ANY(arg2), len1) == 0); + else + result = (varstr_cmp(NameStr(*arg1), len1, + VARDATA_ANY(arg2), len2, + collid) == 0); PG_FREE_IF_COPY(arg2, 1); @@ -2721,10 +2770,18 @@ texteqname(PG_FUNCTION_ARGS) Name arg2 = PG_GETARG_NAME(1); size_t len1 = VARSIZE_ANY_EXHDR(arg1); size_t len2 = strlen(NameStr(*arg2)); + Oid collid = PG_GET_COLLATION(); bool result; - result = (len1 == len2 && - memcmp(VARDATA_ANY(arg1), NameStr(*arg2), len1) == 0); + check_collation_set(collid); + + if (collid == C_COLLATION_OID) + result = (len1 == len2 && + memcmp(VARDATA_ANY(arg1), NameStr(*arg2), len1) == 0); + else + result = (varstr_cmp(VARDATA_ANY(arg1), len1, + NameStr(*arg2), len2, + collid) == 0); PG_FREE_IF_COPY(arg1, 0); @@ -2738,10 +2795,18 @@ namenetext(PG_FUNCTION_ARGS) text *arg2 = PG_GETARG_TEXT_PP(1); size_t len1 = strlen(NameStr(*arg1)); size_t len2 = VARSIZE_ANY_EXHDR(arg2); + Oid collid = PG_GET_COLLATION(); bool result; - result = !(len1 == len2 && - memcmp(NameStr(*arg1), VARDATA_ANY(arg2), len1) == 0); + check_collation_set(collid); + + if (collid == C_COLLATION_OID) + result = !(len1 == len2 && + memcmp(NameStr(*arg1), VARDATA_ANY(arg2), len1) == 0); + else + result = !(varstr_cmp(NameStr(*arg1), len1, + VARDATA_ANY(arg2), len2, + collid) == 0); PG_FREE_IF_COPY(arg2, 1); @@ -2755,10 +2820,18 @@ textnename(PG_FUNCTION_ARGS) Name arg2 = 
PG_GETARG_NAME(1); size_t len1 = VARSIZE_ANY_EXHDR(arg1); size_t len2 = strlen(NameStr(*arg2)); + Oid collid = PG_GET_COLLATION(); bool result; - result = !(len1 == len2 && - memcmp(VARDATA_ANY(arg1), NameStr(*arg2), len1) == 0); + check_collation_set(collid); + + if (collid == C_COLLATION_OID) + result = !(len1 == len2 && + memcmp(VARDATA_ANY(arg1), NameStr(*arg2), len1) == 0); + else + result = !(varstr_cmp(VARDATA_ANY(arg1), len1, + NameStr(*arg2), len2, + collid) == 0); PG_FREE_IF_COPY(arg1, 0); @@ -4494,11 +4567,12 @@ split_text(PG_FUNCTION_ARGS) * Convenience function to return true when two text params are equal. */ static bool -text_isequal(text *txt1, text *txt2) +text_isequal(text *txt1, text *txt2, Oid collid) { - return DatumGetBool(DirectFunctionCall2(texteq, - PointerGetDatum(txt1), - PointerGetDatum(txt2))); + return DatumGetBool(DirectFunctionCall2Coll(texteq, + collid, + PointerGetDatum(txt1), + PointerGetDatum(txt2))); } /* @@ -4604,7 +4678,7 @@ text_to_array_internal(PG_FUNCTION_ARGS) text_position_cleanup(&state); /* single element can be a NULL too */ - is_null = null_string ? text_isequal(inputstring, null_string) : false; + is_null = null_string ? text_isequal(inputstring, null_string, PG_GET_COLLATION()) : false; elems[0] = PointerGetDatum(inputstring); nulls[0] = is_null; @@ -4639,7 +4713,7 @@ text_to_array_internal(PG_FUNCTION_ARGS) /* must build a temp text datum to pass to accumArrayResult */ result_text = cstring_to_text_with_len(start_ptr, chunk_len); - is_null = null_string ? text_isequal(result_text, null_string) : false; + is_null = null_string ? text_isequal(result_text, null_string, PG_GET_COLLATION()) : false; /* stash away this field */ astate = accumArrayResult(astate, @@ -4684,7 +4758,7 @@ text_to_array_internal(PG_FUNCTION_ARGS) /* must build a temp text datum to pass to accumArrayResult */ result_text = cstring_to_text_with_len(start_ptr, chunk_len); - is_null = null_string ? 
text_isequal(result_text, null_string) : false; + is_null = null_string ? text_isequal(result_text, null_string, PG_GET_COLLATION()) : false; /* stash away this field */ astate = accumArrayResult(astate, diff --git a/src/backend/utils/cache/catcache.c b/src/backend/utils/cache/catcache.c index 258a1d64cc..85dc0fbf8c 100644 --- a/src/backend/utils/cache/catcache.c +++ b/src/backend/utils/cache/catcache.c @@ -181,13 +181,18 @@ int4hashfast(Datum datum) static bool texteqfast(Datum a, Datum b) { - return DatumGetBool(DirectFunctionCall2(texteq, a, b)); + /* + * The use of DEFAULT_COLLATION_OID is fairly arbitrary here. We just + * want to take the fast "deterministic" path in texteq(). + */ + return DatumGetBool(DirectFunctionCall2Coll(texteq, DEFAULT_COLLATION_OID, a, b)); } static uint32 texthashfast(Datum datum) { - return DatumGetInt32(DirectFunctionCall1(hashtext, datum)); + /* analogously here as in texteqfast() */ + return DatumGetInt32(DirectFunctionCall1Coll(hashtext, DEFAULT_COLLATION_OID, datum)); } static bool diff --git a/src/backend/utils/cache/lsyscache.c b/src/backend/utils/cache/lsyscache.c index fba0ee8b84..f3bbaed992 100644 --- a/src/backend/utils/cache/lsyscache.c +++ b/src/backend/utils/cache/lsyscache.c @@ -908,6 +908,22 @@ get_collation_name(Oid colloid) return NULL; } +bool +get_collation_isdeterministic(Oid colloid) +{ + HeapTuple tp; + Form_pg_collation colltup; + bool result; + + tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(colloid)); + if (!HeapTupleIsValid(tp)) + elog(ERROR, "cache lookup failed for collation %u", colloid); + colltup = (Form_pg_collation) GETSTRUCT(tp); + result = colltup->collisdeterministic; + ReleaseSysCache(tp); + return result; +} + /* ---------- CONSTRAINT CACHE ---------- */ /* diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c index fd50a809ea..4886090132 100644 --- a/src/bin/initdb/initdb.c +++ b/src/bin/initdb/initdb.c @@ -1765,8 +1765,8 @@ setup_collation(FILE *cmdfd) * in pg_collation.h. 
But add it before reading system collations, so * that it wins if libc defines a locale named ucs_basic. */ - PG_CMD_PRINTF3("INSERT INTO pg_collation (oid, collname, collnamespace, collowner, collprovider, collencoding, collcollate, collctype)" - "VALUES (pg_nextoid('pg_catalog.pg_collation', 'oid', 'pg_catalog.pg_collation_oid_index'), 'ucs_basic', 'pg_catalog'::regnamespace, %u, '%c', %d, 'C', 'C');\n\n", + PG_CMD_PRINTF3("INSERT INTO pg_collation (oid, collname, collnamespace, collowner, collprovider, collisdeterministic, collencoding, collcollate, collctype)" + "VALUES (pg_nextoid('pg_catalog.pg_collation', 'oid', 'pg_catalog.pg_collation_oid_index'), 'ucs_basic', 'pg_catalog'::regnamespace, %u, '%c', true, %d, 'C', 'C');\n\n", BOOTSTRAP_SUPERUSERID, COLLPROVIDER_LIBC, PG_UTF8); /* Now import all collations we can find in the operating system */ diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index 2b1a94733b..4be0c42175 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -13259,6 +13259,7 @@ dumpCollation(Archive *fout, CollInfo *collinfo) char *qcollname; PGresult *res; int i_collprovider; + int i_collisdeterministic; int i_collcollate; int i_collctype; const char *collprovider; @@ -13276,28 +13277,35 @@ dumpCollation(Archive *fout, CollInfo *collinfo) qcollname = pg_strdup(fmtId(collinfo->dobj.name)); /* Get collation-specific details */ + appendPQExpBuffer(query, "SELECT "); + if (fout->remoteVersion >= 100000) - appendPQExpBuffer(query, "SELECT " + appendPQExpBuffer(query, "collprovider, " - "collcollate, " - "collctype, " - "collversion " - "FROM pg_catalog.pg_collation c " - "WHERE c.oid = '%u'::pg_catalog.oid", - collinfo->dobj.catId.oid); + "collversion, "); else - appendPQExpBuffer(query, "SELECT " + appendPQExpBuffer(query, "'c' AS collprovider, " - "collcollate, " - "collctype, " - "NULL AS collversion " - "FROM pg_catalog.pg_collation c " - "WHERE c.oid = '%u'::pg_catalog.oid", - collinfo->dobj.catId.oid); 
+ "NULL AS collversion, "); + + if (fout->remoteVersion >= 120000) + appendPQExpBuffer(query, + "collisdeterministic, "); + else + appendPQExpBuffer(query, + "true AS collisdeterministic, "); + + appendPQExpBuffer(query, + "collcollate, " + "collctype " + "FROM pg_catalog.pg_collation c " + "WHERE c.oid = '%u'::pg_catalog.oid", + collinfo->dobj.catId.oid); res = ExecuteSqlQueryForSingleRow(fout, query->data); i_collprovider = PQfnumber(res, "collprovider"); + i_collisdeterministic = PQfnumber(res, "collisdeterministic"); i_collcollate = PQfnumber(res, "collcollate"); i_collctype = PQfnumber(res, "collctype"); @@ -13324,6 +13332,9 @@ dumpCollation(Archive *fout, CollInfo *collinfo) "unrecognized collation provider: %s\n", collprovider); + if (strcmp(PQgetvalue(res, 0, i_collisdeterministic), "f") == 0) + appendPQExpBufferStr(q, ", deterministic = false"); + if (strcmp(collcollate, collctype) == 0) { appendPQExpBufferStr(q, ", locale = "); diff --git a/src/bin/psql/describe.c b/src/bin/psql/describe.c index 4da6719ce7..b43300c262 100644 --- a/src/bin/psql/describe.c +++ b/src/bin/psql/describe.c @@ -4092,7 +4092,7 @@ listCollations(const char *pattern, bool verbose, bool showSystem) PQExpBufferData buf; PGresult *res; printQueryOpt myopt = pset.popt; - static const bool translate_columns[] = {false, false, false, false, false, false}; + static const bool translate_columns[] = {false, false, false, false, false, true, false}; if (pset.sversion < 90100) { @@ -4120,6 +4120,21 @@ listCollations(const char *pattern, bool verbose, bool showSystem) appendPQExpBuffer(&buf, ",\n CASE c.collprovider WHEN 'd' THEN 'default' WHEN 'c' THEN 'libc' WHEN 'i' THEN 'icu' END AS \"%s\"", gettext_noop("Provider")); + else + appendPQExpBuffer(&buf, + ",\n 'libc' AS \"%s\"", + gettext_noop("Provider")); + + if (pset.sversion >= 120000) + appendPQExpBuffer(&buf, + ",\n CASE WHEN c.collisdeterministic THEN '%s' ELSE '%s' END AS \"%s\"", + gettext_noop("yes"), gettext_noop("no"), + 
gettext_noop("Deterministic?")); + else + appendPQExpBuffer(&buf, + ",\n '%s' AS \"%s\"", + gettext_noop("yes"), + gettext_noop("Deterministic?")); if (verbose) appendPQExpBuffer(&buf, diff --git a/src/include/catalog/pg_collation.h b/src/include/catalog/pg_collation.h index 10fe711a91..4d2fcb3858 100644 --- a/src/include/catalog/pg_collation.h +++ b/src/include/catalog/pg_collation.h @@ -33,6 +33,7 @@ CATALOG(pg_collation,3456,CollationRelationId) Oid collnamespace; /* OID of namespace containing collation */ Oid collowner; /* owner of collation */ char collprovider; /* see constants below */ + bool collisdeterministic BKI_DEFAULT(t); int32 collencoding; /* encoding for this collation; -1 = "all" */ NameData collcollate; /* LC_COLLATE setting */ NameData collctype; /* LC_CTYPE setting */ @@ -61,6 +62,7 @@ typedef FormData_pg_collation *Form_pg_collation; extern Oid CollationCreate(const char *collname, Oid collnamespace, Oid collowner, char collprovider, + bool collisdeterministic, int32 collencoding, const char *collcollate, const char *collctype, const char *collversion, diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h index 3831cceedf..76cc5caa61 100644 --- a/src/include/executor/executor.h +++ b/src/include/executor/executor.h @@ -111,6 +111,7 @@ extern ExprState *execTuplesMatchPrepare(TupleDesc desc, int numCols, const AttrNumber *keyColIdx, const Oid *eqOperators, + const Oid *collations, PlanState *parent); extern void execTuplesHashPrepare(int numCols, const Oid *eqOperators, @@ -121,6 +122,7 @@ extern TupleHashTable BuildTupleHashTable(PlanState *parent, int numCols, AttrNumber *keyColIdx, const Oid *eqfuncoids, FmgrInfo *hashfunctions, + Oid *collations, long nbuckets, Size additionalsize, MemoryContext tablecxt, MemoryContext tempcxt, bool use_variable_hash_iv); @@ -247,6 +249,7 @@ extern ExprState *ExecBuildGroupingEqual(TupleDesc ldesc, TupleDesc rdesc, int numCols, const AttrNumber *keyColIdx, const Oid *eqfunctions, + 
const Oid *collations, PlanState *parent); extern ProjectionInfo *ExecBuildProjectionInfo(List *targetList, ExprContext *econtext, diff --git a/src/include/executor/hashjoin.h b/src/include/executor/hashjoin.h index e7bf158c1b..2c94b926d3 100644 --- a/src/include/executor/hashjoin.h +++ b/src/include/executor/hashjoin.h @@ -337,6 +337,7 @@ typedef struct HashJoinTableData FmgrInfo *outer_hashfunctions; /* lookup data for hash functions */ FmgrInfo *inner_hashfunctions; /* lookup data for hash functions */ bool *hashStrict; /* is each hash join operator strict? */ + Oid *collations; Size spaceUsed; /* memory space currently used by tuples */ Size spaceAllowed; /* upper limit for space used */ diff --git a/src/include/executor/nodeHash.h b/src/include/executor/nodeHash.h index 1309b32b90..1233766023 100644 --- a/src/include/executor/nodeHash.h +++ b/src/include/executor/nodeHash.h @@ -24,7 +24,7 @@ extern Node *MultiExecHash(HashState *node); extern void ExecEndHash(HashState *node); extern void ExecReScanHash(HashState *node); -extern HashJoinTable ExecHashTableCreate(HashState *state, List *hashOperators, +extern HashJoinTable ExecHashTableCreate(HashState *state, List *hashOperators, List *hashCollations, bool keepNulls); extern void ExecParallelHashTableAlloc(HashJoinTable hashtable, int batchno); diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 64e8ef3740..73fa8b78be 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -688,6 +688,7 @@ typedef struct TupleHashTableData AttrNumber *keyColIdx; /* attr numbers of key columns */ FmgrInfo *tab_hash_funcs; /* hash functions for table datatype(s) */ ExprState *tab_eq_func; /* comparator for table datatype(s) */ + Oid *tab_collations; /* collations for hash and comparison */ MemoryContext tablecxt; /* memory context containing table */ MemoryContext tempcxt; /* context for function evaluations */ Size entrysize; /* actual size to make each hash entry */ @@ 
-857,6 +858,7 @@ typedef struct SubPlanState AttrNumber *keyColIdx; /* control data for hash tables */ Oid *tab_eq_funcoids; /* equality func oids for table * datatype(s) */ + Oid *tab_collations; /* collations for hash and comparison */ FmgrInfo *tab_hash_funcs; /* hash functions for table datatype(s) */ FmgrInfo *tab_eq_funcs; /* equality functions for table datatype(s) */ FmgrInfo *lhs_hash_funcs; /* hash functions for lefthand datatype(s) */ @@ -1869,6 +1871,7 @@ typedef struct HashJoinState List *hj_OuterHashKeys; /* list of ExprState nodes */ List *hj_InnerHashKeys; /* list of ExprState nodes */ List *hj_HashOperators; /* list of operator OIDs */ + List *hj_Collations; HashJoinTable hj_HashTable; uint32 hj_CurHashValue; int hj_CurBucketNo; diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index 6d087c268f..840ba416a0 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -297,6 +297,7 @@ typedef struct RecursiveUnion * duplicate-ness */ AttrNumber *dupColIdx; /* their indexes in the target list */ Oid *dupOperators; /* equality operators to compare with */ + Oid *dupCollations; long numGroups; /* estimated number of groups in input */ } RecursiveUnion; @@ -773,6 +774,7 @@ typedef struct Group int numCols; /* number of grouping columns */ AttrNumber *grpColIdx; /* their indexes in the target list */ Oid *grpOperators; /* equality operators to compare with */ + Oid *grpCollations; } Group; /* --------------- @@ -797,6 +799,7 @@ typedef struct Agg int numCols; /* number of grouping columns */ AttrNumber *grpColIdx; /* their indexes in the target list */ Oid *grpOperators; /* equality operators to compare with */ + Oid *grpCollations; long numGroups; /* estimated number of groups in input */ Bitmapset *aggParams; /* IDs of Params used in Aggref inputs */ /* Note: planner provides numGroups & aggParams only in HASHED/MIXED case */ @@ -815,9 +818,11 @@ typedef struct WindowAgg int partNumCols; /* number of columns 
in partition clause */ AttrNumber *partColIdx; /* their indexes in the target list */ Oid *partOperators; /* equality operators for partition columns */ + Oid *partCollations; /* collations for partition columns */ int ordNumCols; /* number of columns in ordering clause */ AttrNumber *ordColIdx; /* their indexes in the target list */ Oid *ordOperators; /* equality operators for ordering columns */ + Oid *ordCollations; /* collations for ordering columns */ int frameOptions; /* frame_clause options, see WindowDef */ Node *startOffset; /* expression for starting bound, if any */ Node *endOffset; /* expression for ending bound, if any */ @@ -839,6 +844,7 @@ typedef struct Unique int numCols; /* number of columns to check for uniqueness */ AttrNumber *uniqColIdx; /* their indexes in the target list */ Oid *uniqOperators; /* equality operators to compare with */ + Oid *uniqCollations; /* collations for equality comparisons */ } Unique; /* ------------ @@ -913,6 +919,7 @@ typedef struct SetOp * duplicate-ness */ AttrNumber *dupColIdx; /* their indexes in the target list */ Oid *dupOperators; /* equality operators to compare with */ + Oid *dupCollations; AttrNumber flagColIdx; /* where is the flag column, if any */ int firstFlag; /* flag value for first input relation */ long numGroups; /* estimated number of groups in input */ diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h index bec0c38617..8e8f570e00 100644 --- a/src/include/optimizer/planmain.h +++ b/src/include/optimizer/planmain.h @@ -63,7 +63,7 @@ extern bool is_projection_capable_plan(Plan *plan); extern Sort *make_sort_from_sortclauses(List *sortcls, Plan *lefttree); extern Agg *make_agg(List *tlist, List *qual, AggStrategy aggstrategy, AggSplit aggsplit, - int numGroupCols, AttrNumber *grpColIdx, Oid *grpOperators, + int numGroupCols, AttrNumber *grpColIdx, Oid *grpOperators, Oid *grpCollations, List *groupingSets, List *chain, double dNumGroups, Plan *lefttree); extern Limit 
*make_limit(Plan *lefttree, Node *limitOffset, Node *limitCount); diff --git a/src/include/optimizer/tlist.h b/src/include/optimizer/tlist.h index 8b967f9583..70f8454a44 100644 --- a/src/include/optimizer/tlist.h +++ b/src/include/optimizer/tlist.h @@ -48,6 +48,7 @@ extern SortGroupClause *get_sortgroupref_clause_noerr(Index sortref, List *clauses); extern Oid *extract_grouping_ops(List *groupClause); +extern Oid *extract_grouping_collations(List *groupClause, List *tlist); extern AttrNumber *extract_grouping_cols(List *groupClause, List *tlist); extern bool grouping_is_sortable(List *groupClause); extern bool grouping_is_hashable(List *groupClause); diff --git a/src/include/partitioning/partbounds.h b/src/include/partitioning/partbounds.h index b1ae39ad63..683e1574ea 100644 --- a/src/include/partitioning/partbounds.h +++ b/src/include/partitioning/partbounds.h @@ -77,6 +77,7 @@ typedef struct PartitionBoundInfoData extern int get_hash_partition_greatest_modulus(PartitionBoundInfo b); extern uint64 compute_partition_hash_value(int partnatts, FmgrInfo *partsupfunc, + Oid *partcollation, Datum *values, bool *isnull); extern List *get_qual_from_partbound(Relation rel, Relation parent, PartitionBoundSpec *spec); diff --git a/src/include/utils/lsyscache.h b/src/include/utils/lsyscache.h index ceec85db92..f622b9e512 100644 --- a/src/include/utils/lsyscache.h +++ b/src/include/utils/lsyscache.h @@ -90,6 +90,7 @@ extern Oid get_atttype(Oid relid, AttrNumber attnum); extern void get_atttypetypmodcoll(Oid relid, AttrNumber attnum, Oid *typid, int32 *typmod, Oid *collid); extern char *get_collation_name(Oid colloid); +extern bool get_collation_isdeterministic(Oid colloid); extern char *get_constraint_name(Oid conoid); extern char *get_language_name(Oid langoid, bool missing_ok); extern Oid get_opclass_family(Oid opclass); diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h index 606952afd7..a342a62549 100644 --- a/src/include/utils/pg_locale.h +++ 
b/src/include/utils/pg_locale.h @@ -82,6 +82,7 @@ extern void cache_locale_time(void); struct pg_locale_struct { char provider; + bool deterministic; union { #ifdef HAVE_LOCALE_T diff --git a/src/test/regress/expected/collate.icu.utf8.out b/src/test/regress/expected/collate.icu.utf8.out index f485b5c330..eaf58637f8 100644 --- a/src/test/regress/expected/collate.icu.utf8.out +++ b/src/test/regress/expected/collate.icu.utf8.out @@ -1100,27 +1100,691 @@ select textrange_en_us('A','Z') @> 'b'::text; drop type textrange_c; drop type textrange_en_us; +-- nondeterministic collations +CREATE COLLATION ctest_det (provider = icu, locale = 'und', deterministic = true); +CREATE COLLATION ctest_nondet (provider = icu, locale = 'und', deterministic = false); +CREATE TABLE test6 (a int, b text); +-- same string in different normal forms +INSERT INTO test6 VALUES (1, U&'\00E4bc'); +INSERT INTO test6 VALUES (2, U&'\0061\0308bc'); +SELECT * FROM test6; + a | b +---+----- + 1 | äbc + 2 | äbc +(2 rows) + +SELECT * FROM test6 WHERE b = 'äbc' COLLATE ctest_det; + a | b +---+----- + 1 | äbc +(1 row) + +SELECT * FROM test6 WHERE b = 'äbc' COLLATE ctest_nondet; + a | b +---+----- + 1 | äbc + 2 | äbc +(2 rows) + +CREATE COLLATION case_sensitive (provider = icu, locale = 'und'); +CREATE COLLATION case_insensitive (provider = icu, locale = 'und-u-ks-level2', deterministic = false); +SELECT 'abc' <= 'ABC' COLLATE case_sensitive, 'abc' >= 'ABC' COLLATE case_sensitive; + ?column? | ?column? +----------+---------- + t | f +(1 row) + +SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_insensitive; + ?column? | ?column? 
+----------+---------- + t | t +(1 row) + +CREATE TABLE test1cs (x text COLLATE case_sensitive); +CREATE TABLE test2cs (x text COLLATE case_sensitive); +CREATE TABLE test3cs (x text COLLATE case_sensitive); +INSERT INTO test1cs VALUES ('abc'), ('def'), ('ghi'); +INSERT INTO test2cs VALUES ('ABC'), ('ghi'); +INSERT INTO test3cs VALUES ('abc'), ('ABC'), ('def'), ('ghi'); +SELECT x FROM test3cs WHERE x = 'abc'; + x +----- + abc +(1 row) + +SELECT x FROM test3cs WHERE x <> 'abc'; + x +----- + ABC + def + ghi +(3 rows) + +SELECT x FROM test3cs WHERE x LIKE 'a%'; + x +----- + abc +(1 row) + +SELECT x FROM test3cs WHERE x ILIKE 'a%'; + x +----- + abc + ABC +(2 rows) + +SELECT x FROM test3cs WHERE x SIMILAR TO 'a%'; + x +----- + abc +(1 row) + +SELECT x FROM test3cs WHERE x ~ 'a'; + x +----- + abc +(1 row) + +SELECT x FROM test1cs UNION SELECT x FROM test2cs; + x +----- + ABC + abc + def + ghi +(4 rows) + +SELECT x FROM test2cs UNION SELECT x FROM test1cs; + x +----- + ABC + abc + def + ghi +(4 rows) + +SELECT x FROM test1cs INTERSECT SELECT x FROM test2cs; + x +----- + ghi +(1 row) + +SELECT x FROM test2cs INTERSECT SELECT x FROM test1cs; + x +----- + ghi +(1 row) + +SELECT x FROM test1cs EXCEPT SELECT x FROM test2cs; + x +----- + abc + def +(2 rows) + +SELECT x FROM test2cs EXCEPT SELECT x FROM test1cs; + x +----- + ABC +(1 row) + +SELECT DISTINCT x FROM test3cs; + x +----- + ABC + ghi + abc + def +(4 rows) + +SELECT count(DISTINCT x) FROM test3cs; + count +------- + 4 +(1 row) + +SELECT x, count(*) FROM test3cs GROUP BY x ORDER BY x; + x | count +-----+------- + abc | 1 + ABC | 1 + def | 1 + ghi | 1 +(4 rows) + +SELECT x, row_number() OVER (ORDER BY x), rank() OVER (ORDER BY x) FROM test3cs ORDER BY x; + x | row_number | rank +-----+------------+------ + abc | 1 | 1 + ABC | 2 | 2 + def | 3 | 3 + ghi | 4 | 4 +(4 rows) + +CREATE UNIQUE INDEX ON test1cs (x); -- ok +INSERT INTO test1cs VALUES ('ABC'); -- ok +CREATE UNIQUE INDEX ON test3cs (x); -- ok +SELECT 
string_to_array('ABC,DEF,GHI' COLLATE case_sensitive, ',', 'abc'); + string_to_array +----------------- + {ABC,DEF,GHI} +(1 row) + +CREATE TABLE test1ci (x text COLLATE case_insensitive); +CREATE TABLE test2ci (x text COLLATE case_insensitive); +CREATE TABLE test3ci (x text COLLATE case_insensitive); +INSERT INTO test1ci VALUES ('abc'), ('def'), ('ghi'); +INSERT INTO test2ci VALUES ('ABC'), ('ghi'); +INSERT INTO test3ci VALUES ('abc'), ('ABC'), ('def'), ('ghi'); +SELECT x FROM test3ci WHERE x = 'abc'; + x +----- + abc + ABC +(2 rows) + +SELECT x FROM test3ci WHERE x <> 'abc'; + x +----- + def + ghi +(2 rows) + +SELECT x FROM test3ci WHERE x LIKE 'a%'; +ERROR: nondeterministic collations are not supported for LIKE +SELECT x FROM test3ci WHERE x ILIKE 'a%'; +ERROR: nondeterministic collations are not supported for ILIKE +SELECT x FROM test3ci WHERE x SIMILAR TO 'a%'; +ERROR: nondeterministic collations are not supported for regular expressions +SELECT x FROM test3ci WHERE x ~ 'a'; +ERROR: nondeterministic collations are not supported for regular expressions +SELECT x FROM test1ci UNION SELECT x FROM test2ci; + x +----- + abc + ghi + def +(3 rows) + +SELECT x FROM test2ci UNION SELECT x FROM test1ci; + x +----- + ABC + ghi + def +(3 rows) + +SELECT x FROM test1ci INTERSECT SELECT x FROM test2ci; + x +----- + ghi + abc +(2 rows) + +SELECT x FROM test2ci INTERSECT SELECT x FROM test1ci; + x +----- + ghi + ABC +(2 rows) + +SELECT x FROM test1ci EXCEPT SELECT x FROM test2ci; + x +----- + def +(1 row) + +SELECT x FROM test2ci EXCEPT SELECT x FROM test1ci; + x +--- +(0 rows) + +SELECT DISTINCT x FROM test3ci; + x +----- + ghi + def + abc +(3 rows) + +SELECT count(DISTINCT x) FROM test3ci; + count +------- + 3 +(1 row) + +SELECT x, count(*) FROM test3ci GROUP BY x ORDER BY x; + x | count +-----+------- + abc | 2 + def | 1 + ghi | 1 +(3 rows) + +SELECT x, row_number() OVER (ORDER BY x), rank() OVER (ORDER BY x) FROM test3ci ORDER BY x; + x | row_number | rank 
+-----+------------+------ + abc | 1 | 1 + ABC | 2 | 1 + def | 3 | 3 + ghi | 4 | 4 +(4 rows) + +CREATE UNIQUE INDEX ON test1ci (x); -- ok +INSERT INTO test1ci VALUES ('ABC'); -- error +ERROR: duplicate key value violates unique constraint "test1ci_x_idx" +DETAIL: Key (x)=(ABC) already exists. +CREATE UNIQUE INDEX ON test3ci (x); -- error +ERROR: could not create unique index "test3ci_x_idx" +DETAIL: Key (x)=(abc) is duplicated. +SELECT string_to_array('ABC,DEF,GHI' COLLATE case_insensitive, ',', 'abc'); + string_to_array +----------------- + {NULL,DEF,GHI} +(1 row) + +-- bpchar +CREATE TABLE test1bpci (x char(3) COLLATE case_insensitive); +CREATE TABLE test2bpci (x char(3) COLLATE case_insensitive); +CREATE TABLE test3bpci (x char(3) COLLATE case_insensitive); +INSERT INTO test1bpci VALUES ('abc'), ('def'), ('ghi'); +INSERT INTO test2bpci VALUES ('ABC'), ('ghi'); +INSERT INTO test3bpci VALUES ('abc'), ('ABC'), ('def'), ('ghi'); +SELECT x FROM test3bpci WHERE x = 'abc'; + x +----- + abc + ABC +(2 rows) + +SELECT x FROM test3bpci WHERE x <> 'abc'; + x +----- + def + ghi +(2 rows) + +SELECT x FROM test3bpci WHERE x LIKE 'a%'; +ERROR: nondeterministic collations are not supported for LIKE +SELECT x FROM test3bpci WHERE x ILIKE 'a%'; +ERROR: nondeterministic collations are not supported for ILIKE +SELECT x FROM test3bpci WHERE x SIMILAR TO 'a%'; +ERROR: nondeterministic collations are not supported for regular expressions +SELECT x FROM test3bpci WHERE x ~ 'a'; +ERROR: nondeterministic collations are not supported for regular expressions +SELECT x FROM test1bpci UNION SELECT x FROM test2bpci; + x +----- + abc + ghi + def +(3 rows) + +SELECT x FROM test2bpci UNION SELECT x FROM test1bpci; + x +----- + ABC + ghi + def +(3 rows) + +SELECT x FROM test1bpci INTERSECT SELECT x FROM test2bpci; + x +----- + ghi + abc +(2 rows) + +SELECT x FROM test2bpci INTERSECT SELECT x FROM test1bpci; + x +----- + ghi + ABC +(2 rows) + +SELECT x FROM test1bpci EXCEPT SELECT x FROM test2bpci; 
+ x +----- + def +(1 row) + +SELECT x FROM test2bpci EXCEPT SELECT x FROM test1bpci; + x +--- +(0 rows) + +SELECT DISTINCT x FROM test3bpci; + x +----- + ghi + def + abc +(3 rows) + +SELECT count(DISTINCT x) FROM test3bpci; + count +------- + 3 +(1 row) + +SELECT x, count(*) FROM test3bpci GROUP BY x ORDER BY x; + x | count +-----+------- + abc | 2 + def | 1 + ghi | 1 +(3 rows) + +SELECT x, row_number() OVER (ORDER BY x), rank() OVER (ORDER BY x) FROM test3bpci ORDER BY x; + x | row_number | rank +-----+------------+------ + abc | 1 | 1 + ABC | 2 | 1 + def | 3 | 3 + ghi | 4 | 4 +(4 rows) + +CREATE UNIQUE INDEX ON test1bpci (x); -- ok +INSERT INTO test1bpci VALUES ('ABC'); -- error +ERROR: duplicate key value violates unique constraint "test1bpci_x_idx" +DETAIL: Key (x)=(ABC) already exists. +CREATE UNIQUE INDEX ON test3bpci (x); -- error +ERROR: could not create unique index "test3bpci_x_idx" +DETAIL: Key (x)=(abc) is duplicated. +SELECT string_to_array('ABC,DEF,GHI'::char(11) COLLATE case_insensitive, ',', 'abc'); + string_to_array +----------------- + {NULL,DEF,GHI} +(1 row) + +-- name vs. 
text comparison operators +SELECT relname FROM pg_class WHERE relname = 'PG_CLASS'::text COLLATE case_insensitive; + relname +---------- + pg_class +(1 row) + +SELECT relname FROM pg_class WHERE 'PG_CLASS'::text = relname COLLATE case_insensitive; + relname +---------- + pg_class +(1 row) + +SELECT typname FROM pg_type WHERE typname LIKE 'int_' AND typname <> 'INT2'::text COLLATE case_insensitive; + typname +--------- + int4 + int8 +(2 rows) + +SELECT typname FROM pg_type WHERE typname LIKE 'int_' AND 'INT2'::text <> typname COLLATE case_insensitive;; + typname +--------- + int4 + int8 +(2 rows) + +-- test case adapted from subselect.sql +CREATE TEMP TABLE outer_text (f1 text COLLATE case_insensitive, f2 text); +INSERT INTO outer_text VALUES ('a', 'a'); +INSERT INTO outer_text VALUES ('b', 'a'); +INSERT INTO outer_text VALUES ('A', NULL); +INSERT INTO outer_text VALUES ('B', NULL); +CREATE TEMP TABLE inner_text (c1 text COLLATE case_insensitive, c2 text); +INSERT INTO inner_text VALUES ('a', NULL); +SELECT * FROM outer_text WHERE (f1, f2) NOT IN (SELECT * FROM inner_text); + f1 | f2 +----+---- + b | a + B | +(2 rows) + +-- accents +CREATE COLLATION ignore_accents (provider = icu, locale = 'und-u-ks-level1-kc-true', deterministic = false); +CREATE TABLE test4 (a int, b text); +INSERT INTO test4 VALUES (1, 'cote'), (2, 'côte'), (3, 'coté'), (4, 'côté'); +SELECT * FROM test4 WHERE b = 'cote'; + a | b +---+------ + 1 | cote +(1 row) + +SELECT * FROM test4 WHERE b = 'cote' COLLATE ignore_accents; + a | b +---+------ + 1 | cote + 2 | côte + 3 | coté + 4 | côté +(4 rows) + +SELECT * FROM test4 WHERE b = 'Cote' COLLATE ignore_accents; -- still case-sensitive + a | b +---+--- +(0 rows) + +SELECT * FROM test4 WHERE b = 'Cote' COLLATE case_insensitive; + a | b +---+------ + 1 | cote +(1 row) + +-- foreign keys (should use collation of primary key) +-- PK is case-sensitive, FK is case-insensitive +CREATE TABLE test10pk (x text COLLATE case_sensitive PRIMARY KEY); +INSERT INTO 
test10pk VALUES ('abc'), ('def'), ('ghi'); +CREATE TABLE test10fk (x text COLLATE case_insensitive REFERENCES test10pk (x) ON UPDATE CASCADE ON DELETE CASCADE); +INSERT INTO test10fk VALUES ('abc'); -- ok +INSERT INTO test10fk VALUES ('ABC'); -- error +ERROR: insert or update on table "test10fk" violates foreign key constraint "test10fk_x_fkey" +DETAIL: Key (x)=(ABC) is not present in table "test10pk". +INSERT INTO test10fk VALUES ('xyz'); -- error +ERROR: insert or update on table "test10fk" violates foreign key constraint "test10fk_x_fkey" +DETAIL: Key (x)=(xyz) is not present in table "test10pk". +SELECT * FROM test10pk; + x +----- + abc + def + ghi +(3 rows) + +SELECT * FROM test10fk; + x +----- + abc +(1 row) + +-- restrict update even though the values are "equal" in the FK table +UPDATE test10fk SET x = 'ABC' WHERE x = 'abc'; -- error +ERROR: insert or update on table "test10fk" violates foreign key constraint "test10fk_x_fkey" +DETAIL: Key (x)=(ABC) is not present in table "test10pk". +SELECT * FROM test10fk; + x +----- + abc +(1 row) + +DELETE FROM test10pk WHERE x = 'abc'; +SELECT * FROM test10pk; + x +----- + def + ghi +(2 rows) + +SELECT * FROM test10fk; + x +--- +(0 rows) + +-- PK is case-insensitive, FK is case-sensitive +CREATE TABLE test11pk (x text COLLATE case_insensitive PRIMARY KEY); +INSERT INTO test11pk VALUES ('abc'), ('def'), ('ghi'); +CREATE TABLE test11fk (x text COLLATE case_sensitive REFERENCES test11pk (x) ON UPDATE CASCADE ON DELETE CASCADE); +INSERT INTO test11fk VALUES ('abc'); -- ok +INSERT INTO test11fk VALUES ('ABC'); -- ok +INSERT INTO test11fk VALUES ('xyz'); -- error +ERROR: insert or update on table "test11fk" violates foreign key constraint "test11fk_x_fkey" +DETAIL: Key (x)=(xyz) is not present in table "test11pk". 
+SELECT * FROM test11pk; + x +----- + abc + def + ghi +(3 rows) + +SELECT * FROM test11fk; + x +----- + abc + ABC +(2 rows) + +-- cascade update even though the values are "equal" in the PK table +UPDATE test11pk SET x = 'ABC' WHERE x = 'abc'; +SELECT * FROM test11fk; + x +----- + ABC + ABC +(2 rows) + +DELETE FROM test11pk WHERE x = 'abc'; +SELECT * FROM test11pk; + x +----- + def + ghi +(2 rows) + +SELECT * FROM test11fk; + x +--- +(0 rows) + +-- partitioning +CREATE TABLE test20 (a int, b text COLLATE case_insensitive) PARTITION BY LIST (b); +CREATE TABLE test20_1 PARTITION OF test20 FOR VALUES IN ('abc'); +INSERT INTO test20 VALUES (1, 'abc'); +INSERT INTO test20 VALUES (2, 'ABC'); +SELECT * FROM test20_1; + a | b +---+----- + 1 | abc + 2 | ABC +(2 rows) + +CREATE TABLE test21 (a int, b text COLLATE case_insensitive) PARTITION BY RANGE (b); +CREATE TABLE test21_1 PARTITION OF test21 FOR VALUES FROM ('ABC') TO ('DEF'); +INSERT INTO test21 VALUES (1, 'abc'); +INSERT INTO test21 VALUES (2, 'ABC'); +SELECT * FROM test21_1; + a | b +---+----- + 1 | abc + 2 | ABC +(2 rows) + +CREATE TABLE test22 (a int, b text COLLATE case_sensitive) PARTITION BY HASH (b); +CREATE TABLE test22_0 PARTITION OF test22 FOR VALUES WITH (MODULUS 2, REMAINDER 0); +CREATE TABLE test22_1 PARTITION OF test22 FOR VALUES WITH (MODULUS 2, REMAINDER 1); +INSERT INTO test22 VALUES (1, 'def'); +INSERT INTO test22 VALUES (2, 'DEF'); +-- they end up in different partitions +SELECT * FROM test22_0; + a | b +---+----- + 1 | def +(1 row) + +SELECT * FROM test22_1; + a | b +---+----- + 2 | DEF +(1 row) + +CREATE TABLE test23 (a int, b text COLLATE case_insensitive) PARTITION BY HASH (b); +CREATE TABLE test23_0 PARTITION OF test23 FOR VALUES WITH (MODULUS 2, REMAINDER 0); +CREATE TABLE test23_1 PARTITION OF test23 FOR VALUES WITH (MODULUS 2, REMAINDER 1); +INSERT INTO test23 VALUES (1, 'def'); +INSERT INTO test23 VALUES (2, 'DEF'); +-- they end up in the same partition +SELECT * FROM test23_0; + a | b 
+---+--- +(0 rows) + +SELECT * FROM test23_1; + a | b +---+----- + 1 | def + 2 | DEF +(2 rows) + +CREATE TABLE test30 (a int, b char(3) COLLATE case_insensitive) PARTITION BY LIST (b); +CREATE TABLE test30_1 PARTITION OF test30 FOR VALUES IN ('abc'); +INSERT INTO test30 VALUES (1, 'abc'); +INSERT INTO test30 VALUES (2, 'ABC'); +SELECT * FROM test30_1; + a | b +---+----- + 1 | abc + 2 | ABC +(2 rows) + +CREATE TABLE test31 (a int, b char(3) COLLATE case_insensitive) PARTITION BY RANGE (b); +CREATE TABLE test31_1 PARTITION OF test31 FOR VALUES FROM ('ABC') TO ('DEF'); +INSERT INTO test31 VALUES (1, 'abc'); +INSERT INTO test31 VALUES (2, 'ABC'); +SELECT * FROM test31_1; + a | b +---+----- + 1 | abc + 2 | ABC +(2 rows) + +CREATE TABLE test32 (a int, b char(3) COLLATE case_sensitive) PARTITION BY HASH (b); +CREATE TABLE test32_0 PARTITION OF test32 FOR VALUES WITH (MODULUS 2, REMAINDER 0); +CREATE TABLE test32_1 PARTITION OF test32 FOR VALUES WITH (MODULUS 2, REMAINDER 1); +INSERT INTO test32 VALUES (1, 'def'); +INSERT INTO test32 VALUES (2, 'DEF'); +-- they end up in different partitions +SELECT * FROM test32_0; + a | b +---+----- + 1 | def +(1 row) + +SELECT * FROM test32_1; + a | b +---+----- + 2 | DEF +(1 row) + +CREATE TABLE test33 (a int, b char(3) COLLATE case_insensitive) PARTITION BY HASH (b); +CREATE TABLE test33_0 PARTITION OF test33 FOR VALUES WITH (MODULUS 2, REMAINDER 0); +CREATE TABLE test33_1 PARTITION OF test33 FOR VALUES WITH (MODULUS 2, REMAINDER 1); +INSERT INTO test33 VALUES (1, 'def'); +INSERT INTO test33 VALUES (2, 'DEF'); +-- they end up in the same partition +SELECT * FROM test33_0; + a | b +---+--- +(0 rows) + +SELECT * FROM test33_1; + a | b +---+----- + 1 | def + 2 | DEF +(2 rows) + -- cleanup +SET client_min_messages TO 'warning'; DROP SCHEMA collate_tests CASCADE; -NOTICE: drop cascades to 18 other objects -DETAIL: drop cascades to table collate_test1 -drop cascades to table collate_test_like -drop cascades to table collate_test2 -drop 
cascades to table collate_test3 -drop cascades to type testdomain_sv -drop cascades to table collate_test4 -drop cascades to table collate_test5 -drop cascades to table collate_test10 -drop cascades to table collate_test6 -drop cascades to view collview1 -drop cascades to view collview2 -drop cascades to view collview3 -drop cascades to type testdomain -drop cascades to function mylt(text,text) -drop cascades to function mylt_noninline(text,text) -drop cascades to function mylt_plpgsql(text,text) -drop cascades to function mylt2(text,text) -drop cascades to function dup(anyelement) RESET search_path; -- leave a collation for pg_upgrade test CREATE COLLATION coll_icu_upgrade FROM "und-x-icu"; diff --git a/src/test/regress/expected/collate.linux.utf8.out b/src/test/regress/expected/collate.linux.utf8.out index 400a747cdc..9c5b8abef8 100644 --- a/src/test/regress/expected/collate.linux.utf8.out +++ b/src/test/regress/expected/collate.linux.utf8.out @@ -1117,24 +1117,11 @@ select textrange_en_us('A','Z') @> 'b'::text; drop type textrange_c; drop type textrange_en_us; +-- nondeterministic collations +-- (not supported with libc provider) +CREATE COLLATION ctest_det (locale = 'en_US.utf8', deterministic = true); +CREATE COLLATION ctest_nondet (locale = 'en_US.utf8', deterministic = false); +ERROR: nondeterministic collations not supported with this provider -- cleanup +SET client_min_messages TO 'warning'; DROP SCHEMA collate_tests CASCADE; -NOTICE: drop cascades to 18 other objects -DETAIL: drop cascades to table collate_test1 -drop cascades to table collate_test_like -drop cascades to table collate_test2 -drop cascades to table collate_test3 -drop cascades to type testdomain_sv -drop cascades to table collate_test4 -drop cascades to table collate_test5 -drop cascades to table collate_test10 -drop cascades to table collate_test6 -drop cascades to view collview1 -drop cascades to view collview2 -drop cascades to view collview3 -drop cascades to type testdomain -drop
cascades to function mylt(text,text) -drop cascades to function mylt_noninline(text,text) -drop cascades to function mylt_plpgsql(text,text) -drop cascades to function mylt2(text,text) -drop cascades to function dup(anyelement) diff --git a/src/test/regress/expected/collate.out b/src/test/regress/expected/collate.out index fcbe3a5cc8..dbfa5c9348 100644 --- a/src/test/regress/expected/collate.out +++ b/src/test/regress/expected/collate.out @@ -498,6 +498,21 @@ SELECT a, b, a < b as lt FROM A | b | t (2 rows) +-- collation mismatch in subselects +SELECT * FROM collate_test10 WHERE (x, y) NOT IN (SELECT y, x FROM collate_test10); +ERROR: could not determine which collation to use for string hashing +HINT: Use the COLLATE clause to set the collation explicitly. +-- now it works with overrides +SELECT * FROM collate_test10 WHERE (x COLLATE "POSIX", y COLLATE "C") NOT IN (SELECT y, x FROM collate_test10); + a | x | y +---+---+--- +(0 rows) + +SELECT * FROM collate_test10 WHERE (x, y) NOT IN (SELECT y COLLATE "C", x COLLATE "POSIX" FROM collate_test10); + a | x | y +---+---+--- +(0 rows) + -- casting SELECT CAST('42' AS text COLLATE "C"); ERROR: syntax error at or near "COLLATE" diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out index 588d069589..043d9fa532 100644 --- a/src/test/regress/expected/subselect.out +++ b/src/test/regress/expected/subselect.out @@ -745,6 +745,25 @@ select * from outer_7597 where (f1, f2) not in (select * from inner_7597); 1 | (2 rows) +-- +-- Similar test case using text that verifies that collation +-- information is passed through by execTuplesEqual() in nodeSubplan.c +-- (otherwise it would error in texteq()) +-- +create temp table outer_text (f1 text, f2 text); +insert into outer_text values ('a', 'a'); +insert into outer_text values ('b', 'a'); +insert into outer_text values ('a', null); +insert into outer_text values ('b', null); +create temp table inner_text (c1 text, c2 text); +insert into 
inner_text values ('a', null); +select * from outer_text where (f1, f2) not in (select * from inner_text); + f1 | f2 +----+---- + b | a + b | +(2 rows) + -- -- Test case for premature memory release during hashing of subplan output -- diff --git a/src/test/regress/sql/collate.icu.utf8.sql b/src/test/regress/sql/collate.icu.utf8.sql index ef39445b30..cb0f221982 100644 --- a/src/test/regress/sql/collate.icu.utf8.sql +++ b/src/test/regress/sql/collate.icu.utf8.sql @@ -425,7 +425,237 @@ CREATE INDEX collate_dep_test4i ON collate_dep_test4t (b COLLATE test0); drop type textrange_en_us; +-- nondeterministic collations + +CREATE COLLATION ctest_det (provider = icu, locale = 'und', deterministic = true); +CREATE COLLATION ctest_nondet (provider = icu, locale = 'und', deterministic = false); + +CREATE TABLE test6 (a int, b text); +-- same string in different normal forms +INSERT INTO test6 VALUES (1, U&'\00E4bc'); +INSERT INTO test6 VALUES (2, U&'\0061\0308bc'); +SELECT * FROM test6; +SELECT * FROM test6 WHERE b = 'äbc' COLLATE ctest_det; +SELECT * FROM test6 WHERE b = 'äbc' COLLATE ctest_nondet; + +CREATE COLLATION case_sensitive (provider = icu, locale = 'und'); +CREATE COLLATION case_insensitive (provider = icu, locale = 'und-u-ks-level2', deterministic = false); + +SELECT 'abc' <= 'ABC' COLLATE case_sensitive, 'abc' >= 'ABC' COLLATE case_sensitive; +SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_insensitive; + +CREATE TABLE test1cs (x text COLLATE case_sensitive); +CREATE TABLE test2cs (x text COLLATE case_sensitive); +CREATE TABLE test3cs (x text COLLATE case_sensitive); +INSERT INTO test1cs VALUES ('abc'), ('def'), ('ghi'); +INSERT INTO test2cs VALUES ('ABC'), ('ghi'); +INSERT INTO test3cs VALUES ('abc'), ('ABC'), ('def'), ('ghi'); + +SELECT x FROM test3cs WHERE x = 'abc'; +SELECT x FROM test3cs WHERE x <> 'abc'; +SELECT x FROM test3cs WHERE x LIKE 'a%'; +SELECT x FROM test3cs WHERE x ILIKE 'a%'; +SELECT x FROM test3cs WHERE x SIMILAR TO 
'a%'; +SELECT x FROM test3cs WHERE x ~ 'a'; +SELECT x FROM test1cs UNION SELECT x FROM test2cs; +SELECT x FROM test2cs UNION SELECT x FROM test1cs; +SELECT x FROM test1cs INTERSECT SELECT x FROM test2cs; +SELECT x FROM test2cs INTERSECT SELECT x FROM test1cs; +SELECT x FROM test1cs EXCEPT SELECT x FROM test2cs; +SELECT x FROM test2cs EXCEPT SELECT x FROM test1cs; +SELECT DISTINCT x FROM test3cs; +SELECT count(DISTINCT x) FROM test3cs; +SELECT x, count(*) FROM test3cs GROUP BY x ORDER BY x; +SELECT x, row_number() OVER (ORDER BY x), rank() OVER (ORDER BY x) FROM test3cs ORDER BY x; +CREATE UNIQUE INDEX ON test1cs (x); -- ok +INSERT INTO test1cs VALUES ('ABC'); -- ok +CREATE UNIQUE INDEX ON test3cs (x); -- ok +SELECT string_to_array('ABC,DEF,GHI' COLLATE case_sensitive, ',', 'abc'); + +CREATE TABLE test1ci (x text COLLATE case_insensitive); +CREATE TABLE test2ci (x text COLLATE case_insensitive); +CREATE TABLE test3ci (x text COLLATE case_insensitive); +INSERT INTO test1ci VALUES ('abc'), ('def'), ('ghi'); +INSERT INTO test2ci VALUES ('ABC'), ('ghi'); +INSERT INTO test3ci VALUES ('abc'), ('ABC'), ('def'), ('ghi'); + +SELECT x FROM test3ci WHERE x = 'abc'; +SELECT x FROM test3ci WHERE x <> 'abc'; +SELECT x FROM test3ci WHERE x LIKE 'a%'; +SELECT x FROM test3ci WHERE x ILIKE 'a%'; +SELECT x FROM test3ci WHERE x SIMILAR TO 'a%'; +SELECT x FROM test3ci WHERE x ~ 'a'; +SELECT x FROM test1ci UNION SELECT x FROM test2ci; +SELECT x FROM test2ci UNION SELECT x FROM test1ci; +SELECT x FROM test1ci INTERSECT SELECT x FROM test2ci; +SELECT x FROM test2ci INTERSECT SELECT x FROM test1ci; +SELECT x FROM test1ci EXCEPT SELECT x FROM test2ci; +SELECT x FROM test2ci EXCEPT SELECT x FROM test1ci; +SELECT DISTINCT x FROM test3ci; +SELECT count(DISTINCT x) FROM test3ci; +SELECT x, count(*) FROM test3ci GROUP BY x ORDER BY x; +SELECT x, row_number() OVER (ORDER BY x), rank() OVER (ORDER BY x) FROM test3ci ORDER BY x; +CREATE UNIQUE INDEX ON test1ci (x); -- ok +INSERT INTO test1ci VALUES 
('ABC'); -- error +CREATE UNIQUE INDEX ON test3ci (x); -- error +SELECT string_to_array('ABC,DEF,GHI' COLLATE case_insensitive, ',', 'abc'); + +-- bpchar +CREATE TABLE test1bpci (x char(3) COLLATE case_insensitive); +CREATE TABLE test2bpci (x char(3) COLLATE case_insensitive); +CREATE TABLE test3bpci (x char(3) COLLATE case_insensitive); +INSERT INTO test1bpci VALUES ('abc'), ('def'), ('ghi'); +INSERT INTO test2bpci VALUES ('ABC'), ('ghi'); +INSERT INTO test3bpci VALUES ('abc'), ('ABC'), ('def'), ('ghi'); + +SELECT x FROM test3bpci WHERE x = 'abc'; +SELECT x FROM test3bpci WHERE x <> 'abc'; +SELECT x FROM test3bpci WHERE x LIKE 'a%'; +SELECT x FROM test3bpci WHERE x ILIKE 'a%'; +SELECT x FROM test3bpci WHERE x SIMILAR TO 'a%'; +SELECT x FROM test3bpci WHERE x ~ 'a'; +SELECT x FROM test1bpci UNION SELECT x FROM test2bpci; +SELECT x FROM test2bpci UNION SELECT x FROM test1bpci; +SELECT x FROM test1bpci INTERSECT SELECT x FROM test2bpci; +SELECT x FROM test2bpci INTERSECT SELECT x FROM test1bpci; +SELECT x FROM test1bpci EXCEPT SELECT x FROM test2bpci; +SELECT x FROM test2bpci EXCEPT SELECT x FROM test1bpci; +SELECT DISTINCT x FROM test3bpci; +SELECT count(DISTINCT x) FROM test3bpci; +SELECT x, count(*) FROM test3bpci GROUP BY x ORDER BY x; +SELECT x, row_number() OVER (ORDER BY x), rank() OVER (ORDER BY x) FROM test3bpci ORDER BY x; +CREATE UNIQUE INDEX ON test1bpci (x); -- ok +INSERT INTO test1bpci VALUES ('ABC'); -- error +CREATE UNIQUE INDEX ON test3bpci (x); -- error +SELECT string_to_array('ABC,DEF,GHI'::char(11) COLLATE case_insensitive, ',', 'abc'); + +-- name vs. 
text comparison operators +SELECT relname FROM pg_class WHERE relname = 'PG_CLASS'::text COLLATE case_insensitive; +SELECT relname FROM pg_class WHERE 'PG_CLASS'::text = relname COLLATE case_insensitive; + +SELECT typname FROM pg_type WHERE typname LIKE 'int_' AND typname <> 'INT2'::text COLLATE case_insensitive; +SELECT typname FROM pg_type WHERE typname LIKE 'int_' AND 'INT2'::text <> typname COLLATE case_insensitive;; + +-- test case adapted from subselect.sql +CREATE TEMP TABLE outer_text (f1 text COLLATE case_insensitive, f2 text); +INSERT INTO outer_text VALUES ('a', 'a'); +INSERT INTO outer_text VALUES ('b', 'a'); +INSERT INTO outer_text VALUES ('A', NULL); +INSERT INTO outer_text VALUES ('B', NULL); + +CREATE TEMP TABLE inner_text (c1 text COLLATE case_insensitive, c2 text); +INSERT INTO inner_text VALUES ('a', NULL); + +SELECT * FROM outer_text WHERE (f1, f2) NOT IN (SELECT * FROM inner_text); + +-- accents +CREATE COLLATION ignore_accents (provider = icu, locale = 'und-u-ks-level1-kc-true', deterministic = false); + +CREATE TABLE test4 (a int, b text); +INSERT INTO test4 VALUES (1, 'cote'), (2, 'côte'), (3, 'coté'), (4, 'côté'); +SELECT * FROM test4 WHERE b = 'cote'; +SELECT * FROM test4 WHERE b = 'cote' COLLATE ignore_accents; +SELECT * FROM test4 WHERE b = 'Cote' COLLATE ignore_accents; -- still case-sensitive +SELECT * FROM test4 WHERE b = 'Cote' COLLATE case_insensitive; + +-- foreign keys (should use collation of primary key) + +-- PK is case-sensitive, FK is case-insensitive +CREATE TABLE test10pk (x text COLLATE case_sensitive PRIMARY KEY); +INSERT INTO test10pk VALUES ('abc'), ('def'), ('ghi'); +CREATE TABLE test10fk (x text COLLATE case_insensitive REFERENCES test10pk (x) ON UPDATE CASCADE ON DELETE CASCADE); +INSERT INTO test10fk VALUES ('abc'); -- ok +INSERT INTO test10fk VALUES ('ABC'); -- error +INSERT INTO test10fk VALUES ('xyz'); -- error +SELECT * FROM test10pk; +SELECT * FROM test10fk; +-- restrict update even though the values are 
"equal" in the FK table +UPDATE test10fk SET x = 'ABC' WHERE x = 'abc'; -- error +SELECT * FROM test10fk; +DELETE FROM test10pk WHERE x = 'abc'; +SELECT * FROM test10pk; +SELECT * FROM test10fk; + +-- PK is case-insensitive, FK is case-sensitive +CREATE TABLE test11pk (x text COLLATE case_insensitive PRIMARY KEY); +INSERT INTO test11pk VALUES ('abc'), ('def'), ('ghi'); +CREATE TABLE test11fk (x text COLLATE case_sensitive REFERENCES test11pk (x) ON UPDATE CASCADE ON DELETE CASCADE); +INSERT INTO test11fk VALUES ('abc'); -- ok +INSERT INTO test11fk VALUES ('ABC'); -- ok +INSERT INTO test11fk VALUES ('xyz'); -- error +SELECT * FROM test11pk; +SELECT * FROM test11fk; +-- cascade update even though the values are "equal" in the PK table +UPDATE test11pk SET x = 'ABC' WHERE x = 'abc'; +SELECT * FROM test11fk; +DELETE FROM test11pk WHERE x = 'abc'; +SELECT * FROM test11pk; +SELECT * FROM test11fk; + +-- partitioning +CREATE TABLE test20 (a int, b text COLLATE case_insensitive) PARTITION BY LIST (b); +CREATE TABLE test20_1 PARTITION OF test20 FOR VALUES IN ('abc'); +INSERT INTO test20 VALUES (1, 'abc'); +INSERT INTO test20 VALUES (2, 'ABC'); +SELECT * FROM test20_1; + +CREATE TABLE test21 (a int, b text COLLATE case_insensitive) PARTITION BY RANGE (b); +CREATE TABLE test21_1 PARTITION OF test21 FOR VALUES FROM ('ABC') TO ('DEF'); +INSERT INTO test21 VALUES (1, 'abc'); +INSERT INTO test21 VALUES (2, 'ABC'); +SELECT * FROM test21_1; + +CREATE TABLE test22 (a int, b text COLLATE case_sensitive) PARTITION BY HASH (b); +CREATE TABLE test22_0 PARTITION OF test22 FOR VALUES WITH (MODULUS 2, REMAINDER 0); +CREATE TABLE test22_1 PARTITION OF test22 FOR VALUES WITH (MODULUS 2, REMAINDER 1); +INSERT INTO test22 VALUES (1, 'def'); +INSERT INTO test22 VALUES (2, 'DEF'); +-- they end up in different partitions +SELECT * FROM test22_0; +SELECT * FROM test22_1; + +CREATE TABLE test23 (a int, b text COLLATE case_insensitive) PARTITION BY HASH (b); +CREATE TABLE test23_0 PARTITION OF 
test23 FOR VALUES WITH (MODULUS 2, REMAINDER 0); +CREATE TABLE test23_1 PARTITION OF test23 FOR VALUES WITH (MODULUS 2, REMAINDER 1); +INSERT INTO test23 VALUES (1, 'def'); +INSERT INTO test23 VALUES (2, 'DEF'); +-- they end up in the same partition +SELECT * FROM test23_0; +SELECT * FROM test23_1; + +CREATE TABLE test30 (a int, b char(3) COLLATE case_insensitive) PARTITION BY LIST (b); +CREATE TABLE test30_1 PARTITION OF test30 FOR VALUES IN ('abc'); +INSERT INTO test30 VALUES (1, 'abc'); +INSERT INTO test30 VALUES (2, 'ABC'); +SELECT * FROM test30_1; + +CREATE TABLE test31 (a int, b char(3) COLLATE case_insensitive) PARTITION BY RANGE (b); +CREATE TABLE test31_1 PARTITION OF test31 FOR VALUES FROM ('ABC') TO ('DEF'); +INSERT INTO test31 VALUES (1, 'abc'); +INSERT INTO test31 VALUES (2, 'ABC'); +SELECT * FROM test31_1; + +CREATE TABLE test32 (a int, b char(3) COLLATE case_sensitive) PARTITION BY HASH (b); +CREATE TABLE test32_0 PARTITION OF test32 FOR VALUES WITH (MODULUS 2, REMAINDER 0); +CREATE TABLE test32_1 PARTITION OF test32 FOR VALUES WITH (MODULUS 2, REMAINDER 1); +INSERT INTO test32 VALUES (1, 'def'); +INSERT INTO test32 VALUES (2, 'DEF'); +-- they end up in different partitions +SELECT * FROM test32_0; +SELECT * FROM test32_1; + +CREATE TABLE test33 (a int, b char(3) COLLATE case_insensitive) PARTITION BY HASH (b); +CREATE TABLE test33_0 PARTITION OF test33 FOR VALUES WITH (MODULUS 2, REMAINDER 0); +CREATE TABLE test33_1 PARTITION OF test33 FOR VALUES WITH (MODULUS 2, REMAINDER 1); +INSERT INTO test33 VALUES (1, 'def'); +INSERT INTO test33 VALUES (2, 'DEF'); +-- they end up in the same partition +SELECT * FROM test33_0; +SELECT * FROM test33_1; + + -- cleanup +SET client_min_messages TO 'warning'; DROP SCHEMA collate_tests CASCADE; RESET search_path; diff --git a/src/test/regress/sql/collate.linux.utf8.sql b/src/test/regress/sql/collate.linux.utf8.sql index b51162e3a1..3a3ece9c62 100644 --- a/src/test/regress/sql/collate.linux.utf8.sql +++ 
b/src/test/regress/sql/collate.linux.utf8.sql @@ -428,5 +428,13 @@ CREATE INDEX collate_dep_test4i ON collate_dep_test4t (b COLLATE test0); drop type textrange_en_us; +-- nondeterministic collations +-- (not supported with libc provider) + +CREATE COLLATION ctest_det (locale = 'en_US.utf8', deterministic = true); +CREATE COLLATION ctest_nondet (locale = 'en_US.utf8', deterministic = false); + + -- cleanup +SET client_min_messages TO 'warning'; DROP SCHEMA collate_tests CASCADE; diff --git a/src/test/regress/sql/collate.sql b/src/test/regress/sql/collate.sql index 4ddde95a5e..cb2bc22155 100644 --- a/src/test/regress/sql/collate.sql +++ b/src/test/regress/sql/collate.sql @@ -163,6 +163,11 @@ CREATE TABLE test_u AS SELECT a, b FROM collate_test1 UNION ALL SELECT a, b FROM SELECT a, b, a < b as lt FROM (VALUES ('a', 'B'), ('A', 'b' COLLATE "C")) v(a,b); +-- collation mismatch in subselects +SELECT * FROM collate_test10 WHERE (x, y) NOT IN (SELECT y, x FROM collate_test10); +-- now it works with overrides +SELECT * FROM collate_test10 WHERE (x COLLATE "POSIX", y COLLATE "C") NOT IN (SELECT y, x FROM collate_test10); +SELECT * FROM collate_test10 WHERE (x, y) NOT IN (SELECT y COLLATE "C", x COLLATE "POSIX" FROM collate_test10); -- casting diff --git a/src/test/regress/sql/subselect.sql b/src/test/regress/sql/subselect.sql index 843f511b3d..82d5a7edcd 100644 --- a/src/test/regress/sql/subselect.sql +++ b/src/test/regress/sql/subselect.sql @@ -435,6 +435,23 @@ CREATE VIEW orders_view AS select * from outer_7597 where (f1, f2) not in (select * from inner_7597); +-- +-- Similar test case using text that verifies that collation +-- information is passed through by execTuplesEqual() in nodeSubplan.c +-- (otherwise it would error in texteq()) +-- + +create temp table outer_text (f1 text, f2 text); +insert into outer_text values ('a', 'a'); +insert into outer_text values ('b', 'a'); +insert into outer_text values ('a', null); +insert into outer_text values ('b', null); + +create 
temp table inner_text (c1 text, c2 text); +insert into inner_text values ('a', null); + +select * from outer_text where (f1, f2) not in (select * from inner_text); + -- -- Test case for premature memory release during hashing of subplan output -- diff --git a/src/test/subscription/Makefile b/src/test/subscription/Makefile index e7bbb454c7..4378819530 100644 --- a/src/test/subscription/Makefile +++ b/src/test/subscription/Makefile @@ -15,6 +15,8 @@ include $(top_builddir)/src/Makefile.global EXTRA_INSTALL = contrib/hstore +export with_icu + check: $(prove_check) diff --git a/src/test/subscription/t/012_collation.pl b/src/test/subscription/t/012_collation.pl new file mode 100644 index 0000000000..f9edda6ab6 --- /dev/null +++ b/src/test/subscription/t/012_collation.pl @@ -0,0 +1,98 @@ +# Test collations, in particular nondeterministic ones +# (only works with ICU) +use strict; +use warnings; +use PostgresNode; +use TestLib; +use Test::More; + +if ($ENV{with_icu} eq 'yes') +{ + plan tests => 2; +} +else +{ + plan skip_all => 'ICU not supported by this build'; +} + +my $node_publisher = get_new_node('publisher'); +$node_publisher->init(allows_streaming => 'logical'); +$node_publisher->start; + +my $node_subscriber = get_new_node('subscriber'); +$node_subscriber->init(allows_streaming => 'logical'); +$node_subscriber->start; + +my $publisher_connstr = $node_publisher->connstr . 
' dbname=postgres'; + +$node_subscriber->safe_psql('postgres', + "CREATE COLLATION case_insensitive (provider = icu, locale = 'und-u-ks-level2', deterministic = false)"); + +# table with replica identity index + +$node_publisher->safe_psql('postgres', + "CREATE TABLE tab1 (a text PRIMARY KEY, b text)"); + +$node_publisher->safe_psql('postgres', + "INSERT INTO tab1 VALUES ('abc', 'abc'), ('def', 'def')"); + +$node_subscriber->safe_psql('postgres', + "CREATE TABLE tab1 (a text COLLATE case_insensitive PRIMARY KEY, b text)"); + +$node_subscriber->safe_psql('postgres', + "INSERT INTO tab1 VALUES ('ABC', 'abc'), ('GHI', 'ghi')"); + +# table with replica identity full + +$node_publisher->safe_psql('postgres', + "CREATE TABLE tab2 (a text, b text)"); +$node_publisher->safe_psql('postgres', + "ALTER TABLE tab2 REPLICA IDENTITY FULL"); + +$node_publisher->safe_psql('postgres', + "INSERT INTO tab2 VALUES ('abc', 'abc'), ('def', 'def')"); + +$node_subscriber->safe_psql('postgres', + "CREATE TABLE tab2 (a text COLLATE case_insensitive, b text)"); +$node_subscriber->safe_psql('postgres', + "ALTER TABLE tab2 REPLICA IDENTITY FULL"); + +$node_subscriber->safe_psql('postgres', + "INSERT INTO tab2 VALUES ('ABC', 'abc'), ('GHI', 'ghi')"); + +# set up publication, subscription + +$node_publisher->safe_psql('postgres', + "CREATE PUBLICATION pub1 FOR ALL TABLES"); + +$node_subscriber->safe_psql('postgres', + "CREATE SUBSCRIPTION sub1 CONNECTION '$publisher_connstr application_name=sub1' PUBLICATION pub1 WITH (copy_data = false)"); + +$node_publisher->wait_for_catchup('sub1'); + +# test with replica identity index + +$node_publisher->safe_psql('postgres', + "UPDATE tab1 SET b = 'xyz' WHERE b = 'abc'"); + +$node_publisher->wait_for_catchup('sub1'); + +# Note: Even though the UPDATE command above only updates column "b", +# the replication target will also update column "a", because the +# whole row is shipped.
+is($node_subscriber->safe_psql('postgres', "SELECT a, b FROM tab1 ORDER BY a"), + qq(abc|xyz +GHI|ghi), + 'update of case insensitive primary key'); + +# test with replica identity full + +$node_publisher->safe_psql('postgres', + "UPDATE tab2 SET b = 'xyz' WHERE b = 'abc'"); + +$node_publisher->wait_for_catchup('sub1'); + +is($node_subscriber->safe_psql('postgres', "SELECT a, b FROM tab2 ORDER BY a"), + qq(abc|xyz +GHI|ghi), + 'update of case insensitive column with replica identity full'); base-commit: 7c079d7417a8f2d4bf5144732e2f85117db9214f -- 2.20.1