From dcd081ba5fdc3a29adddcd52deac5fb67d18fcc1 Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Tue, 15 Jan 2019 15:54:33 +0100 Subject: [PATCH v3] Collations with nondeterministic comparison This adds a flag "deterministic" to collations. If that is false, such a collation disables various optimizations that assume that strings are equal only if they are byte-wise equal. That then allows use cases such as case-insensitive or accent-insensitive comparisons or handling of strings with different Unicode normal forms. The term "deterministic comparison" in this context is from Unicode Technical Standard #10 (https://unicode.org/reports/tr10/#Deterministic_Comparison). --- contrib/bloom/bloom.h | 1 + contrib/bloom/blutils.c | 3 +- doc/src/sgml/catalogs.sgml | 7 + doc/src/sgml/charset.sgml | 11 +- doc/src/sgml/ref/create_collation.sgml | 22 ++ src/backend/access/hash/hashfunc.c | 86 +++++ src/backend/catalog/pg_collation.c | 2 + src/backend/commands/collationcmds.c | 25 +- src/backend/executor/execExpr.c | 4 +- src/backend/executor/execGrouping.c | 12 +- src/backend/executor/execPartition.c | 1 + src/backend/executor/nodeAgg.c | 4 + src/backend/executor/nodeGroup.c | 1 + src/backend/executor/nodeHash.c | 14 +- src/backend/executor/nodeHashjoin.c | 5 + src/backend/executor/nodeRecursiveunion.c | 1 + src/backend/executor/nodeSetOp.c | 2 + src/backend/executor/nodeSubplan.c | 8 + src/backend/executor/nodeUnique.c | 1 + src/backend/executor/nodeWindowAgg.c | 2 + src/backend/nodes/copyfuncs.c | 7 + src/backend/nodes/outfuncs.c | 7 + src/backend/nodes/readfuncs.c | 7 + src/backend/optimizer/plan/createplan.c | 54 ++- src/backend/optimizer/util/tlist.c | 25 ++ src/backend/partitioning/partbounds.c | 4 +- src/backend/partitioning/partprune.c | 3 +- src/backend/utils/adt/arrayfuncs.c | 2 +- src/backend/utils/adt/name.c | 32 +- src/backend/utils/adt/orderedsetaggs.c | 3 +- src/backend/utils/adt/pg_locale.c | 1 + src/backend/utils/adt/ri_triggers.c | 20 + 
src/backend/utils/adt/varchar.c | 20 + src/backend/utils/adt/varlena.c | 51 ++- src/backend/utils/cache/catcache.c | 2 +- src/backend/utils/cache/lsyscache.c | 16 + src/bin/initdb/initdb.c | 4 +- src/bin/pg_dump/pg_dump.c | 39 +- src/bin/psql/describe.c | 17 +- src/include/catalog/pg_collation.h | 2 + src/include/executor/executor.h | 3 + src/include/executor/hashjoin.h | 1 + src/include/executor/nodeHash.h | 2 +- src/include/nodes/execnodes.h | 3 + src/include/nodes/plannodes.h | 7 + src/include/optimizer/planmain.h | 2 +- src/include/optimizer/tlist.h | 1 + src/include/partitioning/partbounds.h | 1 + src/include/utils/lsyscache.h | 1 + src/include/utils/pg_locale.h | 1 + .../regress/expected/collate.icu.utf8.out | 341 +++++++++++++++++- .../regress/expected/collate.linux.utf8.out | 25 +- src/test/regress/sql/collate.icu.utf8.sql | 115 ++++++ src/test/regress/sql/collate.linux.utf8.sql | 8 + 54 files changed, 928 insertions(+), 111 deletions(-) diff --git a/contrib/bloom/bloom.h b/contrib/bloom/bloom.h index 24200fb5fa..c4672f6853 100644 --- a/contrib/bloom/bloom.h +++ b/contrib/bloom/bloom.h @@ -137,6 +137,7 @@ typedef struct BloomMetaPageData typedef struct BloomState { FmgrInfo hashFn[INDEX_MAX_KEYS]; + Oid collations[INDEX_MAX_KEYS]; BloomOptions opts; /* copy of options on index's metapage */ int32 nColumns; diff --git a/contrib/bloom/blutils.c b/contrib/bloom/blutils.c index 6458376578..d078dfbd46 100644 --- a/contrib/bloom/blutils.c +++ b/contrib/bloom/blutils.c @@ -163,6 +163,7 @@ initBloomState(BloomState *state, Relation index) fmgr_info_copy(&(state->hashFn[i]), index_getprocinfo(index, i + 1, BLOOM_HASH_PROC), CurrentMemoryContext); + state->collations[i] = index->rd_indcollation[i]; } /* Initialize amcache if needed with options from metapage */ @@ -267,7 +268,7 @@ signValue(BloomState *state, BloomSignatureWord *sign, Datum value, int attno) * different columns will be mapped into different bits because of step * above */ - hashVal = 
DatumGetInt32(FunctionCall1(&state->hashFn[attno], value)); + hashVal = DatumGetInt32(FunctionCall1Coll(&state->hashFn[attno], state->collations[attno], value)); mySrand(hashVal ^ myRand()); for (j = 0; j < state->opts.bitSize[attno]; j++) diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml index af4d0625ea..b087c1ec74 100644 --- a/doc/src/sgml/catalogs.sgml +++ b/doc/src/sgml/catalogs.sgml @@ -2077,6 +2077,13 @@ <structname>pg_collation</structname> Columns default, c = libc, i = icu + + collisdeterministic + bool + + Is the collation deterministic? + + collencoding int4 diff --git a/doc/src/sgml/charset.sgml b/doc/src/sgml/charset.sgml index a6143ef8a7..ec6343fa80 100644 --- a/doc/src/sgml/charset.sgml +++ b/doc/src/sgml/charset.sgml @@ -847,11 +847,12 @@ ICU collations Note that while this system allows creating collations that ignore - case or ignore accents or similar (using - the ks key), PostgreSQL does not at the moment allow - such collations to act in a truly case- or accent-insensitive manner. Any - strings that compare equal according to the collation but are not - byte-wise equal will be sorted according to their byte values. + case or ignore accents or similar (using the + ks key), in order for such collations to act in a + truly case- or accent-insensitive manner, they also need to be declared as not + deterministic in CREATE COLLATION. + Otherwise, any strings that compare equal according to the collation but + are not byte-wise equal will be sorted according to their byte values. 
diff --git a/doc/src/sgml/ref/create_collation.sgml b/doc/src/sgml/ref/create_collation.sgml index 038797fce1..def4dda6e8 100644 --- a/doc/src/sgml/ref/create_collation.sgml +++ b/doc/src/sgml/ref/create_collation.sgml @@ -23,6 +23,7 @@ [ LC_COLLATE = lc_collate, ] [ LC_CTYPE = lc_ctype, ] [ PROVIDER = provider, ] + [ DETERMINISTIC = boolean, ] [ VERSION = version ] ) CREATE COLLATION [ IF NOT EXISTS ] name FROM existing_collation @@ -124,6 +125,27 @@ Parameters + + DETERMINISTIC + + + + Specifies whether the collation should use deterministic comparisons. + The default is true. A deterministic comparison considers strings that + are not byte-wise equal to be unequal even if they are considered + logically equal by the comparison. PostgreSQL breaks ties using a + byte-wise comparison. Comparison that is not deterministic can make the + collation be, say, case- or accent-insensitive. For that, you need to + choose an appropriate LC_COLLATE setting + and set the collation to not deterministic here. + + + + Nondeterministic collations are only supported with the ICU provider. + + + + version diff --git a/src/backend/access/hash/hashfunc.c b/src/backend/access/hash/hashfunc.c index 63005ddc4d..144dd114a1 100644 --- a/src/backend/access/hash/hashfunc.c +++ b/src/backend/access/hash/hashfunc.c @@ -27,7 +27,9 @@ #include "postgres.h" #include "access/hash.h" +#include "catalog/pg_collation.h" #include "utils/builtins.h" +#include "utils/pg_locale.h" /* * Datatype-specific hash functions. 
@@ -242,8 +244,50 @@ Datum hashtext(PG_FUNCTION_ARGS) { text *key = PG_GETARG_TEXT_PP(0); + Oid collid = PG_GET_COLLATION(); Datum result; + if (!collid) + ereport(ERROR, + (errcode(ERRCODE_INDETERMINATE_COLLATION), + errmsg("could not determine which collation to use for string hashing"), + errhint("Use the COLLATE clause to set the collation explicitly."))); + + if (collid != DEFAULT_COLLATION_OID) + { + pg_locale_t mylocale = pg_newlocale_from_collation(collid); + + if (mylocale && !mylocale->deterministic) + { + if (mylocale->provider == COLLPROVIDER_ICU) + { +#ifdef USE_ICU + int32_t ulen = -1; + UChar *uchar = NULL; + Size bsize; + uint8_t *buf; + + ulen = icu_to_uchar(&uchar, VARDATA_ANY(key), VARSIZE_ANY_EXHDR(key)); + + bsize = ucol_getSortKey(mylocale->info.icu.ucol, + uchar, ulen, NULL, 0); + buf = palloc(bsize); + ucol_getSortKey(mylocale->info.icu.ucol, + uchar, ulen, buf, bsize); + + result = hash_any(buf, bsize); + + PG_FREE_IF_COPY(key, 0); + + return result; +#else + /* shouldn't happen */ + elog(ERROR, "unsupported collprovider: %c", mylocale->provider); +#endif + } + } + } + /* * Note: this is currently identical in behavior to hashvarlena, but keep * it as a separate function in case we someday want to do something @@ -262,8 +306,50 @@ Datum hashtextextended(PG_FUNCTION_ARGS) { text *key = PG_GETARG_TEXT_PP(0); + Oid collid = PG_GET_COLLATION(); Datum result; + if (!collid) + ereport(ERROR, + (errcode(ERRCODE_INDETERMINATE_COLLATION), + errmsg("could not determine which collation to use for string hashing"), + errhint("Use the COLLATE clause to set the collation explicitly."))); + + if (collid != DEFAULT_COLLATION_OID) + { + pg_locale_t mylocale = pg_newlocale_from_collation(collid); + + if (mylocale && !mylocale->deterministic) + { + if (mylocale->provider == COLLPROVIDER_ICU) + { +#ifdef USE_ICU + int32_t ulen = -1; + UChar *uchar = NULL; + Size bsize; + uint8_t *buf; + + ulen = icu_to_uchar(&uchar, VARDATA_ANY(key), VARSIZE_ANY_EXHDR(key)); + 
+ bsize = ucol_getSortKey(mylocale->info.icu.ucol, + uchar, ulen, NULL, 0); + buf = palloc(bsize); + ucol_getSortKey(mylocale->info.icu.ucol, + uchar, ulen, buf, bsize); + + result = hash_any_extended(buf, bsize, PG_GETARG_INT64(1)); /* hash the sort key with the caller's seed, like the deterministic path below */ + + PG_FREE_IF_COPY(key, 0); + + return result; +#else + /* shouldn't happen */ + elog(ERROR, "unsupported collprovider: %c", mylocale->provider); +#endif + } + } + } + /* Same approach as hashtext */ result = hash_any_extended((unsigned char *) VARDATA_ANY(key), VARSIZE_ANY_EXHDR(key), diff --git a/src/backend/catalog/pg_collation.c b/src/backend/catalog/pg_collation.c index 467ab5a5e5..e7529ac1e8 100644 --- a/src/backend/catalog/pg_collation.c +++ b/src/backend/catalog/pg_collation.c @@ -47,6 +47,7 @@ Oid CollationCreate(const char *collname, Oid collnamespace, Oid collowner, char collprovider, + bool collisdeterministic, int32 collencoding, const char *collcollate, const char *collctype, const char *collversion, @@ -161,6 +162,7 @@ CollationCreate(const char *collname, Oid collnamespace, values[Anum_pg_collation_collnamespace - 1] = ObjectIdGetDatum(collnamespace); values[Anum_pg_collation_collowner - 1] = ObjectIdGetDatum(collowner); values[Anum_pg_collation_collprovider - 1] = CharGetDatum(collprovider); + values[Anum_pg_collation_collisdeterministic - 1] = BoolGetDatum(collisdeterministic); values[Anum_pg_collation_collencoding - 1] = Int32GetDatum(collencoding); namestrcpy(&name_collate, collcollate); values[Anum_pg_collation_collcollate - 1] = NameGetDatum(&name_collate); diff --git a/src/backend/commands/collationcmds.c b/src/backend/commands/collationcmds.c index e6a5c3a555..c33ab76f99 100644 --- a/src/backend/commands/collationcmds.c +++ b/src/backend/commands/collationcmds.c @@ -59,10 +59,12 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e DefElem *lccollateEl = NULL; DefElem *lcctypeEl = NULL; DefElem *providerEl = NULL; + DefElem *deterministicEl = NULL; DefElem *versionEl = NULL; char *collcollate = NULL; char 
*collctype = NULL; char *collproviderstr = NULL; + bool collisdeterministic = true; int collencoding = 0; char collprovider = 0; char *collversion = NULL; @@ -91,6 +93,8 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e defelp = &lcctypeEl; else if (strcmp(defel->defname, "provider") == 0) defelp = &providerEl; + else if (strcmp(defel->defname, "deterministic") == 0) + defelp = &deterministicEl; else if (strcmp(defel->defname, "version") == 0) defelp = &versionEl; else @@ -125,6 +129,7 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e collcollate = pstrdup(NameStr(((Form_pg_collation) GETSTRUCT(tp))->collcollate)); collctype = pstrdup(NameStr(((Form_pg_collation) GETSTRUCT(tp))->collctype)); collprovider = ((Form_pg_collation) GETSTRUCT(tp))->collprovider; + collisdeterministic = ((Form_pg_collation) GETSTRUCT(tp))->collisdeterministic; collencoding = ((Form_pg_collation) GETSTRUCT(tp))->collencoding; ReleaseSysCache(tp); @@ -157,6 +162,9 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e if (providerEl) collproviderstr = defGetString(providerEl); + if (deterministicEl) + collisdeterministic = defGetBoolean(deterministicEl); + if (versionEl) collversion = defGetString(versionEl); @@ -185,6 +193,16 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), errmsg("parameter \"lc_ctype\" must be specified"))); + /* + * Nondeterministic collations are currently only supported with ICU + * because that's the only case where it can actually make a difference. + * So we can save writing the code for the other providers. 
+ */ + if (!collisdeterministic && collprovider != COLLPROVIDER_ICU) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("nondeterministic collations not supported with this provider"))); + if (!fromEl) { if (collprovider == COLLPROVIDER_ICU) @@ -203,6 +221,7 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e collNamespace, GetUserId(), collprovider, + collisdeterministic, collencoding, collcollate, collctype, @@ -586,7 +605,7 @@ pg_import_system_collations(PG_FUNCTION_ARGS) * about existing ones. */ collid = CollationCreate(localebuf, nspid, GetUserId(), - COLLPROVIDER_LIBC, enc, + COLLPROVIDER_LIBC, true, enc, localebuf, localebuf, get_collation_actual_version(COLLPROVIDER_LIBC, localebuf), true, true); @@ -647,7 +666,7 @@ pg_import_system_collations(PG_FUNCTION_ARGS) int enc = aliases[i].enc; collid = CollationCreate(alias, nspid, GetUserId(), - COLLPROVIDER_LIBC, enc, + COLLPROVIDER_LIBC, true, enc, locale, locale, get_collation_actual_version(COLLPROVIDER_LIBC, locale), true, true); @@ -709,7 +728,7 @@ pg_import_system_collations(PG_FUNCTION_ARGS) collid = CollationCreate(psprintf("%s-x-icu", langtag), nspid, GetUserId(), - COLLPROVIDER_ICU, -1, + COLLPROVIDER_ICU, true, -1, collcollate, collcollate, get_collation_actual_version(COLLPROVIDER_ICU, collcollate), true, true); diff --git a/src/backend/executor/execExpr.c b/src/backend/executor/execExpr.c index 4047d24b03..4eada12e70 100644 --- a/src/backend/executor/execExpr.c +++ b/src/backend/executor/execExpr.c @@ -3316,6 +3316,7 @@ ExecBuildGroupingEqual(TupleDesc ldesc, TupleDesc rdesc, int numCols, const AttrNumber *keyColIdx, const Oid *eqfunctions, + const Oid *collations, PlanState *parent) { ExprState *state = makeNode(ExprState); @@ -3376,6 +3377,7 @@ ExecBuildGroupingEqual(TupleDesc ldesc, TupleDesc rdesc, Form_pg_attribute latt = TupleDescAttr(ldesc, attno - 1); Form_pg_attribute ratt = TupleDescAttr(rdesc, attno - 1); Oid foid = eqfunctions[natt]; + 
Oid collid = collations[natt]; FmgrInfo *finfo; FunctionCallInfo fcinfo; AclResult aclresult; @@ -3393,7 +3395,7 @@ ExecBuildGroupingEqual(TupleDesc ldesc, TupleDesc rdesc, fmgr_info(foid, finfo); fmgr_info_set_expr(NULL, finfo); InitFunctionCallInfoData(*fcinfo, finfo, 2, - InvalidOid, NULL, NULL); + collid, NULL, NULL); /* left arg */ scratch.opcode = EEOP_INNER_VAR; diff --git a/src/backend/executor/execGrouping.c b/src/backend/executor/execGrouping.c index 4e7600e4ed..cef6323c63 100644 --- a/src/backend/executor/execGrouping.c +++ b/src/backend/executor/execGrouping.c @@ -61,6 +61,7 @@ execTuplesMatchPrepare(TupleDesc desc, int numCols, const AttrNumber *keyColIdx, const Oid *eqOperators, + const Oid *collations, PlanState *parent) { Oid *eqFunctions = (Oid *) palloc(numCols * sizeof(Oid)); @@ -76,7 +77,7 @@ execTuplesMatchPrepare(TupleDesc desc, /* build actual expression */ expr = ExecBuildGroupingEqual(desc, desc, NULL, NULL, - numCols, keyColIdx, eqFunctions, + numCols, keyColIdx, eqFunctions, collations, parent); return expr; @@ -155,6 +156,7 @@ BuildTupleHashTable(PlanState *parent, int numCols, AttrNumber *keyColIdx, const Oid *eqfuncoids, FmgrInfo *hashfunctions, + Oid *collations, long nbuckets, Size additionalsize, MemoryContext tablecxt, MemoryContext tempcxt, bool use_variable_hash_iv) @@ -174,6 +176,7 @@ BuildTupleHashTable(PlanState *parent, hashtable->numCols = numCols; hashtable->keyColIdx = keyColIdx; hashtable->tab_hash_funcs = hashfunctions; + hashtable->tab_collations = collations; hashtable->tablecxt = tablecxt; hashtable->tempcxt = tempcxt; hashtable->entrysize = entrysize; @@ -211,7 +214,7 @@ BuildTupleHashTable(PlanState *parent, hashtable->tab_eq_func = ExecBuildGroupingEqual(inputDesc, inputDesc, &TTSOpsMinimalTuple, &TTSOpsMinimalTuple, numCols, - keyColIdx, eqfuncoids, + keyColIdx, eqfuncoids, collations, parent); MemoryContextSwitchTo(oldcontext); @@ -374,8 +377,9 @@ TupleHashTableHash(struct tuplehash_hash *tb, const MinimalTuple 
tuple) { uint32 hkey; - hkey = DatumGetUInt32(FunctionCall1(&hashfunctions[i], - attr)); + hkey = DatumGetUInt32(FunctionCall1Coll(&hashfunctions[i], + hashtable->tab_collations[i], + attr)); hashkey ^= hkey; } } diff --git a/src/backend/executor/execPartition.c b/src/backend/executor/execPartition.c index 7415dfa45e..f83b5787fe 100644 --- a/src/backend/executor/execPartition.c +++ b/src/backend/executor/execPartition.c @@ -1218,6 +1218,7 @@ get_partition_for_tuple(PartitionDispatch pd, Datum *values, bool *isnull) greatest_modulus = get_hash_partition_greatest_modulus(boundinfo); rowHash = compute_partition_hash_value(key->partnatts, key->partsupfunc, + key->partcollation, values, isnull); part_index = boundinfo->indexes[rowHash % greatest_modulus]; diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c index 508c919574..1783216e63 100644 --- a/src/backend/executor/nodeAgg.c +++ b/src/backend/executor/nodeAgg.c @@ -1283,6 +1283,7 @@ build_hash_table(AggState *aggstate) perhash->hashGrpColIdxHash, perhash->eqfuncoids, perhash->hashfunctions, + perhash->aggnode->grpCollations, perhash->aggnode->numGroups, additionalsize, aggstate->hashcontext->ecxt_per_tuple_memory, @@ -2376,6 +2377,7 @@ ExecInitAgg(Agg *node, EState *estate, int eflags) length, aggnode->grpColIdx, aggnode->grpOperators, + aggnode->grpCollations, (PlanState *) aggstate); } @@ -2387,6 +2389,7 @@ ExecInitAgg(Agg *node, EState *estate, int eflags) aggnode->numCols, aggnode->grpColIdx, aggnode->grpOperators, + aggnode->grpCollations, (PlanState *) aggstate); } } @@ -3141,6 +3144,7 @@ build_pertrans_for_aggref(AggStatePerTrans pertrans, numDistinctCols, pertrans->sortColIdx, ops, + pertrans->sortCollations, &aggstate->ss.ps); pfree(ops); } diff --git a/src/backend/executor/nodeGroup.c b/src/backend/executor/nodeGroup.c index 655084d7b5..05f1d33150 100644 --- a/src/backend/executor/nodeGroup.c +++ b/src/backend/executor/nodeGroup.c @@ -212,6 +212,7 @@ ExecInitGroup(Group *node, 
EState *estate, int eflags) node->numCols, node->grpColIdx, node->grpOperators, + node->grpCollations, &grpstate->ss.ps); return grpstate; diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c index 856daf6a7f..64eec91f8b 100644 --- a/src/backend/executor/nodeHash.c +++ b/src/backend/executor/nodeHash.c @@ -425,7 +425,7 @@ ExecEndHash(HashState *node) * ---------------------------------------------------------------- */ HashJoinTable -ExecHashTableCreate(HashState *state, List *hashOperators, bool keepNulls) +ExecHashTableCreate(HashState *state, List *hashOperators, List *hashCollations, bool keepNulls) { Hash *node; HashJoinTable hashtable; @@ -439,6 +439,7 @@ ExecHashTableCreate(HashState *state, List *hashOperators, bool keepNulls) int nkeys; int i; ListCell *ho; + ListCell *hc; MemoryContext oldcxt; /* @@ -541,8 +542,9 @@ ExecHashTableCreate(HashState *state, List *hashOperators, bool keepNulls) hashtable->inner_hashfunctions = (FmgrInfo *) palloc(nkeys * sizeof(FmgrInfo)); hashtable->hashStrict = (bool *) palloc(nkeys * sizeof(bool)); + hashtable->collations = (Oid *) palloc(nkeys * sizeof(Oid)); i = 0; - foreach(ho, hashOperators) + forboth(ho, hashOperators, hc, hashCollations) { Oid hashop = lfirst_oid(ho); Oid left_hashfn; @@ -554,6 +556,7 @@ ExecHashTableCreate(HashState *state, List *hashOperators, bool keepNulls) fmgr_info(left_hashfn, &hashtable->outer_hashfunctions[i]); fmgr_info(right_hashfn, &hashtable->inner_hashfunctions[i]); hashtable->hashStrict[i] = op_strict(hashop); + hashtable->collations[i] = lfirst_oid(hc); i++; } @@ -1847,7 +1850,7 @@ ExecHashGetHashValue(HashJoinTable hashtable, /* Compute the hash function */ uint32 hkey; - hkey = DatumGetUInt32(FunctionCall1(&hashfunctions[i], keyval)); + hkey = DatumGetUInt32(FunctionCall1Coll(&hashfunctions[i], hashtable->collations[i], keyval)); hashkey ^= hkey; } @@ -2303,8 +2306,9 @@ ExecHashBuildSkewHash(HashJoinTable hashtable, Hash *node, int mcvsToUse) uint32 
hashvalue; int bucket; - hashvalue = DatumGetUInt32(FunctionCall1(&hashfunctions[0], - sslot.values[i])); + hashvalue = DatumGetUInt32(FunctionCall1Coll(&hashfunctions[0], + hashtable->collations[0], + sslot.values[i])); /* * While we have not hit a hole in the hashtable and have not hit diff --git a/src/backend/executor/nodeHashjoin.c b/src/backend/executor/nodeHashjoin.c index 2098708864..aa43296e26 100644 --- a/src/backend/executor/nodeHashjoin.c +++ b/src/backend/executor/nodeHashjoin.c @@ -278,6 +278,7 @@ ExecHashJoinImpl(PlanState *pstate, bool parallel) */ hashtable = ExecHashTableCreate(hashNode, node->hj_HashOperators, + node->hj_Collations, HJ_FILL_INNER(node)); node->hj_HashTable = hashtable; @@ -603,6 +604,7 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags) List *rclauses; List *rhclauses; List *hoperators; + List *hcollations; TupleDesc outerDesc, innerDesc; ListCell *l; @@ -738,6 +740,7 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags) rclauses = NIL; rhclauses = NIL; hoperators = NIL; + hcollations = NIL; foreach(l, node->hashclauses) { OpExpr *hclause = lfirst_node(OpExpr, l); @@ -749,10 +752,12 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags) rhclauses = lappend(rhclauses, ExecInitExpr(lsecond(hclause->args), innerPlanState(hjstate))); hoperators = lappend_oid(hoperators, hclause->opno); + hcollations = lappend_oid(hcollations, hclause->inputcollid); } hjstate->hj_OuterHashKeys = lclauses; hjstate->hj_InnerHashKeys = rclauses; hjstate->hj_HashOperators = hoperators; + hjstate->hj_Collations = hcollations; /* child Hash node needs to evaluate inner hash keys, too */ ((HashState *) innerPlanState(hjstate))->hashkeys = rhclauses; diff --git a/src/backend/executor/nodeRecursiveunion.c b/src/backend/executor/nodeRecursiveunion.c index d2488ad988..bef2c7fbdc 100644 --- a/src/backend/executor/nodeRecursiveunion.c +++ b/src/backend/executor/nodeRecursiveunion.c @@ -43,6 +43,7 @@ 
build_hash_table(RecursiveUnionState *rustate) node->dupColIdx, rustate->eqfuncoids, rustate->hashfunctions, + node->dupCollations, node->numGroups, 0, rustate->tableContext, diff --git a/src/backend/executor/nodeSetOp.c b/src/backend/executor/nodeSetOp.c index 5d8c8b8b02..5a86bbcc95 100644 --- a/src/backend/executor/nodeSetOp.c +++ b/src/backend/executor/nodeSetOp.c @@ -132,6 +132,7 @@ build_hash_table(SetOpState *setopstate) node->dupColIdx, setopstate->eqfuncoids, setopstate->hashfunctions, + node->dupCollations, node->numGroups, 0, setopstate->tableContext, @@ -553,6 +554,7 @@ ExecInitSetOp(SetOp *node, EState *estate, int eflags) node->numCols, node->dupColIdx, node->dupOperators, + node->dupCollations, &setopstate->ps); if (node->strategy == SETOP_HASHED) diff --git a/src/backend/executor/nodeSubplan.c b/src/backend/executor/nodeSubplan.c index 574e7bc4fa..1e7db81c7f 100644 --- a/src/backend/executor/nodeSubplan.c +++ b/src/backend/executor/nodeSubplan.c @@ -511,6 +511,7 @@ buildSubPlanHash(SubPlanState *node, ExprContext *econtext) node->keyColIdx, node->tab_eq_funcoids, node->tab_hash_funcs, + node->tab_collations, nbuckets, 0, node->hashtablecxt, @@ -533,6 +534,7 @@ buildSubPlanHash(SubPlanState *node, ExprContext *econtext) node->keyColIdx, node->tab_eq_funcoids, node->tab_hash_funcs, + node->tab_collations, nbuckets, 0, node->hashtablecxt, @@ -808,6 +810,7 @@ ExecInitSubPlan(SubPlan *subplan, PlanState *parent) sstate->tab_eq_funcoids = NULL; sstate->tab_hash_funcs = NULL; sstate->tab_eq_funcs = NULL; + sstate->tab_collations = NULL; sstate->lhs_hash_funcs = NULL; sstate->cur_eq_funcs = NULL; @@ -906,6 +909,7 @@ ExecInitSubPlan(SubPlan *subplan, PlanState *parent) sstate->tab_eq_funcoids = (Oid *) palloc(ncols * sizeof(Oid)); sstate->tab_hash_funcs = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo)); sstate->tab_eq_funcs = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo)); + sstate->tab_collations = (Oid *) palloc(ncols * sizeof(Oid)); sstate->lhs_hash_funcs = 
(FmgrInfo *) palloc(ncols * sizeof(FmgrInfo)); sstate->cur_eq_funcs = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo)); i = 1; @@ -956,6 +960,9 @@ ExecInitSubPlan(SubPlan *subplan, PlanState *parent) fmgr_info(left_hashfn, &sstate->lhs_hash_funcs[i - 1]); fmgr_info(right_hashfn, &sstate->tab_hash_funcs[i - 1]); + /* Set collation */ + sstate->tab_collations[i - 1] = opexpr->inputcollid; + i++; } @@ -992,6 +999,7 @@ ExecInitSubPlan(SubPlan *subplan, PlanState *parent) ncols, sstate->keyColIdx, sstate->tab_eq_funcoids, + sstate->tab_collations, parent); } diff --git a/src/backend/executor/nodeUnique.c b/src/backend/executor/nodeUnique.c index ad7039937d..c553f150b8 100644 --- a/src/backend/executor/nodeUnique.c +++ b/src/backend/executor/nodeUnique.c @@ -152,6 +152,7 @@ ExecInitUnique(Unique *node, EState *estate, int eflags) node->numCols, node->uniqColIdx, node->uniqOperators, + node->uniqCollations, &uniquestate->ps); return uniquestate; diff --git a/src/backend/executor/nodeWindowAgg.c b/src/backend/executor/nodeWindowAgg.c index 7ae56074ca..4942003e82 100644 --- a/src/backend/executor/nodeWindowAgg.c +++ b/src/backend/executor/nodeWindowAgg.c @@ -2370,6 +2370,7 @@ ExecInitWindowAgg(WindowAgg *node, EState *estate, int eflags) node->partNumCols, node->partColIdx, node->partOperators, + node->partCollations, &winstate->ss.ps); if (node->ordNumCols > 0) @@ -2378,6 +2379,7 @@ ExecInitWindowAgg(WindowAgg *node, EState *estate, int eflags) node->ordNumCols, node->ordColIdx, node->ordOperators, + node->ordCollations, &winstate->ss.ps); /* diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 006a3d1772..63dc0dacf9 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -297,6 +297,7 @@ _copyRecursiveUnion(const RecursiveUnion *from) { COPY_POINTER_FIELD(dupColIdx, from->numCols * sizeof(AttrNumber)); COPY_POINTER_FIELD(dupOperators, from->numCols * sizeof(Oid)); + COPY_POINTER_FIELD(dupCollations, from->numCols * 
sizeof(Oid)); } COPY_SCALAR_FIELD(numGroups); @@ -956,6 +957,7 @@ _copyGroup(const Group *from) COPY_SCALAR_FIELD(numCols); COPY_POINTER_FIELD(grpColIdx, from->numCols * sizeof(AttrNumber)); COPY_POINTER_FIELD(grpOperators, from->numCols * sizeof(Oid)); + COPY_POINTER_FIELD(grpCollations, from->numCols * sizeof(Oid)); return newnode; } @@ -977,6 +979,7 @@ _copyAgg(const Agg *from) { COPY_POINTER_FIELD(grpColIdx, from->numCols * sizeof(AttrNumber)); COPY_POINTER_FIELD(grpOperators, from->numCols * sizeof(Oid)); + COPY_POINTER_FIELD(grpCollations, from->numCols * sizeof(Oid)); } COPY_SCALAR_FIELD(numGroups); COPY_BITMAPSET_FIELD(aggParams); @@ -1002,12 +1005,14 @@ _copyWindowAgg(const WindowAgg *from) { COPY_POINTER_FIELD(partColIdx, from->partNumCols * sizeof(AttrNumber)); COPY_POINTER_FIELD(partOperators, from->partNumCols * sizeof(Oid)); + COPY_POINTER_FIELD(partCollations, from->partNumCols * sizeof(Oid)); } COPY_SCALAR_FIELD(ordNumCols); if (from->ordNumCols > 0) { COPY_POINTER_FIELD(ordColIdx, from->ordNumCols * sizeof(AttrNumber)); COPY_POINTER_FIELD(ordOperators, from->ordNumCols * sizeof(Oid)); + COPY_POINTER_FIELD(ordCollations, from->ordNumCols * sizeof(Oid)); } COPY_SCALAR_FIELD(frameOptions); COPY_NODE_FIELD(startOffset); @@ -1040,6 +1045,7 @@ _copyUnique(const Unique *from) COPY_SCALAR_FIELD(numCols); COPY_POINTER_FIELD(uniqColIdx, from->numCols * sizeof(AttrNumber)); COPY_POINTER_FIELD(uniqOperators, from->numCols * sizeof(Oid)); + COPY_POINTER_FIELD(uniqCollations, from->numCols * sizeof(Oid)); return newnode; } @@ -1089,6 +1095,7 @@ _copySetOp(const SetOp *from) COPY_SCALAR_FIELD(numCols); COPY_POINTER_FIELD(dupColIdx, from->numCols * sizeof(AttrNumber)); COPY_POINTER_FIELD(dupOperators, from->numCols * sizeof(Oid)); + COPY_POINTER_FIELD(dupCollations, from->numCols * sizeof(Oid)); COPY_SCALAR_FIELD(flagColIdx); COPY_SCALAR_FIELD(firstFlag); COPY_SCALAR_FIELD(numGroups); diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 
0fde876c77..0af43aeb47 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -463,6 +463,7 @@ _outRecursiveUnion(StringInfo str, const RecursiveUnion *node) WRITE_INT_FIELD(numCols); WRITE_ATTRNUMBER_ARRAY(dupColIdx, node->numCols); WRITE_OID_ARRAY(dupOperators, node->numCols); + WRITE_OID_ARRAY(dupCollations, node->numCols); WRITE_LONG_FIELD(numGroups); } @@ -774,6 +775,7 @@ _outAgg(StringInfo str, const Agg *node) WRITE_INT_FIELD(numCols); WRITE_ATTRNUMBER_ARRAY(grpColIdx, node->numCols); WRITE_OID_ARRAY(grpOperators, node->numCols); + WRITE_OID_ARRAY(grpCollations, node->numCols); WRITE_LONG_FIELD(numGroups); WRITE_BITMAPSET_FIELD(aggParams); WRITE_NODE_FIELD(groupingSets); @@ -791,9 +793,11 @@ _outWindowAgg(StringInfo str, const WindowAgg *node) WRITE_INT_FIELD(partNumCols); WRITE_ATTRNUMBER_ARRAY(partColIdx, node->partNumCols); WRITE_OID_ARRAY(partOperators, node->partNumCols); + WRITE_OID_ARRAY(partCollations, node->partNumCols); WRITE_INT_FIELD(ordNumCols); WRITE_ATTRNUMBER_ARRAY(ordColIdx, node->ordNumCols); WRITE_OID_ARRAY(ordOperators, node->ordNumCols); + WRITE_OID_ARRAY(ordCollations, node->ordNumCols); WRITE_INT_FIELD(frameOptions); WRITE_NODE_FIELD(startOffset); WRITE_NODE_FIELD(endOffset); @@ -814,6 +818,7 @@ _outGroup(StringInfo str, const Group *node) WRITE_INT_FIELD(numCols); WRITE_ATTRNUMBER_ARRAY(grpColIdx, node->numCols); WRITE_OID_ARRAY(grpOperators, node->numCols); + WRITE_OID_ARRAY(grpCollations, node->numCols); } static void @@ -848,6 +853,7 @@ _outUnique(StringInfo str, const Unique *node) WRITE_INT_FIELD(numCols); WRITE_ATTRNUMBER_ARRAY(uniqColIdx, node->numCols); WRITE_OID_ARRAY(uniqOperators, node->numCols); + WRITE_OID_ARRAY(uniqCollations, node->numCols); } static void @@ -875,6 +881,7 @@ _outSetOp(StringInfo str, const SetOp *node) WRITE_INT_FIELD(numCols); WRITE_ATTRNUMBER_ARRAY(dupColIdx, node->numCols); WRITE_OID_ARRAY(dupOperators, node->numCols); + WRITE_OID_ARRAY(dupCollations, node->numCols); 
WRITE_INT_FIELD(flagColIdx); WRITE_INT_FIELD(firstFlag); WRITE_LONG_FIELD(numGroups); diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c index ec6f2569ab..7e40c2990b 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -1672,6 +1672,7 @@ _readRecursiveUnion(void) READ_INT_FIELD(numCols); READ_ATTRNUMBER_ARRAY(dupColIdx, local_node->numCols); READ_OID_ARRAY(dupOperators, local_node->numCols); + READ_OID_ARRAY(dupCollations, local_node->numCols); READ_LONG_FIELD(numGroups); READ_DONE(); @@ -2138,6 +2139,7 @@ _readGroup(void) READ_INT_FIELD(numCols); READ_ATTRNUMBER_ARRAY(grpColIdx, local_node->numCols); READ_OID_ARRAY(grpOperators, local_node->numCols); + READ_OID_ARRAY(grpCollations, local_node->numCols); READ_DONE(); } @@ -2157,6 +2159,7 @@ _readAgg(void) READ_INT_FIELD(numCols); READ_ATTRNUMBER_ARRAY(grpColIdx, local_node->numCols); READ_OID_ARRAY(grpOperators, local_node->numCols); + READ_OID_ARRAY(grpCollations, local_node->numCols); READ_LONG_FIELD(numGroups); READ_BITMAPSET_FIELD(aggParams); READ_NODE_FIELD(groupingSets); @@ -2179,9 +2182,11 @@ _readWindowAgg(void) READ_INT_FIELD(partNumCols); READ_ATTRNUMBER_ARRAY(partColIdx, local_node->partNumCols); READ_OID_ARRAY(partOperators, local_node->partNumCols); + READ_OID_ARRAY(partCollations, local_node->partNumCols); READ_INT_FIELD(ordNumCols); READ_ATTRNUMBER_ARRAY(ordColIdx, local_node->ordNumCols); READ_OID_ARRAY(ordOperators, local_node->ordNumCols); + READ_OID_ARRAY(ordCollations, local_node->ordNumCols); READ_INT_FIELD(frameOptions); READ_NODE_FIELD(startOffset); READ_NODE_FIELD(endOffset); @@ -2207,6 +2212,7 @@ _readUnique(void) READ_INT_FIELD(numCols); READ_ATTRNUMBER_ARRAY(uniqColIdx, local_node->numCols); READ_OID_ARRAY(uniqOperators, local_node->numCols); + READ_OID_ARRAY(uniqCollations, local_node->numCols); READ_DONE(); } @@ -2285,6 +2291,7 @@ _readSetOp(void) READ_INT_FIELD(numCols); READ_ATTRNUMBER_ARRAY(dupColIdx, local_node->numCols); 
READ_OID_ARRAY(dupOperators, local_node->numCols); + READ_OID_ARRAY(dupCollations, local_node->numCols); READ_INT_FIELD(flagColIdx); READ_INT_FIELD(firstFlag); READ_LONG_FIELD(numGroups); diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 97d0c28132..61e9dc5938 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -253,14 +253,14 @@ static Sort *make_sort_from_groupcols(List *groupcls, Plan *lefttree); static Material *make_material(Plan *lefttree); static WindowAgg *make_windowagg(List *tlist, Index winref, - int partNumCols, AttrNumber *partColIdx, Oid *partOperators, - int ordNumCols, AttrNumber *ordColIdx, Oid *ordOperators, + int partNumCols, AttrNumber *partColIdx, Oid *partOperators, Oid *partCollations, + int ordNumCols, AttrNumber *ordColIdx, Oid *ordOperators, Oid *ordCollations, int frameOptions, Node *startOffset, Node *endOffset, Oid startInRangeFunc, Oid endInRangeFunc, Oid inRangeColl, bool inRangeAsc, bool inRangeNullsFirst, Plan *lefttree); static Group *make_group(List *tlist, List *qual, int numGroupCols, - AttrNumber *grpColIdx, Oid *grpOperators, + AttrNumber *grpColIdx, Oid *grpOperators, Oid *grpCollations, Plan *lefttree); static Unique *make_unique_from_sortclauses(Plan *lefttree, List *distinctList); static Unique *make_unique_from_pathkeys(Plan *lefttree, @@ -1352,6 +1352,7 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path, int flags) bool newitems; int numGroupCols; AttrNumber *groupColIdx; + Oid *groupCollations; int groupColPos; ListCell *l; @@ -1418,6 +1419,7 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path, int flags) newtlist = subplan->targetlist; numGroupCols = list_length(uniq_exprs); groupColIdx = (AttrNumber *) palloc(numGroupCols * sizeof(AttrNumber)); + groupCollations = (Oid *) palloc(numGroupCols * sizeof(Oid)); groupColPos = 0; foreach(l, uniq_exprs) @@ -1428,7 +1430,9 @@ 
create_unique_plan(PlannerInfo *root, UniquePath *best_path, int flags) tle = tlist_member(uniqexpr, newtlist); if (!tle) /* shouldn't happen */ elog(ERROR, "failed to find unique expression in subplan tlist"); - groupColIdx[groupColPos++] = tle->resno; + groupColIdx[groupColPos] = tle->resno; + groupCollations[groupColPos] = exprCollation((Node *) tle->expr); + groupColPos++; } if (best_path->umethod == UNIQUE_PATH_HASH) @@ -1466,6 +1470,7 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path, int flags) numGroupCols, groupColIdx, groupOperators, + groupCollations, NIL, NIL, best_path->path.rows, @@ -1848,6 +1853,8 @@ create_group_plan(PlannerInfo *root, GroupPath *best_path) extract_grouping_cols(best_path->groupClause, subplan->targetlist), extract_grouping_ops(best_path->groupClause), + extract_grouping_collations(best_path->groupClause, + subplan->targetlist), subplan); copy_generic_path_info(&plan->plan, (Path *) best_path); @@ -1914,6 +1921,8 @@ create_agg_plan(PlannerInfo *root, AggPath *best_path) extract_grouping_cols(best_path->groupClause, subplan->targetlist), extract_grouping_ops(best_path->groupClause), + extract_grouping_collations(best_path->groupClause, + subplan->targetlist), NIL, NIL, best_path->numGroups, @@ -2075,6 +2084,7 @@ create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path) list_length((List *) linitial(rollup->gsets)), new_grpColIdx, extract_grouping_ops(rollup->groupClause), + extract_grouping_collations(rollup->groupClause, subplan->targetlist), rollup->gsets, NIL, rollup->numGroups, @@ -2112,6 +2122,7 @@ create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path) numGroupCols, top_grpColIdx, extract_grouping_ops(rollup->groupClause), + extract_grouping_collations(rollup->groupClause, subplan->targetlist), rollup->gsets, chain, rollup->numGroups, @@ -2211,9 +2222,11 @@ create_windowagg_plan(PlannerInfo *root, WindowAggPath *best_path) int partNumCols; AttrNumber *partColIdx; Oid *partOperators; 
+ Oid *partCollations; int ordNumCols; AttrNumber *ordColIdx; Oid *ordOperators; + Oid *ordCollations; ListCell *lc; /* @@ -2235,6 +2248,7 @@ create_windowagg_plan(PlannerInfo *root, WindowAggPath *best_path) */ partColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numPart); partOperators = (Oid *) palloc(sizeof(Oid) * numPart); + partCollations = (Oid *) palloc(sizeof(Oid) * numPart); partNumCols = 0; foreach(lc, wc->partitionClause) @@ -2245,11 +2259,13 @@ create_windowagg_plan(PlannerInfo *root, WindowAggPath *best_path) Assert(OidIsValid(sgc->eqop)); partColIdx[partNumCols] = tle->resno; partOperators[partNumCols] = sgc->eqop; + partCollations[partNumCols] = exprCollation((Node *) tle->expr); partNumCols++; } ordColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numOrder); ordOperators = (Oid *) palloc(sizeof(Oid) * numOrder); + ordCollations = (Oid *) palloc(sizeof(Oid) * numOrder); ordNumCols = 0; foreach(lc, wc->orderClause) @@ -2260,6 +2276,7 @@ create_windowagg_plan(PlannerInfo *root, WindowAggPath *best_path) Assert(OidIsValid(sgc->eqop)); ordColIdx[ordNumCols] = tle->resno; ordOperators[ordNumCols] = sgc->eqop; + ordCollations[ordNumCols] = exprCollation((Node *) tle->expr); ordNumCols++; } @@ -2269,9 +2286,11 @@ create_windowagg_plan(PlannerInfo *root, WindowAggPath *best_path) partNumCols, partColIdx, partOperators, + partCollations, ordNumCols, ordColIdx, ordOperators, + ordCollations, wc->frameOptions, wc->startOffset, wc->endOffset, @@ -5292,10 +5311,12 @@ make_recursive_union(List *tlist, int keyno = 0; AttrNumber *dupColIdx; Oid *dupOperators; + Oid *dupCollations; ListCell *slitem; dupColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols); dupOperators = (Oid *) palloc(sizeof(Oid) * numCols); + dupCollations = (Oid *) palloc(sizeof(Oid) * numCols); foreach(slitem, distinctList) { @@ -5305,11 +5326,13 @@ make_recursive_union(List *tlist, dupColIdx[keyno] = tle->resno; dupOperators[keyno] = sortcl->eqop; + dupCollations[keyno] = 
exprCollation((Node *) tle->expr); Assert(OidIsValid(dupOperators[keyno])); keyno++; } node->dupColIdx = dupColIdx; node->dupOperators = dupOperators; + node->dupCollations = dupCollations; } node->numGroups = numGroups; @@ -5981,7 +6004,7 @@ materialize_finished_plan(Plan *subplan) Agg * make_agg(List *tlist, List *qual, AggStrategy aggstrategy, AggSplit aggsplit, - int numGroupCols, AttrNumber *grpColIdx, Oid *grpOperators, + int numGroupCols, AttrNumber *grpColIdx, Oid *grpOperators, Oid *grpCollations, List *groupingSets, List *chain, double dNumGroups, Plan *lefttree) { @@ -5997,6 +6020,7 @@ make_agg(List *tlist, List *qual, node->numCols = numGroupCols; node->grpColIdx = grpColIdx; node->grpOperators = grpOperators; + node->grpCollations = grpCollations; node->numGroups = numGroups; node->aggParams = NULL; /* SS_finalize_plan() will fill this */ node->groupingSets = groupingSets; @@ -6012,8 +6036,8 @@ make_agg(List *tlist, List *qual, static WindowAgg * make_windowagg(List *tlist, Index winref, - int partNumCols, AttrNumber *partColIdx, Oid *partOperators, - int ordNumCols, AttrNumber *ordColIdx, Oid *ordOperators, + int partNumCols, AttrNumber *partColIdx, Oid *partOperators, Oid *partCollations, + int ordNumCols, AttrNumber *ordColIdx, Oid *ordOperators, Oid *ordCollations, int frameOptions, Node *startOffset, Node *endOffset, Oid startInRangeFunc, Oid endInRangeFunc, Oid inRangeColl, bool inRangeAsc, bool inRangeNullsFirst, @@ -6026,9 +6050,11 @@ make_windowagg(List *tlist, Index winref, node->partNumCols = partNumCols; node->partColIdx = partColIdx; node->partOperators = partOperators; + node->partCollations = partCollations; node->ordNumCols = ordNumCols; node->ordColIdx = ordColIdx; node->ordOperators = ordOperators; + node->ordCollations = ordCollations; node->frameOptions = frameOptions; node->startOffset = startOffset; node->endOffset = endOffset; @@ -6053,6 +6079,7 @@ make_group(List *tlist, int numGroupCols, AttrNumber *grpColIdx, Oid 
*grpOperators, + Oid *grpCollations, Plan *lefttree) { Group *node = makeNode(Group); @@ -6061,6 +6088,7 @@ make_group(List *tlist, node->numCols = numGroupCols; node->grpColIdx = grpColIdx; node->grpOperators = grpOperators; + node->grpCollations = grpCollations; plan->qual = qual; plan->targetlist = tlist; @@ -6084,6 +6112,7 @@ make_unique_from_sortclauses(Plan *lefttree, List *distinctList) int keyno = 0; AttrNumber *uniqColIdx; Oid *uniqOperators; + Oid *uniqCollations; ListCell *slitem; plan->targetlist = lefttree->targetlist; @@ -6098,6 +6127,7 @@ make_unique_from_sortclauses(Plan *lefttree, List *distinctList) Assert(numCols > 0); uniqColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols); uniqOperators = (Oid *) palloc(sizeof(Oid) * numCols); + uniqCollations = (Oid *) palloc(sizeof(Oid) * numCols); foreach(slitem, distinctList) { @@ -6106,6 +6136,7 @@ make_unique_from_sortclauses(Plan *lefttree, List *distinctList) uniqColIdx[keyno] = tle->resno; uniqOperators[keyno] = sortcl->eqop; + uniqCollations[keyno] = exprCollation((Node *) tle->expr); Assert(OidIsValid(uniqOperators[keyno])); keyno++; } @@ -6113,6 +6144,7 @@ make_unique_from_sortclauses(Plan *lefttree, List *distinctList) node->numCols = numCols; node->uniqColIdx = uniqColIdx; node->uniqOperators = uniqOperators; + node->uniqCollations = uniqCollations; return node; } @@ -6128,6 +6160,7 @@ make_unique_from_pathkeys(Plan *lefttree, List *pathkeys, int numCols) int keyno = 0; AttrNumber *uniqColIdx; Oid *uniqOperators; + Oid *uniqCollations; ListCell *lc; plan->targetlist = lefttree->targetlist; @@ -6143,6 +6176,7 @@ make_unique_from_pathkeys(Plan *lefttree, List *pathkeys, int numCols) Assert(numCols >= 0 && numCols <= list_length(pathkeys)); uniqColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols); uniqOperators = (Oid *) palloc(sizeof(Oid) * numCols); + uniqCollations = (Oid *) palloc(sizeof(Oid) * numCols); foreach(lc, pathkeys) { @@ -6211,6 +6245,7 @@ make_unique_from_pathkeys(Plan 
*lefttree, List *pathkeys, int numCols) uniqColIdx[keyno] = tle->resno; uniqOperators[keyno] = eqop; + uniqCollations[keyno] = ec->ec_collation; keyno++; } @@ -6218,6 +6253,7 @@ make_unique_from_pathkeys(Plan *lefttree, List *pathkeys, int numCols) node->numCols = numCols; node->uniqColIdx = uniqColIdx; node->uniqOperators = uniqOperators; + node->uniqCollations = uniqCollations; return node; } @@ -6262,6 +6298,7 @@ make_setop(SetOpCmd cmd, SetOpStrategy strategy, Plan *lefttree, int keyno = 0; AttrNumber *dupColIdx; Oid *dupOperators; + Oid *dupCollations; ListCell *slitem; plan->targetlist = lefttree->targetlist; @@ -6275,6 +6312,7 @@ make_setop(SetOpCmd cmd, SetOpStrategy strategy, Plan *lefttree, */ dupColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols); dupOperators = (Oid *) palloc(sizeof(Oid) * numCols); + dupCollations = (Oid *) palloc(sizeof(Oid) * numCols); foreach(slitem, distinctList) { @@ -6283,6 +6321,7 @@ make_setop(SetOpCmd cmd, SetOpStrategy strategy, Plan *lefttree, dupColIdx[keyno] = tle->resno; dupOperators[keyno] = sortcl->eqop; + dupCollations[keyno] = exprCollation((Node *) tle->expr); Assert(OidIsValid(dupOperators[keyno])); keyno++; } @@ -6292,6 +6331,7 @@ make_setop(SetOpCmd cmd, SetOpStrategy strategy, Plan *lefttree, node->numCols = numCols; node->dupColIdx = dupColIdx; node->dupOperators = dupOperators; + node->dupCollations = dupCollations; node->flagColIdx = flagColIdx; node->firstFlag = firstFlag; node->numGroups = numGroups; diff --git a/src/backend/optimizer/util/tlist.c b/src/backend/optimizer/util/tlist.c index d0cc14f11d..a673819890 100644 --- a/src/backend/optimizer/util/tlist.c +++ b/src/backend/optimizer/util/tlist.c @@ -502,6 +502,31 @@ extract_grouping_ops(List *groupClause) return groupOperators; } +/* + * extract_grouping_collations - make an array of the grouping column collations + * for a SortGroupClause list + */ +Oid * +extract_grouping_collations(List *groupClause, List *tlist) +{ + int numCols = 
list_length(groupClause); + int colno = 0; + Oid *grpCollations; + ListCell *glitem; + + grpCollations = (Oid *) palloc(sizeof(Oid) * numCols); + + foreach(glitem, groupClause) + { + SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem); + TargetEntry *tle = get_sortgroupclause_tle(groupcl, tlist); + + grpCollations[colno++] = exprCollation((Node *) tle->expr); + } + + return grpCollations; +} + /* * extract_grouping_cols - make an array of the grouping column resnos * for a SortGroupClause list diff --git a/src/backend/partitioning/partbounds.c b/src/backend/partitioning/partbounds.c index 60993c3a7a..7898f4a043 100644 --- a/src/backend/partitioning/partbounds.c +++ b/src/backend/partitioning/partbounds.c @@ -2658,7 +2658,7 @@ get_range_nulltest(PartitionKey key) * Compute the hash value for given partition key values. */ uint64 -compute_partition_hash_value(int partnatts, FmgrInfo *partsupfunc, +compute_partition_hash_value(int partnatts, FmgrInfo *partsupfunc, Oid *partcollation, Datum *values, bool *isnull) { int i; @@ -2679,7 +2679,7 @@ compute_partition_hash_value(int partnatts, FmgrInfo *partsupfunc, * datatype-specific hash functions of each partition key * attribute. 
*/ - hash = FunctionCall2(&partsupfunc[i], values[i], seed); + hash = FunctionCall2Coll(&partsupfunc[i], partcollation[i], values[i], seed); /* Form a single 64-bit hash value */ rowHash = hash_combine64(rowHash, DatumGetUInt64(hash)); diff --git a/src/backend/partitioning/partprune.c b/src/backend/partitioning/partprune.c index 901433c68c..ee2cce452e 100644 --- a/src/backend/partitioning/partprune.c +++ b/src/backend/partitioning/partprune.c @@ -2158,6 +2158,7 @@ get_matching_hash_bounds(PartitionPruneContext *context, int i; uint64 rowHash; int greatest_modulus; + Oid *partcollation = context->partcollation; Assert(context->strategy == PARTITION_STRATEGY_HASH); @@ -2178,7 +2179,7 @@ get_matching_hash_bounds(PartitionPruneContext *context, isnull[i] = bms_is_member(i, nullkeys); greatest_modulus = get_hash_partition_greatest_modulus(boundinfo); - rowHash = compute_partition_hash_value(partnatts, partsupfunc, + rowHash = compute_partition_hash_value(partnatts, partsupfunc, partcollation, values, isnull); if (partindices[rowHash % greatest_modulus] >= 0) diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c index e457d81f23..f6567075ba 100644 --- a/src/backend/utils/adt/arrayfuncs.c +++ b/src/backend/utils/adt/arrayfuncs.c @@ -3954,7 +3954,7 @@ hash_array(PG_FUNCTION_ARGS) * apply the hash function to each array element. */ InitFunctionCallInfoData(locfcinfo, &typentry->hash_proc_finfo, 1, - InvalidOid, NULL, NULL); + PG_GET_COLLATION(), NULL, NULL); /* Loop over source data */ nitems = ArrayGetNItems(ndims, dims); diff --git a/src/backend/utils/adt/name.c b/src/backend/utils/adt/name.c index 3a7887d455..54425925ed 100644 --- a/src/backend/utils/adt/name.c +++ b/src/backend/utils/adt/name.c @@ -131,14 +131,26 @@ namesend(PG_FUNCTION_ARGS) * have a '\0' terminator. Whatever might be past the terminator is not * considered relevant to comparisons. 
*/ +static int +namecmp(Name arg1, Name arg2, Oid collid) +{ + /* Fast path for common case used in system catalogs */ + if (collid == C_COLLATION_OID) + return strncmp(NameStr(*arg1), NameStr(*arg2), NAMEDATALEN); + + /* Else rely on the varstr infrastructure */ + return varstr_cmp(NameStr(*arg1), strlen(NameStr(*arg1)), + NameStr(*arg2), strlen(NameStr(*arg2)), + collid); +} + Datum nameeq(PG_FUNCTION_ARGS) { Name arg1 = PG_GETARG_NAME(0); Name arg2 = PG_GETARG_NAME(1); - /* Collation doesn't matter: equal only if bitwise-equal */ - PG_RETURN_BOOL(strncmp(NameStr(*arg1), NameStr(*arg2), NAMEDATALEN) == 0); + PG_RETURN_BOOL(namecmp(arg1, arg2, PG_GET_COLLATION()) == 0); } Datum @@ -147,21 +159,7 @@ namene(PG_FUNCTION_ARGS) Name arg1 = PG_GETARG_NAME(0); Name arg2 = PG_GETARG_NAME(1); - /* Collation doesn't matter: equal only if bitwise-equal */ - PG_RETURN_BOOL(strncmp(NameStr(*arg1), NameStr(*arg2), NAMEDATALEN) != 0); -} - -static int -namecmp(Name arg1, Name arg2, Oid collid) -{ - /* Fast path for common case used in system catalogs */ - if (collid == C_COLLATION_OID) - return strncmp(NameStr(*arg1), NameStr(*arg2), NAMEDATALEN); - - /* Else rely on the varstr infrastructure */ - return varstr_cmp(NameStr(*arg1), strlen(NameStr(*arg1)), - NameStr(*arg2), strlen(NameStr(*arg2)), - collid); + PG_RETURN_BOOL(namecmp(arg1, arg2, PG_GET_COLLATION()) != 0); } Datum diff --git a/src/backend/utils/adt/orderedsetaggs.c b/src/backend/utils/adt/orderedsetaggs.c index 6075bc3db7..0165b23a6e 100644 --- a/src/backend/utils/adt/orderedsetaggs.c +++ b/src/backend/utils/adt/orderedsetaggs.c @@ -1084,7 +1084,7 @@ mode_final(PG_FUNCTION_ARGS) last_abbrev_val = abbrev_val; } else if (abbrev_val == last_abbrev_val && - DatumGetBool(FunctionCall2(equalfn, val, last_val))) + DatumGetBool(FunctionCall2Coll(equalfn, PG_GET_COLLATION(), val, last_val))) { /* value equal to previous value, count it */ if (last_val_is_mode) @@ -1345,6 +1345,7 @@ 
hypothetical_dense_rank_final(PG_FUNCTION_ARGS) numDistinctCols, sortColIdx, osastate->qstate->eqOperators, + osastate->qstate->sortCollations, NULL); MemoryContextSwitchTo(oldContext); osastate->qstate->compareTuple = compareTuple; diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c index 50b8b31645..1f7fdc0593 100644 --- a/src/backend/utils/adt/pg_locale.c +++ b/src/backend/utils/adt/pg_locale.c @@ -1308,6 +1308,7 @@ pg_newlocale_from_collation(Oid collid) /* We'll fill in the result struct locally before allocating memory */ memset(&result, 0, sizeof(result)); result.provider = collform->collprovider; + result.deterministic = collform->collisdeterministic; if (collform->collprovider == COLLPROVIDER_LIBC) { diff --git a/src/backend/utils/adt/ri_triggers.c b/src/backend/utils/adt/ri_triggers.c index e606eb342f..d1c6a0100d 100644 --- a/src/backend/utils/adt/ri_triggers.c +++ b/src/backend/utils/adt/ri_triggers.c @@ -778,6 +778,8 @@ ri_restrict(TriggerData *trigdata, bool is_no_action) { Oid pk_type = RIAttType(pk_rel, riinfo->pk_attnums[i]); Oid fk_type = RIAttType(fk_rel, riinfo->fk_attnums[i]); + Oid pk_coll = RIAttCollation(pk_rel, riinfo->pk_attnums[i]); + Oid fk_coll = RIAttCollation(fk_rel, riinfo->fk_attnums[i]); quoteOneName(attname, RIAttName(fk_rel, riinfo->fk_attnums[i])); @@ -786,6 +788,8 @@ ri_restrict(TriggerData *trigdata, bool is_no_action) paramname, pk_type, riinfo->pf_eq_oprs[i], attname, fk_type); + if (pk_coll != fk_coll && !get_collation_isdeterministic(pk_coll)) + ri_GenerateQualCollation(&querybuf, pk_coll); querysep = "AND"; queryoids[i] = pk_type; } @@ -918,6 +922,8 @@ RI_FKey_cascade_del(PG_FUNCTION_ARGS) { Oid pk_type = RIAttType(pk_rel, riinfo->pk_attnums[i]); Oid fk_type = RIAttType(fk_rel, riinfo->fk_attnums[i]); + Oid pk_coll = RIAttCollation(pk_rel, riinfo->pk_attnums[i]); + Oid fk_coll = RIAttCollation(fk_rel, riinfo->fk_attnums[i]); quoteOneName(attname, RIAttName(fk_rel, riinfo->fk_attnums[i])); @@ 
-926,6 +932,8 @@ RI_FKey_cascade_del(PG_FUNCTION_ARGS) paramname, pk_type, riinfo->pf_eq_oprs[i], attname, fk_type); + if (pk_coll != fk_coll && !get_collation_isdeterministic(pk_coll)) + ri_GenerateQualCollation(&querybuf, pk_coll); querysep = "AND"; queryoids[i] = pk_type; } @@ -1069,6 +1077,8 @@ RI_FKey_cascade_upd(PG_FUNCTION_ARGS) { Oid pk_type = RIAttType(pk_rel, riinfo->pk_attnums[i]); Oid fk_type = RIAttType(fk_rel, riinfo->fk_attnums[i]); + Oid pk_coll = RIAttCollation(pk_rel, riinfo->pk_attnums[i]); + Oid fk_coll = RIAttCollation(fk_rel, riinfo->fk_attnums[i]); quoteOneName(attname, RIAttName(fk_rel, riinfo->fk_attnums[i])); @@ -1080,6 +1090,8 @@ RI_FKey_cascade_upd(PG_FUNCTION_ARGS) paramname, pk_type, riinfo->pf_eq_oprs[i], attname, fk_type); + if (pk_coll != fk_coll && !get_collation_isdeterministic(pk_coll)) + ri_GenerateQualCollation(&querybuf, pk_coll); querysep = ","; qualsep = "AND"; queryoids[i] = pk_type; @@ -1257,6 +1269,8 @@ ri_setnull(TriggerData *trigdata) { Oid pk_type = RIAttType(pk_rel, riinfo->pk_attnums[i]); Oid fk_type = RIAttType(fk_rel, riinfo->fk_attnums[i]); + Oid pk_coll = RIAttCollation(pk_rel, riinfo->pk_attnums[i]); + Oid fk_coll = RIAttCollation(fk_rel, riinfo->fk_attnums[i]); quoteOneName(attname, RIAttName(fk_rel, riinfo->fk_attnums[i])); @@ -1268,6 +1282,8 @@ ri_setnull(TriggerData *trigdata) paramname, pk_type, riinfo->pf_eq_oprs[i], attname, fk_type); + if (pk_coll != fk_coll && !get_collation_isdeterministic(pk_coll)) + ri_GenerateQualCollation(&querybuf, pk_coll); querysep = ","; qualsep = "AND"; queryoids[i] = pk_type; @@ -1444,6 +1460,8 @@ ri_setdefault(TriggerData *trigdata) { Oid pk_type = RIAttType(pk_rel, riinfo->pk_attnums[i]); Oid fk_type = RIAttType(fk_rel, riinfo->fk_attnums[i]); + Oid pk_coll = RIAttCollation(pk_rel, riinfo->pk_attnums[i]); + Oid fk_coll = RIAttCollation(fk_rel, riinfo->fk_attnums[i]); quoteOneName(attname, RIAttName(fk_rel, riinfo->fk_attnums[i])); @@ -1455,6 +1473,8 @@ 
ri_setdefault(TriggerData *trigdata) paramname, pk_type, riinfo->pf_eq_oprs[i], attname, fk_type); + if (pk_coll != fk_coll && !get_collation_isdeterministic(pk_coll)) + ri_GenerateQualCollation(&querybuf, pk_coll); querysep = ","; qualsep = "AND"; queryoids[i] = pk_type; diff --git a/src/backend/utils/adt/varchar.c b/src/backend/utils/adt/varchar.c index 5cf927e27f..b50f441c8f 100644 --- a/src/backend/utils/adt/varchar.c +++ b/src/backend/utils/adt/varchar.c @@ -933,10 +933,20 @@ Datum hashbpchar(PG_FUNCTION_ARGS) { BpChar *key = PG_GETARG_BPCHAR_PP(0); + Oid collid = PG_GET_COLLATION(); char *keydata; int keylen; Datum result; + if (!collid) + ereport(ERROR, + (errcode(ERRCODE_INDETERMINATE_COLLATION), + errmsg("could not determine which collation to use for string hashing"), + errhint("Use the COLLATE clause to set the collation explicitly."))); + + if (collid != DEFAULT_COLLATION_OID) + elog(ERROR, "TODO"); + keydata = VARDATA_ANY(key); keylen = bcTruelen(key); @@ -952,10 +962,20 @@ Datum hashbpcharextended(PG_FUNCTION_ARGS) { BpChar *key = PG_GETARG_BPCHAR_PP(0); + Oid collid = PG_GET_COLLATION(); char *keydata; int keylen; Datum result; + if (!collid) + ereport(ERROR, + (errcode(ERRCODE_INDETERMINATE_COLLATION), + errmsg("could not determine which collation to use for string hashing"), + errhint("Use the COLLATE clause to set the collation explicitly."))); + + if (collid != DEFAULT_COLLATION_OID) + elog(ERROR, "TODO"); + keydata = VARDATA_ANY(key); keylen = bcTruelen(key); diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c index 801d8c5c29..66b0e5fe99 100644 --- a/src/backend/utils/adt/varlena.c +++ b/src/backend/utils/adt/varlena.c @@ -1430,7 +1430,8 @@ varstr_cmp(const char *arg1, int len1, const char *arg2, int len2, Oid collid) * equal strings in the input - then we win big by avoiding expensive * collation-aware comparisons. 
*/ - if (len1 == len2 && memcmp(arg1, arg2, len1) == 0) + if ((!mylocale || (mylocale && mylocale->deterministic)) && + len1 == len2 && memcmp(arg1, arg2, len1) == 0) return 0; #ifdef WIN32 @@ -1508,7 +1509,8 @@ varstr_cmp(const char *arg1, int len1, const char *arg2, int len2, Oid collid) * reasons, so we follow Perl's lead and sort "equal" strings * according to strcmp (on the UTF-8 representation). */ - if (result == 0) + if (result == 0 && + (!mylocale || (mylocale && mylocale->deterministic))) { result = memcmp(arg1, arg2, Min(len1, len2)); if ((result == 0) && (len1 != len2)) @@ -1599,7 +1601,8 @@ varstr_cmp(const char *arg1, int len1, const char *arg2, int len2, Oid collid) * so we follow Perl's lead and sort "equal" strings according to * strcmp(). */ - if (result == 0) + if (result == 0 && + (!mylocale || (mylocale && mylocale->deterministic))) result = strcmp(a1p, a2p); if (a1p != a1buf) @@ -1643,6 +1646,23 @@ text_cmp(text *arg1, text *arg2, Oid collid) Datum texteq(PG_FUNCTION_ARGS) { + if (PG_GET_COLLATION() && + PG_GET_COLLATION() != DEFAULT_COLLATION_OID && + !pg_newlocale_from_collation(PG_GET_COLLATION())->deterministic) + { + text *arg1 = PG_GETARG_TEXT_PP(0); + text *arg2 = PG_GETARG_TEXT_PP(1); + bool result; + + result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) == 0); + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_BOOL(result); + } + else + { Datum arg1 = PG_GETARG_DATUM(0); Datum arg2 = PG_GETARG_DATUM(1); bool result; @@ -1673,11 +1693,29 @@ texteq(PG_FUNCTION_ARGS) } PG_RETURN_BOOL(result); + } } Datum textne(PG_FUNCTION_ARGS) { + if (PG_GET_COLLATION() && + PG_GET_COLLATION() != DEFAULT_COLLATION_OID && + !pg_newlocale_from_collation(PG_GET_COLLATION())->deterministic) + { + text *arg1 = PG_GETARG_TEXT_PP(0); + text *arg2 = PG_GETARG_TEXT_PP(1); + bool result; + + result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) != 0); + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_BOOL(result); + } + 
else + { Datum arg1 = PG_GETARG_DATUM(0); Datum arg2 = PG_GETARG_DATUM(1); bool result; @@ -1702,6 +1740,7 @@ textne(PG_FUNCTION_ARGS) } PG_RETURN_BOOL(result); + } } Datum @@ -2142,7 +2181,8 @@ varstrfastcmp_locale(char *a1p, int len1, char *a2p, int len2, SortSupport ssup) bool arg1_match; /* Fast pre-check for equality, as discussed in varstr_cmp() */ - if (len1 == len2 && memcmp(a1p, a2p, len1) == 0) + if ((!sss->locale || (sss->locale && sss->locale->deterministic)) && + len1 == len2 && memcmp(a1p, a2p, len1) == 0) { /* * No change in buf1 or buf2 contents, so avoid changing last_len1 or @@ -2277,7 +2317,8 @@ varstrfastcmp_locale(char *a1p, int len1, char *a2p, int len2, SortSupport ssup) * equal. Believing that would be bad news for a number of reasons, so we * follow Perl's lead and sort "equal" strings according to strcmp(). */ - if (result == 0) + if (result == 0 && + (!sss->locale || (sss->locale && sss->locale->deterministic))) result = strcmp(sss->buf1, sss->buf2); /* Cache result, perhaps saving an expensive strcoll() call next time */ diff --git a/src/backend/utils/cache/catcache.c b/src/backend/utils/cache/catcache.c index 8152f7e21e..2097e20945 100644 --- a/src/backend/utils/cache/catcache.c +++ b/src/backend/utils/cache/catcache.c @@ -188,7 +188,7 @@ texteqfast(Datum a, Datum b) static uint32 texthashfast(Datum datum) { - return DatumGetInt32(DirectFunctionCall1(hashtext, datum)); + return DatumGetInt32(DirectFunctionCall1Coll(hashtext, DEFAULT_COLLATION_OID, datum)); } static bool diff --git a/src/backend/utils/cache/lsyscache.c b/src/backend/utils/cache/lsyscache.c index fba0ee8b84..f3bbaed992 100644 --- a/src/backend/utils/cache/lsyscache.c +++ b/src/backend/utils/cache/lsyscache.c @@ -908,6 +908,22 @@ get_collation_name(Oid colloid) return NULL; } +bool +get_collation_isdeterministic(Oid colloid) +{ + HeapTuple tp; + Form_pg_collation colltup; + bool result; + + tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(colloid)); + if 
(!HeapTupleIsValid(tp)) + elog(ERROR, "cache lookup failed for collation %u", colloid); + colltup = (Form_pg_collation) GETSTRUCT(tp); + result = colltup->collisdeterministic; + ReleaseSysCache(tp); + return result; +} + /* ---------- CONSTRAINT CACHE ---------- */ /* diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c index e55ba668ce..7df4e85e2f 100644 --- a/src/bin/initdb/initdb.c +++ b/src/bin/initdb/initdb.c @@ -1765,8 +1765,8 @@ setup_collation(FILE *cmdfd) * in pg_collation.h. But add it before reading system collations, so * that it wins if libc defines a locale named ucs_basic. */ - PG_CMD_PRINTF3("INSERT INTO pg_collation (oid, collname, collnamespace, collowner, collprovider, collencoding, collcollate, collctype)" - "VALUES (pg_nextoid('pg_catalog.pg_collation', 'oid', 'pg_catalog.pg_collation_oid_index'), 'ucs_basic', 'pg_catalog'::regnamespace, %u, '%c', %d, 'C', 'C');\n\n", + PG_CMD_PRINTF3("INSERT INTO pg_collation (oid, collname, collnamespace, collowner, collprovider, collisdeterministic, collencoding, collcollate, collctype)" + "VALUES (pg_nextoid('pg_catalog.pg_collation', 'oid', 'pg_catalog.pg_collation_oid_index'), 'ucs_basic', 'pg_catalog'::regnamespace, %u, '%c', true, %d, 'C', 'C');\n\n", BOOTSTRAP_SUPERUSERID, COLLPROVIDER_LIBC, PG_UTF8); /* Now import all collations we can find in the operating system */ diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index 0e129f9654..9ff35fd31e 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -13259,6 +13259,7 @@ dumpCollation(Archive *fout, CollInfo *collinfo) char *qcollname; PGresult *res; int i_collprovider; + int i_collisdeterministic; int i_collcollate; int i_collctype; const char *collprovider; @@ -13276,28 +13277,35 @@ dumpCollation(Archive *fout, CollInfo *collinfo) qcollname = pg_strdup(fmtId(collinfo->dobj.name)); /* Get collation-specific details */ + appendPQExpBuffer(query, "SELECT "); + if (fout->remoteVersion >= 100000) - 
appendPQExpBuffer(query, "SELECT " + appendPQExpBuffer(query, "collprovider, " - "collcollate, " - "collctype, " - "collversion " - "FROM pg_catalog.pg_collation c " - "WHERE c.oid = '%u'::pg_catalog.oid", - collinfo->dobj.catId.oid); + "collversion, "); else - appendPQExpBuffer(query, "SELECT " + appendPQExpBuffer(query, "'c' AS collprovider, " - "collcollate, " - "collctype, " - "NULL AS collversion " - "FROM pg_catalog.pg_collation c " - "WHERE c.oid = '%u'::pg_catalog.oid", - collinfo->dobj.catId.oid); + "NULL AS collversion, "); + + if (fout->remoteVersion >= 120000) + appendPQExpBuffer(query, + "collisdeterministic, "); + else + appendPQExpBuffer(query, + "true AS collisdeterministic, "); + + appendPQExpBuffer(query, + "collcollate, " + "collctype " + "FROM pg_catalog.pg_collation c " + "WHERE c.oid = '%u'::pg_catalog.oid", + collinfo->dobj.catId.oid); res = ExecuteSqlQueryForSingleRow(fout, query->data); i_collprovider = PQfnumber(res, "collprovider"); + i_collisdeterministic = PQfnumber(res, "collisdeterministic"); i_collcollate = PQfnumber(res, "collcollate"); i_collctype = PQfnumber(res, "collctype"); @@ -13324,6 +13332,9 @@ dumpCollation(Archive *fout, CollInfo *collinfo) "unrecognized collation provider: %s\n", collprovider); + if (strcmp(PQgetvalue(res, 0, i_collisdeterministic), "f") == 0) + appendPQExpBufferStr(q, ", deterministic = false"); + if (strcmp(collcollate, collctype) == 0) { appendPQExpBufferStr(q, ", locale = "); diff --git a/src/bin/psql/describe.c b/src/bin/psql/describe.c index 4da6719ce7..b43300c262 100644 --- a/src/bin/psql/describe.c +++ b/src/bin/psql/describe.c @@ -4092,7 +4092,7 @@ listCollations(const char *pattern, bool verbose, bool showSystem) PQExpBufferData buf; PGresult *res; printQueryOpt myopt = pset.popt; - static const bool translate_columns[] = {false, false, false, false, false, false}; + static const bool translate_columns[] = {false, false, false, false, false, true, false}; if (pset.sversion < 90100) { @@ -4120,6 
+4120,21 @@ listCollations(const char *pattern, bool verbose, bool showSystem) appendPQExpBuffer(&buf, ",\n CASE c.collprovider WHEN 'd' THEN 'default' WHEN 'c' THEN 'libc' WHEN 'i' THEN 'icu' END AS \"%s\"", gettext_noop("Provider")); + else + appendPQExpBuffer(&buf, + ",\n 'libc' AS \"%s\"", + gettext_noop("Provider")); + + if (pset.sversion >= 120000) + appendPQExpBuffer(&buf, + ",\n CASE WHEN c.collisdeterministic THEN '%s' ELSE '%s' END AS \"%s\"", + gettext_noop("yes"), gettext_noop("no"), + gettext_noop("Deterministic?")); + else + appendPQExpBuffer(&buf, + ",\n '%s' AS \"%s\"", + gettext_noop("yes"), + gettext_noop("Deterministic?")); if (verbose) appendPQExpBuffer(&buf, diff --git a/src/include/catalog/pg_collation.h b/src/include/catalog/pg_collation.h index 10fe711a91..4d2fcb3858 100644 --- a/src/include/catalog/pg_collation.h +++ b/src/include/catalog/pg_collation.h @@ -33,6 +33,7 @@ CATALOG(pg_collation,3456,CollationRelationId) Oid collnamespace; /* OID of namespace containing collation */ Oid collowner; /* owner of collation */ char collprovider; /* see constants below */ + bool collisdeterministic BKI_DEFAULT(t); int32 collencoding; /* encoding for this collation; -1 = "all" */ NameData collcollate; /* LC_COLLATE setting */ NameData collctype; /* LC_CTYPE setting */ @@ -61,6 +62,7 @@ typedef FormData_pg_collation *Form_pg_collation; extern Oid CollationCreate(const char *collname, Oid collnamespace, Oid collowner, char collprovider, + bool collisdeterministic, int32 collencoding, const char *collcollate, const char *collctype, const char *collversion, diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h index 3831cceedf..76cc5caa61 100644 --- a/src/include/executor/executor.h +++ b/src/include/executor/executor.h @@ -111,6 +111,7 @@ extern ExprState *execTuplesMatchPrepare(TupleDesc desc, int numCols, const AttrNumber *keyColIdx, const Oid *eqOperators, + const Oid *collations, PlanState *parent); extern void 
execTuplesHashPrepare(int numCols, const Oid *eqOperators, @@ -121,6 +122,7 @@ extern TupleHashTable BuildTupleHashTable(PlanState *parent, int numCols, AttrNumber *keyColIdx, const Oid *eqfuncoids, FmgrInfo *hashfunctions, + Oid *collations, long nbuckets, Size additionalsize, MemoryContext tablecxt, MemoryContext tempcxt, bool use_variable_hash_iv); @@ -247,6 +249,7 @@ extern ExprState *ExecBuildGroupingEqual(TupleDesc ldesc, TupleDesc rdesc, int numCols, const AttrNumber *keyColIdx, const Oid *eqfunctions, + const Oid *collations, PlanState *parent); extern ProjectionInfo *ExecBuildProjectionInfo(List *targetList, ExprContext *econtext, diff --git a/src/include/executor/hashjoin.h b/src/include/executor/hashjoin.h index e7bf158c1b..2c94b926d3 100644 --- a/src/include/executor/hashjoin.h +++ b/src/include/executor/hashjoin.h @@ -337,6 +337,7 @@ typedef struct HashJoinTableData FmgrInfo *outer_hashfunctions; /* lookup data for hash functions */ FmgrInfo *inner_hashfunctions; /* lookup data for hash functions */ bool *hashStrict; /* is each hash join operator strict? 
*/ + Oid *collations; Size spaceUsed; /* memory space currently used by tuples */ Size spaceAllowed; /* upper limit for space used */ diff --git a/src/include/executor/nodeHash.h b/src/include/executor/nodeHash.h index 1309b32b90..1233766023 100644 --- a/src/include/executor/nodeHash.h +++ b/src/include/executor/nodeHash.h @@ -24,7 +24,7 @@ extern Node *MultiExecHash(HashState *node); extern void ExecEndHash(HashState *node); extern void ExecReScanHash(HashState *node); -extern HashJoinTable ExecHashTableCreate(HashState *state, List *hashOperators, +extern HashJoinTable ExecHashTableCreate(HashState *state, List *hashOperators, List *hashCollations, bool keepNulls); extern void ExecParallelHashTableAlloc(HashJoinTable hashtable, int batchno); diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 7cae085177..9c512bddf9 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -688,6 +688,7 @@ typedef struct TupleHashTableData AttrNumber *keyColIdx; /* attr numbers of key columns */ FmgrInfo *tab_hash_funcs; /* hash functions for table datatype(s) */ ExprState *tab_eq_func; /* comparator for table datatype(s) */ + Oid *tab_collations; /* collations for hash and comparison */ MemoryContext tablecxt; /* memory context containing table */ MemoryContext tempcxt; /* context for function evaluations */ Size entrysize; /* actual size to make each hash entry */ @@ -857,6 +858,7 @@ typedef struct SubPlanState AttrNumber *keyColIdx; /* control data for hash tables */ Oid *tab_eq_funcoids; /* equality func oids for table * datatype(s) */ + Oid *tab_collations; /* collations for hash and comparison */ FmgrInfo *tab_hash_funcs; /* hash functions for table datatype(s) */ FmgrInfo *tab_eq_funcs; /* equality functions for table datatype(s) */ FmgrInfo *lhs_hash_funcs; /* hash functions for lefthand datatype(s) */ @@ -1869,6 +1871,7 @@ typedef struct HashJoinState List *hj_OuterHashKeys; /* list of ExprState nodes */ List 
*hj_InnerHashKeys; /* list of ExprState nodes */ List *hj_HashOperators; /* list of operator OIDs */ + List *hj_Collations; HashJoinTable hj_HashTable; uint32 hj_CurHashValue; int hj_CurBucketNo; diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index 6d087c268f..840ba416a0 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -297,6 +297,7 @@ typedef struct RecursiveUnion * duplicate-ness */ AttrNumber *dupColIdx; /* their indexes in the target list */ Oid *dupOperators; /* equality operators to compare with */ + Oid *dupCollations; long numGroups; /* estimated number of groups in input */ } RecursiveUnion; @@ -773,6 +774,7 @@ typedef struct Group int numCols; /* number of grouping columns */ AttrNumber *grpColIdx; /* their indexes in the target list */ Oid *grpOperators; /* equality operators to compare with */ + Oid *grpCollations; } Group; /* --------------- @@ -797,6 +799,7 @@ typedef struct Agg int numCols; /* number of grouping columns */ AttrNumber *grpColIdx; /* their indexes in the target list */ Oid *grpOperators; /* equality operators to compare with */ + Oid *grpCollations; long numGroups; /* estimated number of groups in input */ Bitmapset *aggParams; /* IDs of Params used in Aggref inputs */ /* Note: planner provides numGroups & aggParams only in HASHED/MIXED case */ @@ -815,9 +818,11 @@ typedef struct WindowAgg int partNumCols; /* number of columns in partition clause */ AttrNumber *partColIdx; /* their indexes in the target list */ Oid *partOperators; /* equality operators for partition columns */ + Oid *partCollations; /* collations for partition columns */ int ordNumCols; /* number of columns in ordering clause */ AttrNumber *ordColIdx; /* their indexes in the target list */ Oid *ordOperators; /* equality operators for ordering columns */ + Oid *ordCollations; /* collations for ordering columns */ int frameOptions; /* frame_clause options, see WindowDef */ Node *startOffset; /* expression for 
starting bound, if any */ Node *endOffset; /* expression for ending bound, if any */ @@ -839,6 +844,7 @@ typedef struct Unique int numCols; /* number of columns to check for uniqueness */ AttrNumber *uniqColIdx; /* their indexes in the target list */ Oid *uniqOperators; /* equality operators to compare with */ + Oid *uniqCollations; /* collations for equality comparisons */ } Unique; /* ------------ @@ -913,6 +919,7 @@ typedef struct SetOp * duplicate-ness */ AttrNumber *dupColIdx; /* their indexes in the target list */ Oid *dupOperators; /* equality operators to compare with */ + Oid *dupCollations; AttrNumber flagColIdx; /* where is the flag column, if any */ int firstFlag; /* flag value for first input relation */ long numGroups; /* estimated number of groups in input */ diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h index bec0c38617..8e8f570e00 100644 --- a/src/include/optimizer/planmain.h +++ b/src/include/optimizer/planmain.h @@ -63,7 +63,7 @@ extern bool is_projection_capable_plan(Plan *plan); extern Sort *make_sort_from_sortclauses(List *sortcls, Plan *lefttree); extern Agg *make_agg(List *tlist, List *qual, AggStrategy aggstrategy, AggSplit aggsplit, - int numGroupCols, AttrNumber *grpColIdx, Oid *grpOperators, + int numGroupCols, AttrNumber *grpColIdx, Oid *grpOperators, Oid *grpCollations, List *groupingSets, List *chain, double dNumGroups, Plan *lefttree); extern Limit *make_limit(Plan *lefttree, Node *limitOffset, Node *limitCount); diff --git a/src/include/optimizer/tlist.h b/src/include/optimizer/tlist.h index 8b967f9583..70f8454a44 100644 --- a/src/include/optimizer/tlist.h +++ b/src/include/optimizer/tlist.h @@ -48,6 +48,7 @@ extern SortGroupClause *get_sortgroupref_clause_noerr(Index sortref, List *clauses); extern Oid *extract_grouping_ops(List *groupClause); +extern Oid *extract_grouping_collations(List *groupClause, List *tlist); extern AttrNumber *extract_grouping_cols(List *groupClause, List *tlist); extern 
bool grouping_is_sortable(List *groupClause); extern bool grouping_is_hashable(List *groupClause); diff --git a/src/include/partitioning/partbounds.h b/src/include/partitioning/partbounds.h index b1ae39ad63..683e1574ea 100644 --- a/src/include/partitioning/partbounds.h +++ b/src/include/partitioning/partbounds.h @@ -77,6 +77,7 @@ typedef struct PartitionBoundInfoData extern int get_hash_partition_greatest_modulus(PartitionBoundInfo b); extern uint64 compute_partition_hash_value(int partnatts, FmgrInfo *partsupfunc, + Oid *partcollation, Datum *values, bool *isnull); extern List *get_qual_from_partbound(Relation rel, Relation parent, PartitionBoundSpec *spec); diff --git a/src/include/utils/lsyscache.h b/src/include/utils/lsyscache.h index ceec85db92..f622b9e512 100644 --- a/src/include/utils/lsyscache.h +++ b/src/include/utils/lsyscache.h @@ -90,6 +90,7 @@ extern Oid get_atttype(Oid relid, AttrNumber attnum); extern void get_atttypetypmodcoll(Oid relid, AttrNumber attnum, Oid *typid, int32 *typmod, Oid *collid); extern char *get_collation_name(Oid colloid); +extern bool get_collation_isdeterministic(Oid colloid); extern char *get_constraint_name(Oid conoid); extern char *get_language_name(Oid langoid, bool missing_ok); extern Oid get_opclass_family(Oid opclass); diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h index 606952afd7..a342a62549 100644 --- a/src/include/utils/pg_locale.h +++ b/src/include/utils/pg_locale.h @@ -82,6 +82,7 @@ extern void cache_locale_time(void); struct pg_locale_struct { char provider; + bool deterministic; union { #ifdef HAVE_LOCALE_T diff --git a/src/test/regress/expected/collate.icu.utf8.out b/src/test/regress/expected/collate.icu.utf8.out index f485b5c330..02699ae102 100644 --- a/src/test/regress/expected/collate.icu.utf8.out +++ b/src/test/regress/expected/collate.icu.utf8.out @@ -1100,27 +1100,330 @@ select textrange_en_us('A','Z') @> 'b'::text; drop type textrange_c; drop type textrange_en_us; +-- 
nondeterministic collations +CREATE COLLATION ctest_det (provider = icu, locale = 'und', deterministic = true); +CREATE COLLATION ctest_nondet (provider = icu, locale = 'und', deterministic = false); +CREATE TABLE test6 (a int, b text); +-- same string in different normal forms +INSERT INTO test6 VALUES (1, U&'\00E4bc'); +INSERT INTO test6 VALUES (2, U&'\0061\0308bc'); +SELECT * FROM test6; + a | b +---+----- + 1 | äbc + 2 | äbc +(2 rows) + +SELECT * FROM test6 WHERE b = 'äbc' COLLATE ctest_det; + a | b +---+----- + 1 | äbc +(1 row) + +SELECT * FROM test6 WHERE b = 'äbc' COLLATE ctest_nondet; + a | b +---+----- + 1 | äbc + 2 | äbc +(2 rows) + +CREATE COLLATION case_sensitive (provider = icu, locale = 'und'); +CREATE COLLATION case_insensitive (provider = icu, locale = 'und-u-ks-level2', deterministic = false); +SELECT 'abc' <= 'ABC' COLLATE case_sensitive, 'abc' >= 'ABC' COLLATE case_sensitive; + ?column? | ?column? +----------+---------- + t | f +(1 row) + +SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_insensitive; + ?column? | ?column? 
+----------+---------- + t | t +(1 row) + +CREATE TABLE test1cs (x text COLLATE case_sensitive); +CREATE TABLE test2cs (x text COLLATE case_sensitive); +CREATE TABLE test3cs (x text COLLATE case_sensitive); +INSERT INTO test1cs VALUES ('abc'), ('def'), ('ghi'); +INSERT INTO test2cs VALUES ('ABC'), ('ghi'); +INSERT INTO test3cs VALUES ('abc'), ('ABC'), ('def'), ('ghi'); +SELECT x FROM test1cs UNION SELECT x FROM test2cs; + x +----- + ABC + abc + def + ghi +(4 rows) + +SELECT x FROM test2cs UNION SELECT x FROM test1cs; + x +----- + ABC + abc + def + ghi +(4 rows) + +SELECT x FROM test1cs INTERSECT SELECT x FROM test2cs; + x +----- + ghi +(1 row) + +SELECT x FROM test2cs INTERSECT SELECT x FROM test1cs; + x +----- + ghi +(1 row) + +SELECT x FROM test1cs EXCEPT SELECT x FROM test2cs; + x +----- + abc + def +(2 rows) + +SELECT x FROM test2cs EXCEPT SELECT x FROM test1cs; + x +----- + ABC +(1 row) + +SELECT x, count(*) FROM test3cs GROUP BY x ORDER BY x; + x | count +-----+------- + abc | 1 + ABC | 1 + def | 1 + ghi | 1 +(4 rows) + +SELECT x, row_number() OVER (ORDER BY x), rank() OVER (ORDER BY x) FROM test3cs ORDER BY x; + x | row_number | rank +-----+------------+------ + abc | 1 | 1 + ABC | 2 | 2 + def | 3 | 3 + ghi | 4 | 4 +(4 rows) + +CREATE UNIQUE INDEX ON test1cs (x); -- ok +INSERT INTO test1cs VALUES ('ABC'); -- ok +CREATE UNIQUE INDEX ON test3cs (x); -- ok +CREATE TABLE test1ci (x text COLLATE case_insensitive); +CREATE TABLE test2ci (x text COLLATE case_insensitive); +CREATE TABLE test3ci (x text COLLATE case_insensitive); +INSERT INTO test1ci VALUES ('abc'), ('def'), ('ghi'); +INSERT INTO test2ci VALUES ('ABC'), ('ghi'); +INSERT INTO test3ci VALUES ('abc'), ('ABC'), ('def'), ('ghi'); +SELECT x FROM test1ci UNION SELECT x FROM test2ci; + x +----- + abc + ghi + def +(3 rows) + +SELECT x FROM test2ci UNION SELECT x FROM test1ci; + x +----- + ABC + ghi + def +(3 rows) + +SELECT x FROM test1ci INTERSECT SELECT x FROM test2ci; + x +----- + ghi + abc +(2 rows) + 
+SELECT x FROM test2ci INTERSECT SELECT x FROM test1ci; + x +----- + ghi + ABC +(2 rows) + +SELECT x FROM test1ci EXCEPT SELECT x FROM test2ci; + x +----- + def +(1 row) + +SELECT x FROM test2ci EXCEPT SELECT x FROM test1ci; + x +--- +(0 rows) + +SELECT x, count(*) FROM test3ci GROUP BY x ORDER BY x; + x | count +-----+------- + abc | 2 + def | 1 + ghi | 1 +(3 rows) + +SELECT x, row_number() OVER (ORDER BY x), rank() OVER (ORDER BY x) FROM test3ci ORDER BY x; + x | row_number | rank +-----+------------+------ + abc | 1 | 1 + ABC | 2 | 1 + def | 3 | 3 + ghi | 4 | 4 +(4 rows) + +CREATE UNIQUE INDEX ON test1ci (x); -- ok +INSERT INTO test1ci VALUES ('ABC'); -- error +ERROR: duplicate key value violates unique constraint "test1ci_x_idx" +DETAIL: Key (x)=(ABC) already exists. +CREATE UNIQUE INDEX ON test3ci (x); -- error +ERROR: could not create unique index "test3ci_x_idx" +DETAIL: Key (x)=(abc) is duplicated. +CREATE COLLATION ignore_accents (provider = icu, locale = 'und-u-ks-level1-kc-true', deterministic = false); +CREATE TABLE test4 (a int, b text); +INSERT INTO test4 VALUES (1, 'cote'), (2, 'côte'), (3, 'coté'), (4, 'côté'); +SELECT * FROM test4 WHERE b = 'cote'; + a | b +---+------ + 1 | cote +(1 row) + +SELECT * FROM test4 WHERE b = 'cote' COLLATE ignore_accents; + a | b +---+------ + 1 | cote + 2 | côte + 3 | coté + 4 | côté +(4 rows) + +SELECT * FROM test4 WHERE b = 'Cote' COLLATE ignore_accents; -- still case-sensitive + a | b +---+--- +(0 rows) + +SELECT * FROM test4 WHERE b = 'Cote' COLLATE case_insensitive; + a | b +---+------ + 1 | cote +(1 row) + +-- foreign keys (should use collation of primary key) +-- PK is case-sensitive, FK is case-insensitive +CREATE TABLE test10pk (x text COLLATE case_sensitive PRIMARY KEY); +INSERT INTO test10pk VALUES ('abc'), ('def'), ('ghi'); +CREATE TABLE test10fk (x text COLLATE case_insensitive REFERENCES test10pk (x) ON DELETE CASCADE); +INSERT INTO test10fk VALUES ('abc'); -- ok +INSERT INTO test10fk VALUES ('ABC'); -- 
error +ERROR: insert or update on table "test10fk" violates foreign key constraint "test10fk_x_fkey" +DETAIL: Key (x)=(ABC) is not present in table "test10pk". +INSERT INTO test10fk VALUES ('xyz'); -- error +ERROR: insert or update on table "test10fk" violates foreign key constraint "test10fk_x_fkey" +DETAIL: Key (x)=(xyz) is not present in table "test10pk". +SELECT * FROM test10pk; + x +----- + abc + def + ghi +(3 rows) + +SELECT * FROM test10fk; + x +----- + abc +(1 row) + +DELETE FROM test10pk WHERE x = 'abc'; +SELECT * FROM test10pk; + x +----- + def + ghi +(2 rows) + +SELECT * FROM test10fk; + x +--- +(0 rows) + +-- PK is case-insensitive, FK is case-sensitive +CREATE TABLE test11pk (x text COLLATE case_insensitive PRIMARY KEY); +INSERT INTO test11pk VALUES ('abc'), ('def'), ('ghi'); +CREATE TABLE test11fk (x text COLLATE case_sensitive REFERENCES test11pk (x) ON DELETE CASCADE); +INSERT INTO test11fk VALUES ('abc'); -- ok +INSERT INTO test11fk VALUES ('ABC'); -- ok +INSERT INTO test11fk VALUES ('xyz'); -- error +ERROR: insert or update on table "test11fk" violates foreign key constraint "test11fk_x_fkey" +DETAIL: Key (x)=(xyz) is not present in table "test11pk". 
+SELECT * FROM test11pk; + x +----- + abc + def + ghi +(3 rows) + +SELECT * FROM test11fk; + x +----- + abc + ABC +(2 rows) + +DELETE FROM test11pk WHERE x = 'abc'; +SELECT * FROM test11pk; + x +----- + def + ghi +(2 rows) + +SELECT * FROM test11fk; + x +--- +(0 rows) + +-- partitioning +CREATE TABLE test20 (a int, b text COLLATE case_insensitive) PARTITION BY LIST (b); +CREATE TABLE test20_1 PARTITION OF test20 FOR VALUES IN ('abc'); +INSERT INTO test20 VALUES (1, 'abc'); +INSERT INTO test20 VALUES (2, 'ABC'); +SELECT * FROM test20_1; + a | b +---+----- + 1 | abc + 2 | ABC +(2 rows) + +CREATE TABLE test21 (a int, b text COLLATE case_insensitive) PARTITION BY RANGE (b); +CREATE TABLE test21_1 PARTITION OF test21 FOR VALUES FROM ('ABC') TO ('DEF'); +INSERT INTO test21 VALUES (1, 'abc'); +INSERT INTO test21 VALUES (2, 'ABC'); +SELECT * FROM test21_1; + a | b +---+----- + 1 | abc + 2 | ABC +(2 rows) + +CREATE TABLE test22 (a int, b text COLLATE case_insensitive) PARTITION BY HASH (b); +CREATE TABLE test22_1 PARTITION OF test22 FOR VALUES WITH (MODULUS 2, REMAINDER 0); +INSERT INTO test22 VALUES (1, 'abc'); +INSERT INTO test22 VALUES (2, 'ABC'); +SELECT * FROM test22_1; + a | b +---+----- + 1 | abc + 2 | ABC +(2 rows) + -- cleanup +SET client_min_messages TO 'warning'; DROP SCHEMA collate_tests CASCADE; -NOTICE: drop cascades to 18 other objects -DETAIL: drop cascades to table collate_test1 -drop cascades to table collate_test_like -drop cascades to table collate_test2 -drop cascades to table collate_test3 -drop cascades to type testdomain_sv -drop cascades to table collate_test4 -drop cascades to table collate_test5 -drop cascades to table collate_test10 -drop cascades to table collate_test6 -drop cascades to view collview1 -drop cascades to view collview2 -drop cascades to view collview3 -drop cascades to type testdomain -drop cascades to function mylt(text,text) -drop cascades to function mylt_noninline(text,text) -drop cascades to function mylt_plpgsql(text,text) 
-drop cascades to function mylt2(text,text) -drop cascades to function dup(anyelement) RESET search_path; -- leave a collation for pg_upgrade test CREATE COLLATION coll_icu_upgrade FROM "und-x-icu"; diff --git a/src/test/regress/expected/collate.linux.utf8.out b/src/test/regress/expected/collate.linux.utf8.out index 400a747cdc..9c5b8abef8 100644 --- a/src/test/regress/expected/collate.linux.utf8.out +++ b/src/test/regress/expected/collate.linux.utf8.out @@ -1117,24 +1117,11 @@ select textrange_en_us('A','Z') @> 'b'::text; drop type textrange_c; drop type textrange_en_us; +-- nondeterministic collations +-- (not supported with libc provider) +CREATE COLLATION ctest_det (locale = 'en_US.utf8', deterministic = true); +CREATE COLLATION ctest_nondet (locale = 'en_US.utf8', deterministic = false); +ERROR: nondeterministic collations not supported with this provider -- cleanup +SET client_min_messages TO 'warning'; DROP SCHEMA collate_tests CASCADE; -NOTICE: drop cascades to 18 other objects -DETAIL: drop cascades to table collate_test1 -drop cascades to table collate_test_like -drop cascades to table collate_test2 -drop cascades to table collate_test3 -drop cascades to type testdomain_sv -drop cascades to table collate_test4 -drop cascades to table collate_test5 -drop cascades to table collate_test10 -drop cascades to table collate_test6 -drop cascades to view collview1 -drop cascades to view collview2 -drop cascades to view collview3 -drop cascades to type testdomain -drop cascades to function mylt(text,text) -drop cascades to function mylt_noninline(text,text) -drop cascades to function mylt_plpgsql(text,text) -drop cascades to function mylt2(text,text) -drop cascades to function dup(anyelement) diff --git a/src/test/regress/sql/collate.icu.utf8.sql b/src/test/regress/sql/collate.icu.utf8.sql index ef39445b30..c0a5cf776d 100644 --- a/src/test/regress/sql/collate.icu.utf8.sql +++ b/src/test/regress/sql/collate.icu.utf8.sql @@ -425,7 +425,122 @@ CREATE INDEX 
collate_dep_test4i ON collate_dep_test4t (b COLLATE test0); drop type textrange_en_us; +-- nondeterministic collations + +CREATE COLLATION ctest_det (provider = icu, locale = 'und', deterministic = true); +CREATE COLLATION ctest_nondet (provider = icu, locale = 'und', deterministic = false); + +CREATE TABLE test6 (a int, b text); +-- same string in different normal forms +INSERT INTO test6 VALUES (1, U&'\00E4bc'); +INSERT INTO test6 VALUES (2, U&'\0061\0308bc'); +SELECT * FROM test6; +SELECT * FROM test6 WHERE b = 'äbc' COLLATE ctest_det; +SELECT * FROM test6 WHERE b = 'äbc' COLLATE ctest_nondet; + +CREATE COLLATION case_sensitive (provider = icu, locale = 'und'); +CREATE COLLATION case_insensitive (provider = icu, locale = 'und-u-ks-level2', deterministic = false); + +SELECT 'abc' <= 'ABC' COLLATE case_sensitive, 'abc' >= 'ABC' COLLATE case_sensitive; +SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_insensitive; + +CREATE TABLE test1cs (x text COLLATE case_sensitive); +CREATE TABLE test2cs (x text COLLATE case_sensitive); +CREATE TABLE test3cs (x text COLLATE case_sensitive); +INSERT INTO test1cs VALUES ('abc'), ('def'), ('ghi'); +INSERT INTO test2cs VALUES ('ABC'), ('ghi'); +INSERT INTO test3cs VALUES ('abc'), ('ABC'), ('def'), ('ghi'); + +SELECT x FROM test1cs UNION SELECT x FROM test2cs; +SELECT x FROM test2cs UNION SELECT x FROM test1cs; +SELECT x FROM test1cs INTERSECT SELECT x FROM test2cs; +SELECT x FROM test2cs INTERSECT SELECT x FROM test1cs; +SELECT x FROM test1cs EXCEPT SELECT x FROM test2cs; +SELECT x FROM test2cs EXCEPT SELECT x FROM test1cs; +SELECT x, count(*) FROM test3cs GROUP BY x ORDER BY x; +SELECT x, row_number() OVER (ORDER BY x), rank() OVER (ORDER BY x) FROM test3cs ORDER BY x; +CREATE UNIQUE INDEX ON test1cs (x); -- ok +INSERT INTO test1cs VALUES ('ABC'); -- ok +CREATE UNIQUE INDEX ON test3cs (x); -- ok + +CREATE TABLE test1ci (x text COLLATE case_insensitive); +CREATE TABLE test2ci (x text COLLATE 
case_insensitive); +CREATE TABLE test3ci (x text COLLATE case_insensitive); +INSERT INTO test1ci VALUES ('abc'), ('def'), ('ghi'); +INSERT INTO test2ci VALUES ('ABC'), ('ghi'); +INSERT INTO test3ci VALUES ('abc'), ('ABC'), ('def'), ('ghi'); + +SELECT x FROM test1ci UNION SELECT x FROM test2ci; +SELECT x FROM test2ci UNION SELECT x FROM test1ci; +SELECT x FROM test1ci INTERSECT SELECT x FROM test2ci; +SELECT x FROM test2ci INTERSECT SELECT x FROM test1ci; +SELECT x FROM test1ci EXCEPT SELECT x FROM test2ci; +SELECT x FROM test2ci EXCEPT SELECT x FROM test1ci; +SELECT x, count(*) FROM test3ci GROUP BY x ORDER BY x; +SELECT x, row_number() OVER (ORDER BY x), rank() OVER (ORDER BY x) FROM test3ci ORDER BY x; +CREATE UNIQUE INDEX ON test1ci (x); -- ok +INSERT INTO test1ci VALUES ('ABC'); -- error +CREATE UNIQUE INDEX ON test3ci (x); -- error + +CREATE COLLATION ignore_accents (provider = icu, locale = 'und-u-ks-level1-kc-true', deterministic = false); + +CREATE TABLE test4 (a int, b text); +INSERT INTO test4 VALUES (1, 'cote'), (2, 'côte'), (3, 'coté'), (4, 'côté'); +SELECT * FROM test4 WHERE b = 'cote'; +SELECT * FROM test4 WHERE b = 'cote' COLLATE ignore_accents; +SELECT * FROM test4 WHERE b = 'Cote' COLLATE ignore_accents; -- still case-sensitive +SELECT * FROM test4 WHERE b = 'Cote' COLLATE case_insensitive; + +-- foreign keys (should use collation of primary key) + +-- PK is case-sensitive, FK is case-insensitive +CREATE TABLE test10pk (x text COLLATE case_sensitive PRIMARY KEY); +INSERT INTO test10pk VALUES ('abc'), ('def'), ('ghi'); +CREATE TABLE test10fk (x text COLLATE case_insensitive REFERENCES test10pk (x) ON DELETE CASCADE); +INSERT INTO test10fk VALUES ('abc'); -- ok +INSERT INTO test10fk VALUES ('ABC'); -- error +INSERT INTO test10fk VALUES ('xyz'); -- error +SELECT * FROM test10pk; +SELECT * FROM test10fk; +DELETE FROM test10pk WHERE x = 'abc'; +SELECT * FROM test10pk; +SELECT * FROM test10fk; + +-- PK is case-insensitive, FK is case-sensitive +CREATE 
TABLE test11pk (x text COLLATE case_insensitive PRIMARY KEY); +INSERT INTO test11pk VALUES ('abc'), ('def'), ('ghi'); +CREATE TABLE test11fk (x text COLLATE case_sensitive REFERENCES test11pk (x) ON DELETE CASCADE); +INSERT INTO test11fk VALUES ('abc'); -- ok +INSERT INTO test11fk VALUES ('ABC'); -- ok +INSERT INTO test11fk VALUES ('xyz'); -- error +SELECT * FROM test11pk; +SELECT * FROM test11fk; +DELETE FROM test11pk WHERE x = 'abc'; +SELECT * FROM test11pk; +SELECT * FROM test11fk; + +-- partitioning +CREATE TABLE test20 (a int, b text COLLATE case_insensitive) PARTITION BY LIST (b); +CREATE TABLE test20_1 PARTITION OF test20 FOR VALUES IN ('abc'); +INSERT INTO test20 VALUES (1, 'abc'); +INSERT INTO test20 VALUES (2, 'ABC'); +SELECT * FROM test20_1; + +CREATE TABLE test21 (a int, b text COLLATE case_insensitive) PARTITION BY RANGE (b); +CREATE TABLE test21_1 PARTITION OF test21 FOR VALUES FROM ('ABC') TO ('DEF'); +INSERT INTO test21 VALUES (1, 'abc'); +INSERT INTO test21 VALUES (2, 'ABC'); +SELECT * FROM test21_1; + +CREATE TABLE test22 (a int, b text COLLATE case_insensitive) PARTITION BY HASH (b); +CREATE TABLE test22_1 PARTITION OF test22 FOR VALUES WITH (MODULUS 2, REMAINDER 0); +INSERT INTO test22 VALUES (1, 'abc'); +INSERT INTO test22 VALUES (2, 'ABC'); +SELECT * FROM test22_1; + + -- cleanup +SET client_min_messages TO 'warning'; DROP SCHEMA collate_tests CASCADE; RESET search_path; diff --git a/src/test/regress/sql/collate.linux.utf8.sql b/src/test/regress/sql/collate.linux.utf8.sql index b51162e3a1..3a3ece9c62 100644 --- a/src/test/regress/sql/collate.linux.utf8.sql +++ b/src/test/regress/sql/collate.linux.utf8.sql @@ -428,5 +428,13 @@ CREATE INDEX collate_dep_test4i ON collate_dep_test4t (b COLLATE test0); drop type textrange_en_us; +-- nondeterministic collations +-- (not supported with libc provider) + +CREATE COLLATION ctest_det (locale = 'en_US.utf8', deterministic = true); +CREATE COLLATION ctest_nondet (locale = 'en_US.utf8', deterministic = 
false); + + -- cleanup +SET client_min_messages TO 'warning'; DROP SCHEMA collate_tests CASCADE; base-commit: 0944ec54de389b4b8a471ca1f40f1b9d81de1f30 -- 2.20.1