From d63df8170ec8e9afedcb894f87f73a409aa27e6d Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Tue, 18 Dec 2018 22:17:53 +0100 Subject: [PATCH v1] Insensitive collations This adds a flag "insensitive" to collations. Such a collation disables various optimizations that assume that strings are equal only if they are byte-wise equal. That then allows use cases such as case-insensitive or accent-insensitive comparisons or handling of strings with different Unicode normal forms. --- contrib/bloom/bloom.h | 1 + contrib/bloom/blutils.c | 3 +- doc/src/sgml/catalogs.sgml | 7 + doc/src/sgml/charset.sgml | 11 +- doc/src/sgml/ref/create_collation.sgml | 18 +++ src/backend/access/hash/hashfunc.c | 45 ++++++ src/backend/catalog/pg_collation.c | 2 + src/backend/commands/collationcmds.c | 15 +- src/backend/executor/execExpr.c | 4 +- src/backend/executor/execGrouping.c | 12 +- src/backend/executor/execPartition.c | 1 + src/backend/executor/nodeAgg.c | 4 + src/backend/executor/nodeGroup.c | 1 + src/backend/executor/nodeHash.c | 14 +- src/backend/executor/nodeHashjoin.c | 5 + src/backend/executor/nodeRecursiveunion.c | 1 + src/backend/executor/nodeSetOp.c | 2 + src/backend/executor/nodeSubplan.c | 8 + src/backend/executor/nodeUnique.c | 1 + src/backend/executor/nodeWindowAgg.c | 2 + src/backend/nodes/copyfuncs.c | 7 + src/backend/nodes/outfuncs.c | 29 ++++ src/backend/nodes/readfuncs.c | 7 + src/backend/optimizer/plan/createplan.c | 54 ++++++- src/backend/optimizer/util/tlist.c | 25 ++++ src/backend/partitioning/partbounds.c | 4 +- src/backend/partitioning/partprune.c | 3 +- src/backend/utils/adt/arrayfuncs.c | 2 +- src/backend/utils/adt/orderedsetaggs.c | 1 + src/backend/utils/adt/pg_locale.c | 1 + src/backend/utils/adt/varchar.c | 14 ++ src/backend/utils/adt/varlena.c | 45 +++++- src/backend/utils/cache/catcache.c | 2 +- src/bin/initdb/initdb.c | 4 +- src/include/catalog/pg_collation.h | 2 + src/include/executor/executor.h | 3 + src/include/executor/hashjoin.h | 1 + src/include/executor/nodeHash.h | 2 +- src/include/nodes/execnodes.h | 3 + src/include/nodes/plannodes.h | 7 + src/include/optimizer/planmain.h | 2 +- src/include/optimizer/tlist.h | 1 + src/include/partitioning/partbounds.h | 1 + src/include/utils/pg_locale.h | 1 + .../regress/expected/collate.icu.utf8.out | 138 +++++++++++++++++- src/test/regress/sql/collate.icu.utf8.sql | 39 +++++ 46 files changed, 516 insertions(+), 39 deletions(-) diff --git a/contrib/bloom/bloom.h b/contrib/bloom/bloom.h index 3973ac75e8..0b05f9796d 100644 --- a/contrib/bloom/bloom.h +++ b/contrib/bloom/bloom.h @@ -137,6 +137,7 @@ typedef struct BloomMetaPageData typedef struct BloomState { FmgrInfo hashFn[INDEX_MAX_KEYS]; + Oid collations[INDEX_MAX_KEYS]; BloomOptions opts; /* copy of options on index's metapage */ int32 nColumns; diff --git a/contrib/bloom/blutils.c b/contrib/bloom/blutils.c index 6b2b9e3742..7bd0d0aa2f 100644 --- a/contrib/bloom/blutils.c +++ b/contrib/bloom/blutils.c @@ -163,6 +163,7 @@ initBloomState(BloomState *state, Relation index) fmgr_info_copy(&(state->hashFn[i]), index_getprocinfo(index, i + 1, BLOOM_HASH_PROC), CurrentMemoryContext); + state->collations[i] = index->rd_indcollation[i]; } /* Initialize amcache if needed with options from metapage */ @@ -267,7 +268,7 @@ signValue(BloomState *state, BloomSignatureWord *sign, Datum value, int attno) * different columns will be mapped into different bits because of step * above */ - hashVal = DatumGetInt32(FunctionCall1(&state->hashFn[attno], value)); + hashVal = DatumGetInt32(FunctionCall1Coll(&state->hashFn[attno], state->collations[attno], value)); mySrand(hashVal ^ myRand()); for (j = 0; j < state->opts.bitSize[attno]; j++) diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml index 8d0cab5da6..0959c09ea8 100644 --- a/doc/src/sgml/catalogs.sgml +++ b/doc/src/sgml/catalogs.sgml @@ -2077,6 +2077,13 @@ <structname>pg_collation</structname> Columns default, c = libc, i = icu + + collisinsensitive + bool + + Is the collation insensitive? + + collencoding int4 diff --git a/doc/src/sgml/charset.sgml b/doc/src/sgml/charset.sgml index a6143ef8a7..6108af7e55 100644 --- a/doc/src/sgml/charset.sgml +++ b/doc/src/sgml/charset.sgml @@ -847,11 +847,12 @@ ICU collations Note that while this system allows creating collations that ignore - case or ignore accents or similar (using - the ks key), PostgreSQL does not at the moment allow - such collations to act in a truly case- or accent-insensitive manner. Any - strings that compare equal according to the collation but are not - byte-wise equal will be sorted according to their byte values. + case or ignore accents or similar (using the + ks key), in order for such such collations to act in a + truly case- or accent-insensitive manner, they also need to be declared as + INSENSITIVE in CREATE COLLATION. + Otherwise, any strings that compare equal according to the collation but + are not byte-wise equal will be sorted according to their byte values. diff --git a/doc/src/sgml/ref/create_collation.sgml b/doc/src/sgml/ref/create_collation.sgml index 038797fce1..4ba597193a 100644 --- a/doc/src/sgml/ref/create_collation.sgml +++ b/doc/src/sgml/ref/create_collation.sgml @@ -23,6 +23,7 @@ [ LC_COLLATE = lc_collate, ] [ LC_CTYPE = lc_ctype, ] [ PROVIDER = provider, ] + [ INSENSITIVE, ] [ VERSION = version ] ) CREATE COLLATION [ IF NOT EXISTS ] name FROM existing_collation @@ -124,6 +125,23 @@ Parameters + + INSENSITIVE + + + + Makes the collation insensitive. An insensitive collation allows + strings that are not byte-wise equal to be considered logically equal + nonetheless. Otherwise, PostgreSQL breaks ties using a byte-wise + comparison. Note that declaring a collation insensitive does not by + itself make the collation actually act, say, case- or + accent-insensitive. You need to choose an appropriate + LC_COLLATE setting and declare + it insensitive here. + + + + version diff --git a/src/backend/access/hash/hashfunc.c b/src/backend/access/hash/hashfunc.c index a0060a633d..6e15cc5c4f 100644 --- a/src/backend/access/hash/hashfunc.c +++ b/src/backend/access/hash/hashfunc.c @@ -27,7 +27,9 @@ #include "postgres.h" #include "access/hash.h" +#include "catalog/pg_collation.h" #include "utils/builtins.h" +#include "utils/pg_locale.h" /* * Datatype-specific hash functions. @@ -242,8 +244,44 @@ Datum hashtext(PG_FUNCTION_ARGS) { text *key = PG_GETARG_TEXT_PP(0); + Oid collid = PG_GET_COLLATION(); Datum result; + if (!collid) + elog(ERROR, "FIXME: hashtext() called without collation"); + + if (collid != DEFAULT_COLLATION_OID) + { + pg_locale_t mylocale = pg_newlocale_from_collation(collid); + + if (mylocale->insensitive) + { + if (mylocale->provider == COLLPROVIDER_ICU) + { + int32_t ulen = -1; + UChar *uchar = NULL; + Size bsize; + uint8_t *buf; + + ulen = icu_to_uchar(&uchar, VARDATA_ANY(key), VARSIZE_ANY_EXHDR(key)); + + bsize = ucol_getSortKey(mylocale->info.icu.ucol, + uchar, ulen, NULL, 0); + buf = palloc(bsize); + ucol_getSortKey(mylocale->info.icu.ucol, + uchar, ulen, buf, bsize); + + result = hash_any(buf, bsize); + + PG_FREE_IF_COPY(key, 0); + + return result; + } + else + elog(ERROR, "not supported yet"); + } + } + /* * Note: this is currently identical in behavior to hashvarlena, but keep * it as a separate function in case we someday want to do something @@ -262,8 +300,15 @@ Datum hashtextextended(PG_FUNCTION_ARGS) { text *key = PG_GETARG_TEXT_PP(0); + Oid collid = PG_GET_COLLATION(); Datum result; + if (!collid) + elog(ERROR, "FIXME: hashtextextended() called without collation"); + + if (collid != DEFAULT_COLLATION_OID) + elog(ERROR, "TODO"); + /* Same approach as hashtext */ result = hash_any_extended((unsigned char *) VARDATA_ANY(key), VARSIZE_ANY_EXHDR(key), diff --git a/src/backend/catalog/pg_collation.c b/src/backend/catalog/pg_collation.c index d4543b511e..40b8b43d0c 100644 --- a/src/backend/catalog/pg_collation.c +++ b/src/backend/catalog/pg_collation.c @@ -47,6 +47,7 @@ Oid CollationCreate(const char *collname, Oid collnamespace, Oid collowner, char collprovider, + bool collisinsensitive, int32 collencoding, const char *collcollate, const char *collctype, const char *collversion, @@ -161,6 +162,7 @@ CollationCreate(const char *collname, Oid collnamespace, values[Anum_pg_collation_collnamespace - 1] = ObjectIdGetDatum(collnamespace); values[Anum_pg_collation_collowner - 1] = ObjectIdGetDatum(collowner); values[Anum_pg_collation_collprovider - 1] = CharGetDatum(collprovider); + values[Anum_pg_collation_collisinsensitive - 1] = BoolGetDatum(collisinsensitive); values[Anum_pg_collation_collencoding - 1] = Int32GetDatum(collencoding); namestrcpy(&name_collate, collcollate); values[Anum_pg_collation_collcollate - 1] = NameGetDatum(&name_collate); diff --git a/src/backend/commands/collationcmds.c b/src/backend/commands/collationcmds.c index 8fb51e8c3d..aa4c368e51 100644 --- a/src/backend/commands/collationcmds.c +++ b/src/backend/commands/collationcmds.c @@ -59,10 +59,12 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e DefElem *lccollateEl = NULL; DefElem *lcctypeEl = NULL; DefElem *providerEl = NULL; + DefElem *insensitiveEl = NULL; DefElem *versionEl = NULL; char *collcollate = NULL; char *collctype = NULL; char *collproviderstr = NULL; + bool collisinsensitive = false; int collencoding = 0; char collprovider = 0; char *collversion = NULL; @@ -91,6 +93,8 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e defelp = &lcctypeEl; else if (strcmp(defel->defname, "provider") == 0) defelp = &providerEl; + else if (strcmp(defel->defname, "insensitive") == 0) + defelp = &insensitiveEl; else if (strcmp(defel->defname, "version") == 0) defelp = &versionEl; else @@ -125,6 +129,7 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e collcollate = pstrdup(NameStr(((Form_pg_collation) GETSTRUCT(tp))->collcollate)); collctype = pstrdup(NameStr(((Form_pg_collation) GETSTRUCT(tp))->collctype)); collprovider = ((Form_pg_collation) GETSTRUCT(tp))->collprovider; + collisinsensitive = ((Form_pg_collation) GETSTRUCT(tp))->collisinsensitive; collencoding = ((Form_pg_collation) GETSTRUCT(tp))->collencoding; ReleaseSysCache(tp); @@ -157,6 +162,9 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e if (providerEl) collproviderstr = defGetString(providerEl); + if (insensitiveEl) + collisinsensitive = defGetBoolean(insensitiveEl); + if (versionEl) collversion = defGetString(versionEl); @@ -203,6 +211,7 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e collNamespace, GetUserId(), collprovider, + collisinsensitive, collencoding, collcollate, collctype, @@ -586,7 +595,7 @@ pg_import_system_collations(PG_FUNCTION_ARGS) * about existing ones. */ collid = CollationCreate(localebuf, nspid, GetUserId(), - COLLPROVIDER_LIBC, enc, + COLLPROVIDER_LIBC, false, enc, localebuf, localebuf, get_collation_actual_version(COLLPROVIDER_LIBC, localebuf), true, true); @@ -647,7 +656,7 @@ pg_import_system_collations(PG_FUNCTION_ARGS) int enc = aliases[i].enc; collid = CollationCreate(alias, nspid, GetUserId(), - COLLPROVIDER_LIBC, enc, + COLLPROVIDER_LIBC, false, enc, locale, locale, get_collation_actual_version(COLLPROVIDER_LIBC, locale), true, true); @@ -709,7 +718,7 @@ pg_import_system_collations(PG_FUNCTION_ARGS) collid = CollationCreate(psprintf("%s-x-icu", langtag), nspid, GetUserId(), - COLLPROVIDER_ICU, -1, + COLLPROVIDER_ICU, false, -1, collcollate, collcollate, get_collation_actual_version(COLLPROVIDER_ICU, collcollate), true, true); diff --git a/src/backend/executor/execExpr.c b/src/backend/executor/execExpr.c index d9087cac15..2b4c988d01 100644 --- a/src/backend/executor/execExpr.c +++ b/src/backend/executor/execExpr.c @@ -3316,6 +3316,7 @@ ExecBuildGroupingEqual(TupleDesc ldesc, TupleDesc rdesc, int numCols, AttrNumber *keyColIdx, Oid *eqfunctions, + Oid *collations, PlanState *parent) { ExprState *state = makeNode(ExprState); @@ -3376,6 +3377,7 @@ ExecBuildGroupingEqual(TupleDesc ldesc, TupleDesc rdesc, Form_pg_attribute latt = TupleDescAttr(ldesc, attno - 1); Form_pg_attribute ratt = TupleDescAttr(rdesc, attno - 1); Oid foid = eqfunctions[natt]; + Oid collid = collations[natt]; FmgrInfo *finfo; FunctionCallInfo fcinfo; AclResult aclresult; @@ -3393,7 +3395,7 @@ ExecBuildGroupingEqual(TupleDesc ldesc, TupleDesc rdesc, fmgr_info(foid, finfo); fmgr_info_set_expr(NULL, finfo); InitFunctionCallInfoData(*fcinfo, finfo, 2, - InvalidOid, NULL, NULL); + collid, NULL, NULL); /* left arg */ scratch.opcode = EEOP_INNER_VAR; diff --git a/src/backend/executor/execGrouping.c b/src/backend/executor/execGrouping.c index abce1e95cb..ba69dd8b06 100644 --- a/src/backend/executor/execGrouping.c +++ b/src/backend/executor/execGrouping.c @@ -61,6 +61,7 @@ execTuplesMatchPrepare(TupleDesc desc, int numCols, AttrNumber *keyColIdx, Oid *eqOperators, + Oid *collations, PlanState *parent) { Oid *eqFunctions = (Oid *) palloc(numCols * sizeof(Oid)); @@ -76,7 +77,7 @@ execTuplesMatchPrepare(TupleDesc desc, /* build actual expression */ expr = ExecBuildGroupingEqual(desc, desc, NULL, NULL, - numCols, keyColIdx, eqFunctions, + numCols, keyColIdx, eqFunctions, collations, parent); return expr; @@ -155,6 +156,7 @@ BuildTupleHashTable(PlanState *parent, int numCols, AttrNumber *keyColIdx, Oid *eqfuncoids, FmgrInfo *hashfunctions, + Oid *collations, long nbuckets, Size additionalsize, MemoryContext tablecxt, MemoryContext tempcxt, bool use_variable_hash_iv) @@ -174,6 +176,7 @@ BuildTupleHashTable(PlanState *parent, hashtable->numCols = numCols; hashtable->keyColIdx = keyColIdx; hashtable->tab_hash_funcs = hashfunctions; + hashtable->tab_collations = collations; hashtable->tablecxt = tablecxt; hashtable->tempcxt = tempcxt; hashtable->entrysize = entrysize; @@ -211,7 +214,7 @@ BuildTupleHashTable(PlanState *parent, hashtable->tab_eq_func = ExecBuildGroupingEqual(inputDesc, inputDesc, &TTSOpsMinimalTuple, &TTSOpsMinimalTuple, numCols, - keyColIdx, eqfuncoids, + keyColIdx, eqfuncoids, collations, parent); MemoryContextSwitchTo(oldcontext); @@ -374,8 +377,9 @@ TupleHashTableHash(struct tuplehash_hash *tb, const MinimalTuple tuple) { uint32 hkey; - hkey = DatumGetUInt32(FunctionCall1(&hashfunctions[i], - attr)); + hkey = DatumGetUInt32(FunctionCall1Coll(&hashfunctions[i], + hashtable->tab_collations[i], + attr)); hashkey ^= hkey; } } diff --git a/src/backend/executor/execPartition.c b/src/backend/executor/execPartition.c index 179a501f30..0387717180 100644 --- a/src/backend/executor/execPartition.c +++ b/src/backend/executor/execPartition.c @@ -1217,6 +1217,7 @@ get_partition_for_tuple(PartitionDispatch pd, Datum *values, bool *isnull) greatest_modulus = get_hash_partition_greatest_modulus(boundinfo); rowHash = compute_partition_hash_value(key->partnatts, key->partsupfunc, + key->partcollation, values, isnull); part_index = boundinfo->indexes[rowHash % greatest_modulus]; diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c index daf56cd3d1..0dd2b02a9e 100644 --- a/src/backend/executor/nodeAgg.c +++ b/src/backend/executor/nodeAgg.c @@ -1283,6 +1283,7 @@ build_hash_table(AggState *aggstate) perhash->hashGrpColIdxHash, perhash->eqfuncoids, perhash->hashfunctions, + perhash->aggnode->grpCollations, perhash->aggnode->numGroups, additionalsize, aggstate->hashcontext->ecxt_per_tuple_memory, @@ -2376,6 +2377,7 @@ ExecInitAgg(Agg *node, EState *estate, int eflags) length, aggnode->grpColIdx, aggnode->grpOperators, + aggnode->grpCollations, (PlanState *) aggstate); } @@ -2387,6 +2389,7 @@ ExecInitAgg(Agg *node, EState *estate, int eflags) aggnode->numCols, aggnode->grpColIdx, aggnode->grpOperators, + aggnode->grpCollations, (PlanState *) aggstate); } } @@ -3141,6 +3144,7 @@ build_pertrans_for_aggref(AggStatePerTrans pertrans, numDistinctCols, pertrans->sortColIdx, ops, + pertrans->sortCollations, &aggstate->ss.ps); pfree(ops); } diff --git a/src/backend/executor/nodeGroup.c b/src/backend/executor/nodeGroup.c index 58e0b10cd1..95b19be1ed 100644 --- a/src/backend/executor/nodeGroup.c +++ b/src/backend/executor/nodeGroup.c @@ -212,6 +212,7 @@ ExecInitGroup(Group *node, EState *estate, int eflags) node->numCols, node->grpColIdx, node->grpOperators, + node->grpCollations, &grpstate->ss.ps); return grpstate; diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c index ba2f6686cf..3a86d9b964 100644 --- a/src/backend/executor/nodeHash.c +++ b/src/backend/executor/nodeHash.c @@ -425,7 +425,7 @@ ExecEndHash(HashState *node) * ---------------------------------------------------------------- */ HashJoinTable -ExecHashTableCreate(HashState *state, List *hashOperators, bool keepNulls) +ExecHashTableCreate(HashState *state, List *hashOperators, List *hashCollations, bool keepNulls) { Hash *node; HashJoinTable hashtable; @@ -439,6 +439,7 @@ ExecHashTableCreate(HashState *state, List *hashOperators, bool keepNulls) int nkeys; int i; ListCell *ho; + ListCell *hc; MemoryContext oldcxt; /* @@ -541,8 +542,9 @@ ExecHashTableCreate(HashState *state, List *hashOperators, bool keepNulls) hashtable->inner_hashfunctions = (FmgrInfo *) palloc(nkeys * sizeof(FmgrInfo)); hashtable->hashStrict = (bool *) palloc(nkeys * sizeof(bool)); + hashtable->collations = (Oid *) palloc(nkeys * sizeof(Oid)); i = 0; - foreach(ho, hashOperators) + forboth(ho, hashOperators, hc, hashCollations) { Oid hashop = lfirst_oid(ho); Oid left_hashfn; @@ -554,6 +556,7 @@ ExecHashTableCreate(HashState *state, List *hashOperators, bool keepNulls) fmgr_info(left_hashfn, &hashtable->outer_hashfunctions[i]); fmgr_info(right_hashfn, &hashtable->inner_hashfunctions[i]); hashtable->hashStrict[i] = op_strict(hashop); + hashtable->collations[i] = lfirst_oid(hc); i++; } @@ -1847,7 +1850,7 @@ ExecHashGetHashValue(HashJoinTable hashtable, /* Compute the hash function */ uint32 hkey; - hkey = DatumGetUInt32(FunctionCall1(&hashfunctions[i], keyval)); + hkey = DatumGetUInt32(FunctionCall1Coll(&hashfunctions[i], hashtable->collations[i], keyval)); hashkey ^= hkey; } @@ -2303,8 +2306,9 @@ ExecHashBuildSkewHash(HashJoinTable hashtable, Hash *node, int mcvsToUse) uint32 hashvalue; int bucket; - hashvalue = DatumGetUInt32(FunctionCall1(&hashfunctions[0], - sslot.values[i])); + hashvalue = DatumGetUInt32(FunctionCall1Coll(&hashfunctions[0], + hashtable->collations[0], + sslot.values[i])); /* * While we have not hit a hole in the hashtable and have not hit diff --git a/src/backend/executor/nodeHashjoin.c b/src/backend/executor/nodeHashjoin.c index c2c8beffc1..c44cfda51d 100644 --- a/src/backend/executor/nodeHashjoin.c +++ b/src/backend/executor/nodeHashjoin.c @@ -278,6 +278,7 @@ ExecHashJoinImpl(PlanState *pstate, bool parallel) */ hashtable = ExecHashTableCreate(hashNode, node->hj_HashOperators, + node->hj_Collations, HJ_FILL_INNER(node)); node->hj_HashTable = hashtable; @@ -603,6 +604,7 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags) List *rclauses; List *rhclauses; List *hoperators; + List *hcollations; TupleDesc outerDesc, innerDesc; ListCell *l; @@ -738,6 +740,7 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags) rclauses = NIL; rhclauses = NIL; hoperators = NIL; + hcollations = NIL; foreach(l, node->hashclauses) { OpExpr *hclause = lfirst_node(OpExpr, l); @@ -749,10 +752,12 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags) rhclauses = lappend(rhclauses, ExecInitExpr(lsecond(hclause->args), innerPlanState(hjstate))); hoperators = lappend_oid(hoperators, hclause->opno); + hcollations = lappend_oid(hcollations, hclause->inputcollid); } hjstate->hj_OuterHashKeys = lclauses; hjstate->hj_InnerHashKeys = rclauses; hjstate->hj_HashOperators = hoperators; + hjstate->hj_Collations = hcollations; /* child Hash node needs to evaluate inner hash keys, too */ ((HashState *) innerPlanState(hjstate))->hashkeys = rhclauses; diff --git a/src/backend/executor/nodeRecursiveunion.c b/src/backend/executor/nodeRecursiveunion.c index 2d26cec831..8733c85afc 100644 --- a/src/backend/executor/nodeRecursiveunion.c +++ b/src/backend/executor/nodeRecursiveunion.c @@ -43,6 +43,7 @@ build_hash_table(RecursiveUnionState *rustate) node->dupColIdx, rustate->eqfuncoids, rustate->hashfunctions, + node->dupCollations, node->numGroups, 0, rustate->tableContext, diff --git a/src/backend/executor/nodeSetOp.c b/src/backend/executor/nodeSetOp.c index 48b7aa9b8b..7b101ab946 100644 --- a/src/backend/executor/nodeSetOp.c +++ b/src/backend/executor/nodeSetOp.c @@ -132,6 +132,7 @@ build_hash_table(SetOpState *setopstate) node->dupColIdx, setopstate->eqfuncoids, setopstate->hashfunctions, + node->dupCollations, node->numGroups, 0, setopstate->tableContext, @@ -553,6 +554,7 @@ ExecInitSetOp(SetOp *node, EState *estate, int eflags) node->numCols, node->dupColIdx, node->dupOperators, + node->dupCollations, &setopstate->ps); if (node->strategy == SETOP_HASHED) diff --git a/src/backend/executor/nodeSubplan.c b/src/backend/executor/nodeSubplan.c index 84a1a91682..7f90b5f828 100644 --- a/src/backend/executor/nodeSubplan.c +++ b/src/backend/executor/nodeSubplan.c @@ -511,6 +511,7 @@ buildSubPlanHash(SubPlanState *node, ExprContext *econtext) node->keyColIdx, node->tab_eq_funcoids, node->tab_hash_funcs, + node->tab_collations, nbuckets, 0, node->hashtablecxt, @@ -533,6 +534,7 @@ buildSubPlanHash(SubPlanState *node, ExprContext *econtext) node->keyColIdx, node->tab_eq_funcoids, node->tab_hash_funcs, + node->tab_collations, nbuckets, 0, node->hashtablecxt, @@ -808,6 +810,7 @@ ExecInitSubPlan(SubPlan *subplan, PlanState *parent) sstate->tab_eq_funcoids = NULL; sstate->tab_hash_funcs = NULL; sstate->tab_eq_funcs = NULL; + sstate->tab_collations = NULL; sstate->lhs_hash_funcs = NULL; sstate->cur_eq_funcs = NULL; @@ -906,6 +909,7 @@ ExecInitSubPlan(SubPlan *subplan, PlanState *parent) sstate->tab_eq_funcoids = (Oid *) palloc(ncols * sizeof(Oid)); sstate->tab_hash_funcs = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo)); sstate->tab_eq_funcs = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo)); + sstate->tab_collations = (Oid *) palloc(ncols * sizeof(Oid)); sstate->lhs_hash_funcs = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo)); sstate->cur_eq_funcs = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo)); i = 1; @@ -956,6 +960,9 @@ ExecInitSubPlan(SubPlan *subplan, PlanState *parent) fmgr_info(left_hashfn, &sstate->lhs_hash_funcs[i - 1]); fmgr_info(right_hashfn, &sstate->tab_hash_funcs[i - 1]); + /* Set collation */ + sstate->tab_collations[i - 1] = opexpr->inputcollid; + i++; } @@ -992,6 +999,7 @@ ExecInitSubPlan(SubPlan *subplan, PlanState *parent) ncols, sstate->keyColIdx, sstate->tab_eq_funcoids, + sstate->tab_collations, parent); } diff --git a/src/backend/executor/nodeUnique.c b/src/backend/executor/nodeUnique.c index c5e4232e68..79cad1ca70 100644 --- a/src/backend/executor/nodeUnique.c +++ b/src/backend/executor/nodeUnique.c @@ -152,6 +152,7 @@ ExecInitUnique(Unique *node, EState *estate, int eflags) node->numCols, node->uniqColIdx, node->uniqOperators, + node->uniqCollations, &uniquestate->ps); return uniquestate; diff --git a/src/backend/executor/nodeWindowAgg.c b/src/backend/executor/nodeWindowAgg.c index 298e370745..6312ec451f 100644 --- a/src/backend/executor/nodeWindowAgg.c +++ b/src/backend/executor/nodeWindowAgg.c @@ -2370,6 +2370,7 @@ ExecInitWindowAgg(WindowAgg *node, EState *estate, int eflags) node->partNumCols, node->partColIdx, node->partOperators, + node->partCollations, &winstate->ss.ps); if (node->ordNumCols > 0) @@ -2378,6 +2379,7 @@ ExecInitWindowAgg(WindowAgg *node, EState *estate, int eflags) node->ordNumCols, node->ordColIdx, node->ordOperators, + node->ordCollations, &winstate->ss.ps); /* diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index db49968409..201a19ea2e 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -297,6 +297,7 @@ _copyRecursiveUnion(const RecursiveUnion *from) { COPY_POINTER_FIELD(dupColIdx, from->numCols * sizeof(AttrNumber)); COPY_POINTER_FIELD(dupOperators, from->numCols * sizeof(Oid)); + COPY_POINTER_FIELD(dupCollations, from->numCols * sizeof(Oid)); } COPY_SCALAR_FIELD(numGroups); @@ -956,6 +957,7 @@ _copyGroup(const Group *from) COPY_SCALAR_FIELD(numCols); COPY_POINTER_FIELD(grpColIdx, from->numCols * sizeof(AttrNumber)); COPY_POINTER_FIELD(grpOperators, from->numCols * sizeof(Oid)); + COPY_POINTER_FIELD(grpCollations, from->numCols * sizeof(Oid)); return newnode; } @@ -977,6 +979,7 @@ _copyAgg(const Agg *from) { COPY_POINTER_FIELD(grpColIdx, from->numCols * sizeof(AttrNumber)); COPY_POINTER_FIELD(grpOperators, from->numCols * sizeof(Oid)); + COPY_POINTER_FIELD(grpCollations, from->numCols * sizeof(Oid)); } COPY_SCALAR_FIELD(numGroups); COPY_BITMAPSET_FIELD(aggParams); @@ -1002,12 +1005,14 @@ _copyWindowAgg(const WindowAgg *from) { COPY_POINTER_FIELD(partColIdx, from->partNumCols * sizeof(AttrNumber)); COPY_POINTER_FIELD(partOperators, from->partNumCols * sizeof(Oid)); + COPY_POINTER_FIELD(partCollations, from->partNumCols * sizeof(Oid)); } COPY_SCALAR_FIELD(ordNumCols); if (from->ordNumCols > 0) { COPY_POINTER_FIELD(ordColIdx, from->ordNumCols * sizeof(AttrNumber)); COPY_POINTER_FIELD(ordOperators, from->ordNumCols * sizeof(Oid)); + COPY_POINTER_FIELD(ordCollations, from->ordNumCols * sizeof(Oid)); } COPY_SCALAR_FIELD(frameOptions); COPY_NODE_FIELD(startOffset); @@ -1040,6 +1045,7 @@ _copyUnique(const Unique *from) COPY_SCALAR_FIELD(numCols); COPY_POINTER_FIELD(uniqColIdx, from->numCols * sizeof(AttrNumber)); COPY_POINTER_FIELD(uniqOperators, from->numCols * sizeof(Oid)); + COPY_POINTER_FIELD(uniqCollations, from->numCols * sizeof(Oid)); return newnode; } @@ -1089,6 +1095,7 @@ _copySetOp(const SetOp *from) COPY_SCALAR_FIELD(numCols); COPY_POINTER_FIELD(dupColIdx, from->numCols * sizeof(AttrNumber)); COPY_POINTER_FIELD(dupOperators, from->numCols * sizeof(Oid)); + COPY_POINTER_FIELD(dupCollations, from->numCols * sizeof(Oid)); COPY_SCALAR_FIELD(flagColIdx); COPY_SCALAR_FIELD(firstFlag); COPY_SCALAR_FIELD(numGroups); diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 6edc7f2359..3a423ef1fa 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -460,6 +460,10 @@ _outRecursiveUnion(StringInfo str, const RecursiveUnion *node) for (i = 0; i < node->numCols; i++) appendStringInfo(str, " %u", node->dupOperators[i]); + appendStringInfoString(str, " :dupCollations"); + for (i = 0; i < node->numCols; i++) + appendStringInfo(str, " %u", node->dupCollations[i]); + WRITE_LONG_FIELD(numGroups); } @@ -807,6 +811,10 @@ _outAgg(StringInfo str, const Agg *node) for (i = 0; i < node->numCols; i++) appendStringInfo(str, " %u", node->grpOperators[i]); + appendStringInfoString(str, " :grpCollations"); + for (i = 0; i < node->numCols; i++) + appendStringInfo(str, " %u", node->grpCollations[i]); + WRITE_LONG_FIELD(numGroups); WRITE_BITMAPSET_FIELD(aggParams); WRITE_NODE_FIELD(groupingSets); @@ -833,6 +841,10 @@ _outWindowAgg(StringInfo str, const WindowAgg *node) for (i = 0; i < node->partNumCols; i++) appendStringInfo(str, " %u", node->partOperators[i]); + appendStringInfoString(str, " :partCollations"); + for (i = 0; i < node->partNumCols; i++) + appendStringInfo(str, " %u", node->partCollations[i]); + WRITE_INT_FIELD(ordNumCols); appendStringInfoString(str, " :ordColIdx"); @@ -843,6 +855,10 @@ _outWindowAgg(StringInfo str, const WindowAgg *node) for (i = 0; i < node->ordNumCols; i++) appendStringInfo(str, " %u", node->ordOperators[i]); + appendStringInfoString(str, " :ordCollations"); + for (i = 0; i < node->ordNumCols; i++) + appendStringInfo(str, " %u", node->ordCollations[i]); + WRITE_INT_FIELD(frameOptions); WRITE_NODE_FIELD(startOffset); WRITE_NODE_FIELD(endOffset); @@ -871,6 +887,11 @@ _outGroup(StringInfo str, const Group *node) appendStringInfoString(str, " :grpOperators"); for (i = 0; i < node->numCols; i++) appendStringInfo(str, " %u", node->grpOperators[i]); + + appendStringInfoString(str, " :grpCollations"); + for (i = 0; i < node->numCols; i++) + appendStringInfo(str, " %u", node->grpCollations[i]); + } static void @@ -927,6 +948,10 @@ _outUnique(StringInfo str, const Unique *node) appendStringInfoString(str, " :uniqOperators"); for (i = 0; i < node->numCols; i++) appendStringInfo(str, " %u", node->uniqOperators[i]); + + appendStringInfoString(str, " :uniqCollations"); + for (i = 0; i < node->numCols; i++) + appendStringInfo(str, " %u", node->uniqCollations[i]); } static void @@ -963,6 +988,10 @@ _outSetOp(StringInfo str, const SetOp *node) for (i = 0; i < node->numCols; i++) appendStringInfo(str, " %u", node->dupOperators[i]); + appendStringInfoString(str, " :dupCollations"); + for (i = 0; i < node->numCols; i++) + appendStringInfo(str, " %u", node->dupCollations[i]); + WRITE_INT_FIELD(flagColIdx); WRITE_INT_FIELD(firstFlag); WRITE_LONG_FIELD(numGroups); diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c index fa27f37d6f..6550b3dbed 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -1672,6 +1672,7 @@ _readRecursiveUnion(void) READ_INT_FIELD(numCols); READ_ATTRNUMBER_ARRAY(dupColIdx, local_node->numCols); READ_OID_ARRAY(dupOperators, local_node->numCols); + READ_OID_ARRAY(dupCollations, local_node->numCols); READ_LONG_FIELD(numGroups); READ_DONE(); @@ -2138,6 +2139,7 @@ _readGroup(void) READ_INT_FIELD(numCols); READ_ATTRNUMBER_ARRAY(grpColIdx, local_node->numCols); READ_OID_ARRAY(grpOperators, local_node->numCols); + READ_OID_ARRAY(grpCollations, local_node->numCols); READ_DONE(); } @@ -2157,6 +2159,7 @@ _readAgg(void) READ_INT_FIELD(numCols); READ_ATTRNUMBER_ARRAY(grpColIdx, local_node->numCols); READ_OID_ARRAY(grpOperators, local_node->numCols); + READ_OID_ARRAY(grpCollations, local_node->numCols); READ_LONG_FIELD(numGroups); READ_BITMAPSET_FIELD(aggParams); READ_NODE_FIELD(groupingSets); @@ -2179,9 +2182,11 @@ _readWindowAgg(void) READ_INT_FIELD(partNumCols); READ_ATTRNUMBER_ARRAY(partColIdx, local_node->partNumCols); READ_OID_ARRAY(partOperators, local_node->partNumCols); + READ_OID_ARRAY(partCollations, local_node->partNumCols); READ_INT_FIELD(ordNumCols); READ_ATTRNUMBER_ARRAY(ordColIdx, local_node->ordNumCols); READ_OID_ARRAY(ordOperators, local_node->ordNumCols); + READ_OID_ARRAY(ordCollations, local_node->ordNumCols); READ_INT_FIELD(frameOptions); READ_NODE_FIELD(startOffset); READ_NODE_FIELD(endOffset); @@ -2207,6 +2212,7 @@ _readUnique(void) READ_INT_FIELD(numCols); READ_ATTRNUMBER_ARRAY(uniqColIdx, local_node->numCols); READ_OID_ARRAY(uniqOperators, local_node->numCols); + READ_OID_ARRAY(uniqCollations, local_node->numCols); READ_DONE(); } @@ -2285,6 +2291,7 @@ _readSetOp(void) READ_INT_FIELD(numCols); READ_ATTRNUMBER_ARRAY(dupColIdx, local_node->numCols); READ_OID_ARRAY(dupOperators, local_node->numCols); + READ_OID_ARRAY(dupCollations, local_node->numCols); READ_INT_FIELD(flagColIdx); READ_INT_FIELD(firstFlag); READ_LONG_FIELD(numGroups); diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 91cf78233d..615236d121 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -256,14 +256,14 @@ static Sort *make_sort_from_groupcols(List *groupcls, Plan *lefttree); static Material *make_material(Plan *lefttree); static WindowAgg *make_windowagg(List *tlist, Index winref, - int partNumCols, AttrNumber *partColIdx, Oid *partOperators, - int ordNumCols, AttrNumber *ordColIdx, Oid *ordOperators, + int partNumCols, AttrNumber *partColIdx, Oid *partOperators, Oid *partCollations, + int ordNumCols, AttrNumber *ordColIdx, Oid *ordOperators, Oid *ordCollations, int frameOptions, Node *startOffset, Node *endOffset, Oid startInRangeFunc, Oid endInRangeFunc, Oid inRangeColl, bool inRangeAsc, bool inRangeNullsFirst, Plan *lefttree); static Group *make_group(List *tlist, List *qual, int numGroupCols, - AttrNumber *grpColIdx, Oid *grpOperators, + AttrNumber *grpColIdx, Oid *grpOperators, Oid *grpCollations, Plan *lefttree); static Unique *make_unique_from_sortclauses(Plan *lefttree, List *distinctList); static Unique *make_unique_from_pathkeys(Plan *lefttree, @@ -1355,6 +1355,7 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path, int flags) bool newitems; int numGroupCols; AttrNumber *groupColIdx; + Oid *groupCollations; int groupColPos; ListCell *l; @@ -1421,6 +1422,7 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path, int flags) newtlist = subplan->targetlist; numGroupCols = list_length(uniq_exprs); groupColIdx = (AttrNumber *) palloc(numGroupCols * sizeof(AttrNumber)); + groupCollations = (Oid *) palloc(numGroupCols * sizeof(Oid)); groupColPos = 0; foreach(l, uniq_exprs) @@ -1431,7 +1433,9 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path, int flags) tle = tlist_member(uniqexpr, newtlist); if (!tle) /* shouldn't happen */ elog(ERROR, "failed to find unique expression in subplan tlist"); - groupColIdx[groupColPos++] = tle->resno; + groupColIdx[groupColPos] = tle->resno; + groupCollations[groupColPos] = exprCollation((Node *) tle->expr); + groupColPos++; } if (best_path->umethod == UNIQUE_PATH_HASH) @@ -1469,6 +1473,7 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path, int flags) numGroupCols, groupColIdx, groupOperators, + groupCollations, NIL, NIL, best_path->path.rows, @@ -1851,6 +1856,8 @@ create_group_plan(PlannerInfo *root, GroupPath *best_path) extract_grouping_cols(best_path->groupClause, subplan->targetlist), extract_grouping_ops(best_path->groupClause), + extract_grouping_collations(best_path->groupClause, + subplan->targetlist), subplan); copy_generic_path_info(&plan->plan, (Path *) best_path); @@ -1917,6 +1924,8 @@ create_agg_plan(PlannerInfo *root, AggPath *best_path) extract_grouping_cols(best_path->groupClause, subplan->targetlist), extract_grouping_ops(best_path->groupClause), + extract_grouping_collations(best_path->groupClause, + subplan->targetlist), NIL, NIL, best_path->numGroups, @@ -2078,6 +2087,7 @@ create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path) list_length((List *) linitial(rollup->gsets)), new_grpColIdx, extract_grouping_ops(rollup->groupClause), + extract_grouping_collations(rollup->groupClause, subplan->targetlist), rollup->gsets, NIL, rollup->numGroups, @@ -2115,6 +2125,7 @@ create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path) numGroupCols, top_grpColIdx, extract_grouping_ops(rollup->groupClause), + extract_grouping_collations(rollup->groupClause, subplan->targetlist), rollup->gsets, chain, rollup->numGroups, @@ -2214,9 +2225,11 @@ create_windowagg_plan(PlannerInfo *root, WindowAggPath *best_path) int partNumCols; AttrNumber *partColIdx; Oid *partOperators; + Oid *partCollations; int ordNumCols; AttrNumber *ordColIdx; Oid *ordOperators; + Oid *ordCollations; ListCell *lc; /* @@ -2238,6 +2251,7 @@ create_windowagg_plan(PlannerInfo *root, WindowAggPath *best_path) */ partColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numPart); partOperators = (Oid *) palloc(sizeof(Oid) * numPart); + partCollations = (Oid *) palloc(sizeof(Oid) * numPart); partNumCols = 0; foreach(lc, wc->partitionClause) @@ -2248,11 +2262,13 @@ create_windowagg_plan(PlannerInfo *root, WindowAggPath *best_path) Assert(OidIsValid(sgc->eqop)); partColIdx[partNumCols] = tle->resno; partOperators[partNumCols] = sgc->eqop; + partCollations[partNumCols] = exprCollation((Node *) tle->expr); partNumCols++; } ordColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numOrder); ordOperators = (Oid *) palloc(sizeof(Oid) * numOrder); + ordCollations = (Oid *) palloc(sizeof(Oid) * numOrder); ordNumCols = 0; foreach(lc, wc->orderClause) @@ -2263,6 +2279,7 @@ create_windowagg_plan(PlannerInfo *root, WindowAggPath *best_path) Assert(OidIsValid(sgc->eqop)); ordColIdx[ordNumCols] = tle->resno; ordOperators[ordNumCols] = sgc->eqop; + ordCollations[ordNumCols] = exprCollation((Node *) tle->expr); ordNumCols++; } @@ -2272,9 +2289,11 @@ create_windowagg_plan(PlannerInfo *root, WindowAggPath *best_path) partNumCols, partColIdx, partOperators, + partCollations, ordNumCols, ordColIdx, ordOperators, + ordCollations, wc->frameOptions, wc->startOffset, wc->endOffset, @@ -5409,10 +5428,12 @@ make_recursive_union(List *tlist, int keyno = 0; AttrNumber *dupColIdx; Oid *dupOperators; + Oid *dupCollations; ListCell *slitem; dupColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols); dupOperators = (Oid *) palloc(sizeof(Oid) * numCols); + dupCollations = (Oid *) palloc(sizeof(Oid) * numCols); foreach(slitem, distinctList) { @@ -5422,11 +5443,13 @@ make_recursive_union(List *tlist, dupColIdx[keyno] = tle->resno; dupOperators[keyno] = sortcl->eqop; + dupCollations[keyno] = exprCollation((Node *) tle->expr); Assert(OidIsValid(dupOperators[keyno])); keyno++; } node->dupColIdx = dupColIdx; node->dupOperators = dupOperators; + node->dupCollations = dupCollations; } node->numGroups = numGroups; @@ -6098,7 +6121,7 @@ materialize_finished_plan(Plan *subplan) Agg * make_agg(List *tlist, List *qual, AggStrategy aggstrategy, AggSplit aggsplit, - int numGroupCols, AttrNumber *grpColIdx, Oid *grpOperators, + int numGroupCols, AttrNumber *grpColIdx, Oid *grpOperators, Oid *grpCollations, List *groupingSets, List *chain, double dNumGroups, Plan *lefttree) { @@ -6114,6 +6137,7 @@ make_agg(List *tlist, List *qual, node->numCols = numGroupCols; node->grpColIdx = grpColIdx; node->grpOperators = grpOperators; + node->grpCollations = grpCollations; node->numGroups = numGroups; node->aggParams = NULL; /* SS_finalize_plan() will fill this */ node->groupingSets = groupingSets; @@ -6129,8 +6153,8 @@ make_agg(List *tlist, List *qual, static WindowAgg * make_windowagg(List *tlist, Index winref, - int partNumCols, AttrNumber *partColIdx, Oid *partOperators, - int ordNumCols, AttrNumber *ordColIdx, Oid *ordOperators, + int partNumCols, AttrNumber *partColIdx, Oid *partOperators, Oid *partCollations, + int ordNumCols, AttrNumber *ordColIdx, Oid *ordOperators, Oid *ordCollations, int frameOptions, Node *startOffset, Node *endOffset, Oid startInRangeFunc, Oid endInRangeFunc, Oid inRangeColl, bool inRangeAsc, bool inRangeNullsFirst, @@ -6143,9 +6167,11 @@ make_windowagg(List *tlist, Index winref, node->partNumCols = partNumCols; node->partColIdx = partColIdx; node->partOperators = partOperators; + node->partCollations = partCollations; node->ordNumCols = ordNumCols; node->ordColIdx = ordColIdx; node->ordOperators = ordOperators; + node->ordCollations = ordCollations; node->frameOptions = frameOptions; node->startOffset = startOffset; node->endOffset = endOffset; @@ -6170,6 +6196,7 @@ make_group(List *tlist, int numGroupCols, AttrNumber *grpColIdx, Oid *grpOperators, + Oid *grpCollations, Plan *lefttree) { Group *node = makeNode(Group); @@ -6178,6 +6205,7 @@ make_group(List *tlist, node->numCols = numGroupCols; node->grpColIdx = grpColIdx; node->grpOperators = grpOperators; + node->grpCollations = grpCollations; plan->qual = qual; plan->targetlist = tlist; @@ -6201,6 +6229,7 @@ make_unique_from_sortclauses(Plan *lefttree, List *distinctList) int keyno = 0; AttrNumber *uniqColIdx; Oid *uniqOperators; + Oid *uniqCollations; ListCell *slitem; plan->targetlist = lefttree->targetlist; @@ -6215,6 +6244,7 @@ make_unique_from_sortclauses(Plan *lefttree, List *distinctList) Assert(numCols > 0); uniqColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols); uniqOperators = (Oid *) palloc(sizeof(Oid) * numCols); + uniqCollations = (Oid *) palloc(sizeof(Oid) * numCols); foreach(slitem, distinctList) { @@ -6223,6 +6253,7 @@ make_unique_from_sortclauses(Plan *lefttree, List *distinctList) uniqColIdx[keyno] = tle->resno; uniqOperators[keyno] = sortcl->eqop; + uniqCollations[keyno] = exprCollation((Node *) tle->expr); Assert(OidIsValid(uniqOperators[keyno])); keyno++; } @@ -6230,6 +6261,7 @@ make_unique_from_sortclauses(Plan *lefttree, List *distinctList) node->numCols = numCols; node->uniqColIdx = uniqColIdx; node->uniqOperators = uniqOperators; + node->uniqCollations = uniqCollations; return node; } @@ -6245,6 +6277,7 @@ make_unique_from_pathkeys(Plan *lefttree, List *pathkeys, int numCols) int keyno = 0; AttrNumber *uniqColIdx; Oid *uniqOperators; + Oid *uniqCollations; ListCell *lc; plan->targetlist = lefttree->targetlist; @@ -6260,6 +6293,7 @@ make_unique_from_pathkeys(Plan *lefttree, List *pathkeys, int numCols) Assert(numCols >= 0 && numCols <= list_length(pathkeys)); uniqColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols); uniqOperators = (Oid *) palloc(sizeof(Oid) * numCols); + uniqCollations = (Oid *) palloc(sizeof(Oid) * numCols); foreach(lc, pathkeys) { @@ -6328,6 +6362,7 @@ make_unique_from_pathkeys(Plan *lefttree, List *pathkeys, int numCols) uniqColIdx[keyno] = tle->resno; uniqOperators[keyno] = eqop; + uniqCollations[keyno] = ec->ec_collation; keyno++; } @@ -6335,6 +6370,7 @@ make_unique_from_pathkeys(Plan *lefttree, List *pathkeys, int numCols) node->numCols = numCols; node->uniqColIdx = uniqColIdx; node->uniqOperators = uniqOperators; + node->uniqCollations = uniqCollations; return node; } @@ -6379,6 +6415,7 @@ make_setop(SetOpCmd cmd, SetOpStrategy strategy, Plan *lefttree, int keyno = 0; AttrNumber *dupColIdx; Oid *dupOperators; + Oid *dupCollations; ListCell *slitem; plan->targetlist = lefttree->targetlist; @@ -6392,6 +6429,7 @@ make_setop(SetOpCmd cmd, SetOpStrategy strategy, Plan *lefttree, */ dupColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols); dupOperators = (Oid *) palloc(sizeof(Oid) * numCols); + dupCollations = (Oid *) palloc(sizeof(Oid) * numCols); foreach(slitem, distinctList) { @@ -6400,6 +6438,7 @@ make_setop(SetOpCmd cmd, SetOpStrategy strategy, Plan *lefttree, dupColIdx[keyno] = tle->resno; dupOperators[keyno] = sortcl->eqop; + dupCollations[keyno] = exprCollation((Node *) tle->expr); Assert(OidIsValid(dupOperators[keyno])); keyno++; } @@ -6409,6 +6448,7 @@ make_setop(SetOpCmd cmd, SetOpStrategy strategy, Plan *lefttree, node->numCols = numCols; node->dupColIdx = dupColIdx; node->dupOperators = dupOperators; + node->dupCollations = dupCollations; node->flagColIdx = flagColIdx; node->firstFlag = firstFlag; node->numGroups = numGroups; diff --git a/src/backend/optimizer/util/tlist.c b/src/backend/optimizer/util/tlist.c index 5500f33e63..00ba58bcbd 100644 --- a/src/backend/optimizer/util/tlist.c +++ b/src/backend/optimizer/util/tlist.c @@ -502,6 +502,31 @@ extract_grouping_ops(List *groupClause) return groupOperators; } +/* + * extract_grouping_collations - make an array of the grouping column collations + * for a SortGroupClause list + */ +Oid * +extract_grouping_collations(List *groupClause, List *tlist) +{ + int numCols = list_length(groupClause); + int colno = 0; + Oid *grpCollations; + ListCell *glitem; + + grpCollations = (Oid *) palloc(sizeof(Oid) * numCols); + + foreach(glitem, groupClause) + { + SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem); + TargetEntry *tle = get_sortgroupclause_tle(groupcl, tlist); + + grpCollations[colno++] = exprCollation((Node *) tle->expr); + } + + return grpCollations; +} + /* * extract_grouping_cols - make an array of the grouping column resnos * for a SortGroupClause list diff --git a/src/backend/partitioning/partbounds.c b/src/backend/partitioning/partbounds.c index eeaab2f4c9..c60a1bb509 100644 --- a/src/backend/partitioning/partbounds.c +++ b/src/backend/partitioning/partbounds.c @@ -2657,7 +2657,7 @@ get_range_nulltest(PartitionKey key) * Compute the hash value for given partition key values. */ uint64 -compute_partition_hash_value(int partnatts, FmgrInfo *partsupfunc, +compute_partition_hash_value(int partnatts, FmgrInfo *partsupfunc, Oid *partcollation, Datum *values, bool *isnull) { int i; @@ -2678,7 +2678,7 @@ compute_partition_hash_value(int partnatts, FmgrInfo *partsupfunc, * datatype-specific hash functions of each partition key * attribute. */ - hash = FunctionCall2(&partsupfunc[i], values[i], seed); + hash = FunctionCall2Coll(&partsupfunc[i], partcollation[i], values[i], seed); /* Form a single 64-bit hash value */ rowHash = hash_combine64(rowHash, DatumGetUInt64(hash)); diff --git a/src/backend/partitioning/partprune.c b/src/backend/partitioning/partprune.c index 35c87535d3..2abe119529 100644 --- a/src/backend/partitioning/partprune.c +++ b/src/backend/partitioning/partprune.c @@ -2157,6 +2157,7 @@ get_matching_hash_bounds(PartitionPruneContext *context, int i; uint64 rowHash; int greatest_modulus; + Oid *partcollation = context->partcollation; Assert(context->strategy == PARTITION_STRATEGY_HASH); @@ -2177,7 +2178,7 @@ get_matching_hash_bounds(PartitionPruneContext *context, isnull[i] = bms_is_member(i, nullkeys); greatest_modulus = get_hash_partition_greatest_modulus(boundinfo); - rowHash = compute_partition_hash_value(partnatts, partsupfunc, + rowHash = compute_partition_hash_value(partnatts, partsupfunc, partcollation, values, isnull); if (partindices[rowHash % greatest_modulus] >= 0) diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c index ce1ded888f..6a31ed7852 100644 --- a/src/backend/utils/adt/arrayfuncs.c +++ b/src/backend/utils/adt/arrayfuncs.c @@ -3954,7 +3954,7 @@ hash_array(PG_FUNCTION_ARGS) * apply the hash function to each array element. */ InitFunctionCallInfoData(locfcinfo, &typentry->hash_proc_finfo, 1, - InvalidOid, NULL, NULL); + PG_GET_COLLATION(), NULL, NULL); /* Loop over source data */ nitems = ArrayGetNItems(ndims, dims); diff --git a/src/backend/utils/adt/orderedsetaggs.c b/src/backend/utils/adt/orderedsetaggs.c index 1b21da8d09..68312677d1 100644 --- a/src/backend/utils/adt/orderedsetaggs.c +++ b/src/backend/utils/adt/orderedsetaggs.c @@ -1345,6 +1345,7 @@ hypothetical_dense_rank_final(PG_FUNCTION_ARGS) numDistinctCols, sortColIdx, osastate->qstate->eqOperators, + osastate->qstate->sortCollations, NULL); MemoryContextSwitchTo(oldContext); osastate->qstate->compareTuple = compareTuple; diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c index a3dc3be5a8..b421ba8279 100644 --- a/src/backend/utils/adt/pg_locale.c +++ b/src/backend/utils/adt/pg_locale.c @@ -1308,6 +1308,7 @@ pg_newlocale_from_collation(Oid collid) /* We'll fill in the result struct locally before allocating memory */ memset(&result, 0, sizeof(result)); result.provider = collform->collprovider; + result.insensitive = collform->collisinsensitive; if (collform->collprovider == COLLPROVIDER_LIBC) { diff --git a/src/backend/utils/adt/varchar.c b/src/backend/utils/adt/varchar.c index 8f07b1e272..08b1ea5276 100644 --- a/src/backend/utils/adt/varchar.c +++ b/src/backend/utils/adt/varchar.c @@ -932,10 +932,17 @@ Datum hashbpchar(PG_FUNCTION_ARGS) { BpChar *key = PG_GETARG_BPCHAR_PP(0); + Oid collid = PG_GET_COLLATION(); char *keydata; int keylen; Datum result; + if (!collid) + elog(ERROR, "FIXME: hashbpchar() called without collation"); + + if (collid != DEFAULT_COLLATION_OID) + elog(ERROR, "TODO"); + keydata = VARDATA_ANY(key); keylen = bcTruelen(key); @@ -951,10 +958,17 @@ Datum hashbpcharextended(PG_FUNCTION_ARGS) { BpChar *key = PG_GETARG_BPCHAR_PP(0); + Oid collid = PG_GET_COLLATION(); char *keydata; int keylen; Datum result; + if (!collid) + elog(ERROR, "FIXME: hashbpcharextended() called without collation"); + + if (collid != DEFAULT_COLLATION_OID) + elog(ERROR, "TODO"); + keydata = VARDATA_ANY(key); keylen = bcTruelen(key); diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c index 0fd3b15748..cd894859f0 100644 --- a/src/backend/utils/adt/varlena.c +++ b/src/backend/utils/adt/varlena.c @@ -1427,7 +1427,8 @@ varstr_cmp(const char *arg1, int len1, const char *arg2, int len2, Oid collid) * equal strings in the input - then we win big by avoiding expensive * collation-aware comparisons. */ - if (len1 == len2 && memcmp(arg1, arg2, len1) == 0) + if ((!mylocale || (mylocale && !mylocale->insensitive)) && + len1 == len2 && memcmp(arg1, arg2, len1) == 0) return 0; #ifdef WIN32 @@ -1505,7 +1506,8 @@ varstr_cmp(const char *arg1, int len1, const char *arg2, int len2, Oid collid) * reasons, so we follow Perl's lead and sort "equal" strings * according to strcmp (on the UTF-8 representation). */ - if (result == 0) + if (result == 0 && + (!mylocale || (mylocale && !mylocale->insensitive))) { result = memcmp(arg1, arg2, Min(len1, len2)); if ((result == 0) && (len1 != len2)) @@ -1596,7 +1598,8 @@ varstr_cmp(const char *arg1, int len1, const char *arg2, int len2, Oid collid) * so we follow Perl's lead and sort "equal" strings according to * strcmp(). */ - if (result == 0) + if (result == 0 && + (!mylocale || (mylocale && !mylocale->insensitive))) result = strcmp(a1p, a2p); if (a1p != a1buf) @@ -1640,6 +1643,23 @@ text_cmp(text *arg1, text *arg2, Oid collid) Datum texteq(PG_FUNCTION_ARGS) { + if (PG_GET_COLLATION() && + PG_GET_COLLATION() != DEFAULT_COLLATION_OID && + pg_newlocale_from_collation(PG_GET_COLLATION())->insensitive) + { + text *arg1 = PG_GETARG_TEXT_PP(0); + text *arg2 = PG_GETARG_TEXT_PP(1); + bool result; + + result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) == 0); + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_BOOL(result); + } + else + { Datum arg1 = PG_GETARG_DATUM(0); Datum arg2 = PG_GETARG_DATUM(1); bool result; @@ -1670,11 +1690,29 @@ texteq(PG_FUNCTION_ARGS) } PG_RETURN_BOOL(result); + } } Datum textne(PG_FUNCTION_ARGS) { + if (PG_GET_COLLATION() && + PG_GET_COLLATION() != DEFAULT_COLLATION_OID && + pg_newlocale_from_collation(PG_GET_COLLATION())->insensitive) + { + text *arg1 = PG_GETARG_TEXT_PP(0); + text *arg2 = PG_GETARG_TEXT_PP(1); + bool result; + + result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) != 0); + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_BOOL(result); + } + else + { Datum arg1 = PG_GETARG_DATUM(0); Datum arg2 = PG_GETARG_DATUM(1); bool result; @@ -1699,6 +1737,7 @@ textne(PG_FUNCTION_ARGS) } PG_RETURN_BOOL(result); + } } Datum diff --git a/src/backend/utils/cache/catcache.c b/src/backend/utils/cache/catcache.c index 4176ced923..c4d76b8dac 100644 --- a/src/backend/utils/cache/catcache.c +++ b/src/backend/utils/cache/catcache.c @@ -188,7 +188,7 @@ texteqfast(Datum a, Datum b) static uint32 texthashfast(Datum datum) { - return DatumGetInt32(DirectFunctionCall1(hashtext, datum)); + return DatumGetInt32(DirectFunctionCall1Coll(hashtext, DEFAULT_COLLATION_OID, datum)); } static bool diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c index 211a96380e..698803026d 100644 --- a/src/bin/initdb/initdb.c +++ b/src/bin/initdb/initdb.c @@ -1766,8 +1766,8 @@ setup_collation(FILE *cmdfd) * in pg_collation.h. But add it before reading system collations, so * that it wins if libc defines a locale named ucs_basic. */ - PG_CMD_PRINTF3("INSERT INTO pg_collation (oid, collname, collnamespace, collowner, collprovider, collencoding, collcollate, collctype)" - "VALUES (pg_nextoid('pg_catalog.pg_collation', 'oid', 'pg_catalog.pg_collation_oid_index'), 'ucs_basic', 'pg_catalog'::regnamespace, %u, '%c', %d, 'C', 'C');\n\n", + PG_CMD_PRINTF3("INSERT INTO pg_collation (oid, collname, collnamespace, collowner, collprovider, collisinsensitive, collencoding, collcollate, collctype)" + "VALUES (pg_nextoid('pg_catalog.pg_collation', 'oid', 'pg_catalog.pg_collation_oid_index'), 'ucs_basic', 'pg_catalog'::regnamespace, %u, '%c', false, %d, 'C', 'C');\n\n", BOOTSTRAP_SUPERUSERID, COLLPROVIDER_LIBC, PG_UTF8); /* Now import all collations we can find in the operating system */ diff --git a/src/include/catalog/pg_collation.h b/src/include/catalog/pg_collation.h index 51f9b2a2ca..32924f04d0 100644 --- a/src/include/catalog/pg_collation.h +++ b/src/include/catalog/pg_collation.h @@ -33,6 +33,7 @@ CATALOG(pg_collation,3456,CollationRelationId) Oid collnamespace; /* OID of namespace containing collation */ Oid collowner; /* owner of collation */ char collprovider; /* see constants below */ + bool collisinsensitive BKI_DEFAULT(f); int32 collencoding; /* encoding for this collation; -1 = "all" */ NameData collcollate; /* LC_COLLATE setting */ NameData collctype; /* LC_CTYPE setting */ @@ -61,6 +62,7 @@ typedef FormData_pg_collation *Form_pg_collation; extern Oid CollationCreate(const char *collname, Oid collnamespace, Oid collowner, char collprovider, + bool collisinsensitive, int32 collencoding, const char *collcollate, const char *collctype, const char *collversion, diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h index 2feec628c0..540bebec43 100644 --- a/src/include/executor/executor.h +++ b/src/include/executor/executor.h @@ -110,6 +110,7 @@ extern ExprState *execTuplesMatchPrepare(TupleDesc desc, int numCols, AttrNumber *keyColIdx, Oid *eqOperators, + Oid *collations, PlanState *parent); extern void execTuplesHashPrepare(int numCols, Oid *eqOperators, @@ -120,6 +121,7 @@ extern TupleHashTable BuildTupleHashTable(PlanState *parent, int numCols, AttrNumber *keyColIdx, Oid *eqfuncoids, FmgrInfo *hashfunctions, + Oid *collations, long nbuckets, Size additionalsize, MemoryContext tablecxt, MemoryContext tempcxt, bool use_variable_hash_iv); @@ -246,6 +248,7 @@ extern ExprState *ExecBuildGroupingEqual(TupleDesc ldesc, TupleDesc rdesc, int numCols, AttrNumber *keyColIdx, Oid *eqfunctions, + Oid *collations, PlanState *parent); extern ProjectionInfo *ExecBuildProjectionInfo(List *targetList, ExprContext *econtext, diff --git a/src/include/executor/hashjoin.h b/src/include/executor/hashjoin.h index a9f9872a78..000277a343 100644 --- a/src/include/executor/hashjoin.h +++ b/src/include/executor/hashjoin.h @@ -337,6 +337,7 @@ typedef struct HashJoinTableData FmgrInfo *outer_hashfunctions; /* lookup data for hash functions */ FmgrInfo *inner_hashfunctions; /* lookup data for hash functions */ bool *hashStrict; /* is each hash join operator strict? */ + Oid *collations; Size spaceUsed; /* memory space currently used by tuples */ Size spaceAllowed; /* upper limit for space used */ diff --git a/src/include/executor/nodeHash.h b/src/include/executor/nodeHash.h index 8d700c06c5..f2444c85c6 100644 --- a/src/include/executor/nodeHash.h +++ b/src/include/executor/nodeHash.h @@ -24,7 +24,7 @@ extern Node *MultiExecHash(HashState *node); extern void ExecEndHash(HashState *node); extern void ExecReScanHash(HashState *node); -extern HashJoinTable ExecHashTableCreate(HashState *state, List *hashOperators, +extern HashJoinTable ExecHashTableCreate(HashState *state, List *hashOperators, List *hashCollations, bool keepNulls); extern void ExecParallelHashTableAlloc(HashJoinTable hashtable, int batchno); diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 5ed0f40f69..b9ad9a8061 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -689,6 +689,7 @@ typedef struct TupleHashTableData AttrNumber *keyColIdx; /* attr numbers of key columns */ FmgrInfo *tab_hash_funcs; /* hash functions for table datatype(s) */ ExprState *tab_eq_func; /* comparator for table datatype(s) */ + Oid *tab_collations; /* collations for hash and comparison */ MemoryContext tablecxt; /* memory context containing table */ MemoryContext tempcxt; /* context for function evaluations */ Size entrysize; /* actual size to make each hash entry */ @@ -858,6 +859,7 @@ typedef struct SubPlanState AttrNumber *keyColIdx; /* control data for hash tables */ Oid *tab_eq_funcoids; /* equality func oids for table * datatype(s) */ + Oid *tab_collations; /* collations for hash and comparison */ FmgrInfo *tab_hash_funcs; /* hash functions for table datatype(s) */ FmgrInfo *tab_eq_funcs; /* equality functions for table datatype(s) */ FmgrInfo *lhs_hash_funcs; /* hash functions for lefthand datatype(s) */ @@ -1870,6 +1872,7 @@ typedef struct HashJoinState List *hj_OuterHashKeys; /* list of ExprState nodes */ List *hj_InnerHashKeys; /* list of ExprState nodes */ List *hj_HashOperators; /* list of operator OIDs */ + List *hj_Collations; HashJoinTable hj_HashTable; uint32 hj_CurHashValue; int hj_CurBucketNo; diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index f116bc23ff..e10daaf500 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -297,6 +297,7 @@ typedef struct RecursiveUnion * duplicate-ness */ AttrNumber *dupColIdx; /* their indexes in the target list */ Oid *dupOperators; /* equality operators to compare with */ + Oid *dupCollations; long numGroups; /* estimated number of groups in input */ } RecursiveUnion; @@ -772,6 +773,7 @@ typedef struct Group int numCols; /* number of grouping columns */ AttrNumber *grpColIdx; /* their indexes in the target list */ Oid *grpOperators; /* equality operators to compare with */ + Oid *grpCollations; } Group; /* --------------- @@ -796,6 +798,7 @@ typedef struct Agg int numCols; /* number of grouping columns */ AttrNumber *grpColIdx; /* their indexes in the target list */ Oid *grpOperators; /* equality operators to compare with */ + Oid *grpCollations; long numGroups; /* estimated number of groups in input */ Bitmapset *aggParams; /* IDs of Params used in Aggref inputs */ /* Note: planner provides numGroups & aggParams only in HASHED/MIXED case */ @@ -814,9 +817,11 @@ typedef struct WindowAgg int partNumCols; /* number of columns in partition clause */ AttrNumber *partColIdx; /* their indexes in the target list */ Oid *partOperators; /* equality operators for partition columns */ + Oid *partCollations; /* collations for partition columns */ int ordNumCols; /* number of columns in ordering clause */ AttrNumber *ordColIdx; /* their indexes in the target list */ Oid *ordOperators; /* equality operators for ordering columns */ + Oid *ordCollations; /* collations for ordering columns */ int frameOptions; /* frame_clause options, see WindowDef */ Node *startOffset; /* expression for starting bound, if any */ Node *endOffset; /* expression for ending bound, if any */ @@ -838,6 +843,7 @@ typedef struct Unique int numCols; /* number of columns to check for uniqueness */ AttrNumber *uniqColIdx; /* their indexes in the target list */ Oid *uniqOperators; /* equality operators to compare with */ + Oid *uniqCollations; /* collations for equality comparisons */ } Unique; /* ------------ @@ -912,6 +918,7 @@ typedef struct SetOp * duplicate-ness */ AttrNumber *dupColIdx; /* their indexes in the target list */ Oid *dupOperators; /* equality operators to compare with */ + Oid *dupCollations; AttrNumber flagColIdx; /* where is the flag column, if any */ int firstFlag; /* flag value for first input relation */ long numGroups; /* estimated number of groups in input */ diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h index ec1ea10233..dd0cca98fe 100644 --- a/src/include/optimizer/planmain.h +++ b/src/include/optimizer/planmain.h @@ -63,7 +63,7 @@ extern bool is_projection_capable_plan(Plan *plan); extern Sort *make_sort_from_sortclauses(List *sortcls, Plan *lefttree); extern Agg *make_agg(List *tlist, List *qual, AggStrategy aggstrategy, AggSplit aggsplit, - int numGroupCols, AttrNumber *grpColIdx, Oid *grpOperators, + int numGroupCols, AttrNumber *grpColIdx, Oid *grpOperators, Oid *grpCollations, List *groupingSets, List *chain, double dNumGroups, Plan *lefttree); extern Limit *make_limit(Plan *lefttree, Node *limitOffset, Node *limitCount); diff --git a/src/include/optimizer/tlist.h b/src/include/optimizer/tlist.h index 9fa52e1278..f3556fe268 100644 --- a/src/include/optimizer/tlist.h +++ b/src/include/optimizer/tlist.h @@ -48,6 +48,7 @@ extern SortGroupClause *get_sortgroupref_clause_noerr(Index sortref, List *clauses); extern Oid *extract_grouping_ops(List *groupClause); +extern Oid *extract_grouping_collations(List *groupClause, List *tlist); extern AttrNumber *extract_grouping_cols(List *groupClause, List *tlist); extern bool grouping_is_sortable(List *groupClause); extern bool grouping_is_hashable(List *groupClause); diff --git a/src/include/partitioning/partbounds.h b/src/include/partitioning/partbounds.h index 36fb584e23..9a271b09e9 100644 --- a/src/include/partitioning/partbounds.h +++ b/src/include/partitioning/partbounds.h @@ -77,6 +77,7 @@ typedef struct PartitionBoundInfoData extern int get_hash_partition_greatest_modulus(PartitionBoundInfo b); extern uint64 compute_partition_hash_value(int partnatts, FmgrInfo *partsupfunc, + Oid *partcollation, Datum *values, bool *isnull); extern List *get_qual_from_partbound(Relation rel, Relation parent, PartitionBoundSpec *spec); diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h index 88a3134862..6dda2f8aca 100644 --- a/src/include/utils/pg_locale.h +++ b/src/include/utils/pg_locale.h @@ -82,6 +82,7 @@ extern void cache_locale_time(void); struct pg_locale_struct { char provider; + bool insensitive; union { #ifdef HAVE_LOCALE_T diff --git a/src/test/regress/expected/collate.icu.utf8.out b/src/test/regress/expected/collate.icu.utf8.out index f485b5c330..f009b26b74 100644 --- a/src/test/regress/expected/collate.icu.utf8.out +++ b/src/test/regress/expected/collate.icu.utf8.out @@ -1100,9 +1100,137 @@ select textrange_en_us('A','Z') @> 'b'::text; drop type textrange_c; drop type textrange_en_us; +-- insensitive collations +CREATE COLLATION case_sensitive (provider = icu, locale = 'und'); +CREATE COLLATION case_insensitive (provider = icu, locale = 'und-u-ks-level2', insensitive); +SELECT 'abc' <= 'ABC' COLLATE case_sensitive, 'abc' >= 'ABC' COLLATE case_sensitive; + ?column? | ?column? +----------+---------- + t | f +(1 row) + +SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_insensitive; + ?column? | ?column? +----------+---------- + t | t +(1 row) + +CREATE TABLE test1cs (x text COLLATE case_sensitive); +CREATE TABLE test2cs (x text COLLATE case_sensitive); +CREATE TABLE test3cs (x text COLLATE case_sensitive); +INSERT INTO test1cs VALUES ('abc'), ('def'), ('ghi'); +INSERT INTO test2cs VALUES ('ABC'), ('ghi'); +INSERT INTO test3cs VALUES ('abc'), ('ABC'), ('def'), ('ghi'); +SELECT x FROM test1cs UNION SELECT x FROM test2cs; + x +----- + ABC + abc + def + ghi +(4 rows) + +SELECT x FROM test2cs UNION SELECT x FROM test1cs; + x +----- + ABC + abc + def + ghi +(4 rows) + +SELECT x FROM test1cs INTERSECT SELECT x FROM test2cs; + x +----- + ghi +(1 row) + +SELECT x FROM test2cs INTERSECT SELECT x FROM test1cs; + x +----- + ghi +(1 row) + +SELECT x FROM test1cs EXCEPT SELECT x FROM test2cs; + x +----- + abc + def +(2 rows) + +SELECT x FROM test2cs EXCEPT SELECT x FROM test1cs; + x +----- + ABC +(1 row) + +SELECT x, count(*) FROM test3cs GROUP BY x ORDER BY x; + x | count +-----+------- + abc | 1 + ABC | 1 + def | 1 + ghi | 1 +(4 rows) + +CREATE TABLE test1ci (x text COLLATE case_insensitive); +CREATE TABLE test2ci (x text COLLATE case_insensitive); +CREATE TABLE test3ci (x text COLLATE case_insensitive); +INSERT INTO test1ci VALUES ('abc'), ('def'), ('ghi'); +INSERT INTO test2ci VALUES ('ABC'), ('ghi'); +INSERT INTO test3ci VALUES ('abc'), ('ABC'), ('def'), ('ghi'); +SELECT x FROM test1ci UNION SELECT x FROM test2ci; + x +----- + abc + ghi + def +(3 rows) + +SELECT x FROM test2ci UNION SELECT x FROM test1ci; + x +----- + ABC + ghi + def +(3 rows) + +SELECT x FROM test1ci INTERSECT SELECT x FROM test2ci; + x +----- + ghi + abc +(2 rows) + +SELECT x FROM test2ci INTERSECT SELECT x FROM test1ci; + x +----- + ghi + ABC +(2 rows) + +SELECT x FROM test1ci EXCEPT SELECT x FROM test2ci; + x +----- + def +(1 row) + +SELECT x FROM test2ci EXCEPT SELECT x FROM test1ci; + x +--- +(0 rows) + +SELECT x, count(*) FROM test3ci GROUP BY x ORDER BY x; + x | count +-----+------- + abc | 2 + def | 1 + ghi | 1 +(3 rows) + -- cleanup DROP SCHEMA collate_tests CASCADE; -NOTICE: drop cascades to 18 other objects +NOTICE: drop cascades to 26 other objects DETAIL: drop cascades to table collate_test1 drop cascades to table collate_test_like drop cascades to table collate_test2 @@ -1121,6 +1249,14 @@ drop cascades to function mylt_noninline(text,text) drop cascades to function mylt_plpgsql(text,text) drop cascades to function mylt2(text,text) drop cascades to function dup(anyelement) +drop cascades to collation case_sensitive +drop cascades to collation case_insensitive +drop cascades to table test1cs +drop cascades to table test2cs +drop cascades to table test3cs +drop cascades to table test1ci +drop cascades to table test2ci +drop cascades to table test3ci RESET search_path; -- leave a collation for pg_upgrade test CREATE COLLATION coll_icu_upgrade FROM "und-x-icu"; diff --git a/src/test/regress/sql/collate.icu.utf8.sql b/src/test/regress/sql/collate.icu.utf8.sql index ef39445b30..50bc12c16d 100644 --- a/src/test/regress/sql/collate.icu.utf8.sql +++ b/src/test/regress/sql/collate.icu.utf8.sql @@ -425,6 +425,45 @@ CREATE INDEX collate_dep_test4i ON collate_dep_test4t (b COLLATE test0); drop type textrange_en_us; +-- insensitive collations + +CREATE COLLATION case_sensitive (provider = icu, locale = 'und'); +CREATE COLLATION case_insensitive (provider = icu, locale = 'und-u-ks-level2', insensitive); + +SELECT 'abc' <= 'ABC' COLLATE case_sensitive, 'abc' >= 'ABC' COLLATE case_sensitive; +SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_insensitive; + +CREATE TABLE test1cs (x text COLLATE case_sensitive); +CREATE TABLE test2cs (x text COLLATE case_sensitive); +CREATE TABLE test3cs (x text COLLATE case_sensitive); +INSERT INTO test1cs VALUES ('abc'), ('def'), ('ghi'); +INSERT INTO test2cs VALUES ('ABC'), ('ghi'); +INSERT INTO test3cs VALUES ('abc'), ('ABC'), ('def'), ('ghi'); + +SELECT x FROM test1cs UNION SELECT x FROM test2cs; +SELECT x FROM test2cs UNION SELECT x FROM test1cs; +SELECT x FROM test1cs INTERSECT SELECT x FROM test2cs; +SELECT x FROM test2cs INTERSECT SELECT x FROM test1cs; +SELECT x FROM test1cs EXCEPT SELECT x FROM test2cs; +SELECT x FROM test2cs EXCEPT SELECT x FROM test1cs; +SELECT x, count(*) FROM test3cs GROUP BY x ORDER BY x; + +CREATE TABLE test1ci (x text COLLATE case_insensitive); +CREATE TABLE test2ci (x text COLLATE case_insensitive); +CREATE TABLE test3ci (x text COLLATE case_insensitive); +INSERT INTO test1ci VALUES ('abc'), ('def'), ('ghi'); +INSERT INTO test2ci VALUES ('ABC'), ('ghi'); +INSERT INTO test3ci VALUES ('abc'), ('ABC'), ('def'), ('ghi'); + +SELECT x FROM test1ci UNION SELECT x FROM test2ci; +SELECT x FROM test2ci UNION SELECT x FROM test1ci; +SELECT x FROM test1ci INTERSECT SELECT x FROM test2ci; +SELECT x FROM test2ci INTERSECT SELECT x FROM test1ci; +SELECT x FROM test1ci EXCEPT SELECT x FROM test2ci; +SELECT x FROM test2ci EXCEPT SELECT x FROM test1ci; +SELECT x, count(*) FROM test3ci GROUP BY x ORDER BY x; + + -- cleanup DROP SCHEMA collate_tests CASCADE; RESET search_path; base-commit: 6b0faf723647a851eaaddfed11e14861f8d0f588 -- 2.20.1