From b566e8756fbf78da804f5538d68350cda7a9bab3 Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Wed, 22 Feb 2023 18:33:47 +0100 Subject: [PATCH v6] Allow tailoring of ICU locales with custom rules This exposes the ICU facility to add custom collation rules to a standard collation. Discussion: https://www.postgresql.org/message-id/flat/821c71a4-6ef0-d366-9acf-bb8e367f739f@enterprisedb.com --- doc/src/sgml/catalogs.sgml | 18 ++++ doc/src/sgml/ref/create_collation.sgml | 22 ++++ doc/src/sgml/ref/create_database.sgml | 13 +++ src/backend/catalog/pg_collation.c | 5 + src/backend/commands/collationcmds.c | 23 +++- src/backend/commands/dbcommands.c | 51 ++++++++- src/backend/utils/adt/pg_locale.c | 41 ++++++- src/backend/utils/init/postinit.c | 11 +- src/bin/pg_dump/pg_dump.c | 37 +++++++ src/bin/psql/describe.c | 100 +++++++++++------- src/include/catalog/pg_collation.h | 2 + src/include/catalog/pg_database.h | 3 + src/include/utils/pg_locale.h | 1 + .../regress/expected/collate.icu.utf8.out | 30 ++++++ src/test/regress/expected/psql.out | 18 ++-- src/test/regress/sql/collate.icu.utf8.sql | 13 +++ 16 files changed, 332 insertions(+), 56 deletions(-) diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml index c1e4048054..746baf5053 100644 --- a/doc/src/sgml/catalogs.sgml +++ b/doc/src/sgml/catalogs.sgml @@ -2428,6 +2428,15 @@ <structname>pg_collation</structname> Columns + + + collicurules text + + + ICU collation rules for this collation object + + + collversion text @@ -3106,6 +3115,15 @@ <structname>pg_database</structname> Columns + + + daticurules text + + + ICU collation rules for this database + + + datcollversion text diff --git a/doc/src/sgml/ref/create_collation.sgml b/doc/src/sgml/ref/create_collation.sgml index 136976165c..289f8147f1 100644 --- a/doc/src/sgml/ref/create_collation.sgml +++ b/doc/src/sgml/ref/create_collation.sgml @@ -27,6 +27,7 @@ [ LC_CTYPE = lc_ctype, ] [ PROVIDER = provider, ] [ DETERMINISTIC = boolean, ] + [ RULES = 
rules, ] [ VERSION = version ] ) CREATE COLLATION [ IF NOT EXISTS ] name FROM existing_collation @@ -149,6 +150,19 @@ Parameters + + rules + + + + Specifies additional collation rules to customize the behavior of the + collation. This is supported for ICU only. See <ulink + url="https://unicode-org.github.io/icu/userguide/collation/customization/"/> + for details on the syntax. + + + + version @@ -228,6 +242,14 @@ Examples + + To create a collation using the ICU provider, based on the English ICU + locale, with custom rules: + + + + + To create a collation from an existing collation: diff --git a/doc/src/sgml/ref/create_database.sgml b/doc/src/sgml/ref/create_database.sgml index 57d13e34c2..6f62161b80 100644 --- a/doc/src/sgml/ref/create_database.sgml +++ b/doc/src/sgml/ref/create_database.sgml @@ -30,6 +30,7 @@ [ LC_COLLATE [=] lc_collate ] [ LC_CTYPE [=] lc_ctype ] [ ICU_LOCALE [=] icu_locale ] + [ ICU_RULES [=] icu_rules ] [ LOCALE_PROVIDER [=] locale_provider ] [ COLLATION_VERSION = collation_version ] [ TABLESPACE [=] tablespace_name ] @@ -192,6 +193,18 @@ Parameters + + icu_rules + + + Specifies additional collation rules to customize the behavior of the + collation. This is supported for ICU only. See <ulink + url="https://unicode-org.github.io/icu/userguide/collation/customization/"/> + for details on the syntax. 
+ + + + locale_provider diff --git a/src/backend/catalog/pg_collation.c b/src/backend/catalog/pg_collation.c index 287b13725d..fd022e6fc2 100644 --- a/src/backend/catalog/pg_collation.c +++ b/src/backend/catalog/pg_collation.c @@ -50,6 +50,7 @@ CollationCreate(const char *collname, Oid collnamespace, int32 collencoding, const char *collcollate, const char *collctype, const char *colliculocale, + const char *collicurules, const char *collversion, bool if_not_exists, bool quiet) @@ -194,6 +195,10 @@ CollationCreate(const char *collname, Oid collnamespace, values[Anum_pg_collation_colliculocale - 1] = CStringGetTextDatum(colliculocale); else nulls[Anum_pg_collation_colliculocale - 1] = true; + if (collicurules) + values[Anum_pg_collation_collicurules - 1] = CStringGetTextDatum(collicurules); + else + nulls[Anum_pg_collation_collicurules - 1] = true; if (collversion) values[Anum_pg_collation_collversion - 1] = CStringGetTextDatum(collversion); else diff --git a/src/backend/commands/collationcmds.c b/src/backend/commands/collationcmds.c index eb62d285ea..c51e3afdb4 100644 --- a/src/backend/commands/collationcmds.c +++ b/src/backend/commands/collationcmds.c @@ -64,10 +64,12 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e DefElem *lcctypeEl = NULL; DefElem *providerEl = NULL; DefElem *deterministicEl = NULL; + DefElem *rulesEl = NULL; DefElem *versionEl = NULL; char *collcollate; char *collctype; char *colliculocale; + char *collicurules; bool collisdeterministic; int collencoding; char collprovider; @@ -99,6 +101,8 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e defelp = &providerEl; else if (strcmp(defel->defname, "deterministic") == 0) defelp = &deterministicEl; + else if (strcmp(defel->defname, "rules") == 0) + defelp = &rulesEl; else if (strcmp(defel->defname, "version") == 0) defelp = &versionEl; else @@ -161,6 +165,12 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool 
if_not_e else colliculocale = NULL; + datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collicurules, &isnull); + if (!isnull) + collicurules = TextDatumGetCString(datum); + else + collicurules = NULL; + ReleaseSysCache(tp); /* @@ -182,6 +192,7 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e collcollate = NULL; collctype = NULL; colliculocale = NULL; + collicurules = NULL; if (providerEl) collproviderstr = defGetString(providerEl); @@ -191,6 +202,9 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e else collisdeterministic = true; + if (rulesEl) + collicurules = defGetString(rulesEl); + if (versionEl) collversion = defGetString(versionEl); @@ -297,6 +311,7 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e collcollate, collctype, colliculocale, + collicurules, collversion, if_not_exists, false); /* not quiet */ @@ -680,7 +695,7 @@ create_collation_from_locale(const char *locale, int nspid, */ collid = CollationCreate(locale, nspid, GetUserId(), COLLPROVIDER_LIBC, true, enc, - locale, locale, NULL, + locale, locale, NULL, NULL, get_collation_actual_version(COLLPROVIDER_LIBC, locale), true, true); if (OidIsValid(collid)) @@ -755,7 +770,7 @@ win32_read_locale(LPWSTR pStr, DWORD dwFlags, LPARAM lparam) collid = CollationCreate(alias, param->nspid, GetUserId(), COLLPROVIDER_LIBC, true, enc, - localebuf, localebuf, NULL, + localebuf, localebuf, NULL, NULL, get_collation_actual_version(COLLPROVIDER_LIBC, localebuf), true, true); if (OidIsValid(collid)) @@ -889,7 +904,7 @@ pg_import_system_collations(PG_FUNCTION_ARGS) collid = CollationCreate(alias, nspid, GetUserId(), COLLPROVIDER_LIBC, true, enc, - locale, locale, NULL, + locale, locale, NULL, NULL, get_collation_actual_version(COLLPROVIDER_LIBC, locale), true, true); if (OidIsValid(collid)) @@ -951,7 +966,7 @@ pg_import_system_collations(PG_FUNCTION_ARGS) collid = CollationCreate(psprintf("%s-x-icu", langtag), 
nspid, GetUserId(), COLLPROVIDER_ICU, true, -1, - NULL, NULL, iculocstr, + NULL, NULL, iculocstr, NULL, get_collation_actual_version(COLLPROVIDER_ICU, iculocstr), true, true); if (OidIsValid(collid)) diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c index a0259cc593..7063a5a7ed 100644 --- a/src/backend/commands/dbcommands.c +++ b/src/backend/commands/dbcommands.c @@ -119,6 +119,7 @@ static bool get_db_info(const char *name, LOCKMODE lockmode, int *encodingP, bool *dbIsTemplateP, bool *dbAllowConnP, TransactionId *dbFrozenXidP, MultiXactId *dbMinMultiP, Oid *dbTablespace, char **dbCollate, char **dbCtype, char **dbIculocale, + char **dbIcurules, char *dbLocProvider, char **dbCollversion); static void remove_dbtablespaces(Oid db_id); @@ -675,6 +676,7 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt) char *src_collate = NULL; char *src_ctype = NULL; char *src_iculocale = NULL; + char *src_icurules = NULL; char src_locprovider = '\0'; char *src_collversion = NULL; bool src_istemplate; @@ -698,6 +700,7 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt) DefElem *dcollate = NULL; DefElem *dctype = NULL; DefElem *diculocale = NULL; + DefElem *dicurules = NULL; DefElem *dlocprovider = NULL; DefElem *distemplate = NULL; DefElem *dallowconnections = NULL; @@ -710,6 +713,7 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt) char *dbcollate = NULL; char *dbctype = NULL; char *dbiculocale = NULL; + char *dbicurules = NULL; char dblocprovider = '\0'; char *canonname; int encoding = -1; @@ -775,6 +779,12 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt) errorConflictingDefElem(defel, pstate); diculocale = defel; } + else if (strcmp(defel->defname, "icu_rules") == 0) + { + if (dicurules) + errorConflictingDefElem(defel, pstate); + dicurules = defel; + } else if (strcmp(defel->defname, "locale_provider") == 0) { if (dlocprovider) @@ -893,6 +903,8 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt) dbctype = 
defGetString(dctype); if (diculocale && diculocale->arg) dbiculocale = defGetString(diculocale); + if (dicurules && dicurules->arg) + dbicurules = defGetString(dicurules); if (dlocprovider && dlocprovider->arg) { char *locproviderstr = defGetString(dlocprovider); @@ -958,7 +970,7 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt) &src_dboid, &src_owner, &src_encoding, &src_istemplate, &src_allowconn, &src_frozenxid, &src_minmxid, &src_deftablespace, - &src_collate, &src_ctype, &src_iculocale, &src_locprovider, + &src_collate, &src_ctype, &src_iculocale, &src_icurules, &src_locprovider, &src_collversion)) ereport(ERROR, (errcode(ERRCODE_UNDEFINED_DATABASE), @@ -1006,6 +1018,8 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt) dblocprovider = src_locprovider; if (dbiculocale == NULL && dblocprovider == COLLPROVIDER_ICU) dbiculocale = src_iculocale; + if (dbicurules == NULL && dblocprovider == COLLPROVIDER_ICU) + dbicurules = src_icurules; /* Some encodings are client only */ if (!PG_VALID_BE_ENCODING(encoding)) @@ -1097,6 +1111,9 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt) if (dblocprovider == COLLPROVIDER_ICU) { + char *val1; + char *val2; + Assert(dbiculocale); Assert(src_iculocale); if (strcmp(dbiculocale, src_iculocale) != 0) @@ -1105,6 +1122,19 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt) errmsg("new ICU locale (%s) is incompatible with the ICU locale of the template database (%s)", dbiculocale, src_iculocale), errhint("Use the same ICU locale as in the template database, or use template0 as template."))); + + val1 = dbicurules; + if (!val1) + val1 = ""; + val2 = src_icurules; + if (!val2) + val2 = ""; + if (strcmp(val1, val2) != 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("new ICU collation rules (%s) are incompatible with the ICU collation rules of the template database (%s)", + val1, val2), + errhint("Use the same ICU collation rules as in the template database, or use template0 as 
template."))); } } @@ -1313,6 +1343,10 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt) new_record[Anum_pg_database_daticulocale - 1] = CStringGetTextDatum(dbiculocale); else new_record_nulls[Anum_pg_database_daticulocale - 1] = true; + if (dbicurules) + new_record[Anum_pg_database_daticurules - 1] = CStringGetTextDatum(dbicurules); + else + new_record_nulls[Anum_pg_database_daticurules - 1] = true; if (dbcollversion) new_record[Anum_pg_database_datcollversion - 1] = CStringGetTextDatum(dbcollversion); else @@ -1526,7 +1560,7 @@ dropdb(const char *dbname, bool missing_ok, bool force) pgdbrel = table_open(DatabaseRelationId, RowExclusiveLock); if (!get_db_info(dbname, AccessExclusiveLock, &db_id, NULL, NULL, - &db_istemplate, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)) + &db_istemplate, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)) { if (!missing_ok) { @@ -1726,7 +1760,7 @@ RenameDatabase(const char *oldname, const char *newname) rel = table_open(DatabaseRelationId, RowExclusiveLock); if (!get_db_info(oldname, AccessExclusiveLock, &db_id, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)) + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)) ereport(ERROR, (errcode(ERRCODE_UNDEFINED_DATABASE), errmsg("database \"%s\" does not exist", oldname))); @@ -1836,7 +1870,7 @@ movedb(const char *dbname, const char *tblspcname) pgdbrel = table_open(DatabaseRelationId, RowExclusiveLock); if (!get_db_info(dbname, AccessExclusiveLock, &db_id, NULL, NULL, - NULL, NULL, NULL, NULL, &src_tblspcoid, NULL, NULL, NULL, NULL, NULL)) + NULL, NULL, NULL, NULL, &src_tblspcoid, NULL, NULL, NULL, NULL, NULL, NULL)) ereport(ERROR, (errcode(ERRCODE_UNDEFINED_DATABASE), errmsg("database \"%s\" does not exist", dbname))); @@ -2599,6 +2633,7 @@ get_db_info(const char *name, LOCKMODE lockmode, int *encodingP, bool *dbIsTemplateP, bool *dbAllowConnP, TransactionId *dbFrozenXidP, MultiXactId *dbMinMultiP, Oid *dbTablespace, 
char **dbCollate, char **dbCtype, char **dbIculocale, + char **dbIcurules, char *dbLocProvider, char **dbCollversion) { @@ -2715,6 +2750,14 @@ get_db_info(const char *name, LOCKMODE lockmode, else *dbIculocale = TextDatumGetCString(datum); } + if (dbIcurules) + { + datum = SysCacheGetAttr(DATABASEOID, tuple, Anum_pg_database_daticurules, &isnull); + if (isnull) + *dbIcurules = NULL; + else + *dbIcurules = TextDatumGetCString(datum); + } if (dbCollversion) { datum = SysCacheGetAttr(DATABASEOID, tuple, Anum_pg_database_datcollversion, &isnull); diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c index 059e4fd79f..da514b9396 100644 --- a/src/backend/utils/adt/pg_locale.c +++ b/src/backend/utils/adt/pg_locale.c @@ -69,6 +69,7 @@ #ifdef USE_ICU #include +#include #endif #ifdef __GLIBC__ @@ -1402,6 +1403,7 @@ struct pg_locale_struct default_locale; void make_icu_collator(const char *iculocstr, + const char *icurules, struct pg_locale_struct *resultp) { #ifdef USE_ICU @@ -1418,6 +1420,35 @@ make_icu_collator(const char *iculocstr, if (U_ICU_VERSION_MAJOR_NUM < 54) icu_set_collation_attributes(collator, iculocstr); + /* + * If rules are specified, we extract the rules of the standard collation, + * add our own rules, and make a new collator with the combined rules. 
+ */ + if (icurules) + { + const UChar *default_rules; + UChar *agg_rules; + UChar *my_rules; + int32_t length; + + default_rules = ucol_getRules(collator, &length); + icu_to_uchar(&my_rules, icurules, strlen(icurules)); + + agg_rules = palloc_array(UChar, u_strlen(default_rules) + u_strlen(my_rules) + 1); + u_strcpy(agg_rules, default_rules); + u_strcat(agg_rules, my_rules); + + ucol_close(collator); + + status = U_ZERO_ERROR; + collator = ucol_openRules(agg_rules, u_strlen(agg_rules), + UCOL_DEFAULT, UCOL_DEFAULT_STRENGTH, NULL, &status); + if (U_FAILURE(status)) + ereport(ERROR, + (errmsg("could not open collator for locale \"%s\" with rules \"%s\": %s", + iculocstr, icurules, u_errorName(status)))); + } + /* We will leak this string if the caller errors later :-( */ resultp->info.icu.locale = MemoryContextStrdup(TopMemoryContext, iculocstr); resultp->info.icu.ucol = collator; @@ -1580,11 +1611,19 @@ pg_newlocale_from_collation(Oid collid) else if (collform->collprovider == COLLPROVIDER_ICU) { const char *iculocstr; + const char *icurules; datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_colliculocale, &isnull); Assert(!isnull); iculocstr = TextDatumGetCString(datum); - make_icu_collator(iculocstr, &result); + + datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collicurules, &isnull); + if (!isnull) + icurules = TextDatumGetCString(datum); + else + icurules = NULL; + + make_icu_collator(iculocstr, icurules, &result); } datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collversion, diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c index 2f07ca7a0e..b0e20cc635 100644 --- a/src/backend/utils/init/postinit.c +++ b/src/backend/utils/init/postinit.c @@ -421,10 +421,19 @@ CheckMyDatabase(const char *name, bool am_superuser, bool override_allow_connect if (dbform->datlocprovider == COLLPROVIDER_ICU) { + char *icurules; + datum = SysCacheGetAttr(DATABASEOID, tup, Anum_pg_database_daticulocale, &isnull); Assert(!isnull); 
iculocale = TextDatumGetCString(datum); - make_icu_collator(iculocale, &default_locale); + + datum = SysCacheGetAttr(DATABASEOID, tup, Anum_pg_database_daticurules, &isnull); + if (!isnull) + icurules = TextDatumGetCString(datum); + else + icurules = NULL; + + make_icu_collator(iculocale, icurules, &default_locale); } else iculocale = NULL; diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index 1a06eeaf6a..60ae19ef12 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -2844,6 +2844,7 @@ dumpDatabase(Archive *fout) i_collate, i_ctype, i_daticulocale, + i_daticurules, i_frozenxid, i_minmxid, i_datacl, @@ -2862,6 +2863,7 @@ dumpDatabase(Archive *fout) *collate, *ctype, *iculocale, + *icurules, *datistemplate, *datconnlimit, *tablespace; @@ -2888,6 +2890,10 @@ dumpDatabase(Archive *fout) appendPQExpBufferStr(dbQry, "datlocprovider, daticulocale, datcollversion, "); else appendPQExpBufferStr(dbQry, "'c' AS datlocprovider, NULL AS daticulocale, NULL AS datcollversion, "); + if (fout->remoteVersion >= 160000) + appendPQExpBufferStr(dbQry, "daticurules, "); + else + appendPQExpBufferStr(dbQry, "NULL AS daticurules, "); appendPQExpBufferStr(dbQry, "(SELECT spcname FROM pg_tablespace t WHERE t.oid = dattablespace) AS tablespace, " "shobj_description(oid, 'pg_database') AS description " @@ -2905,6 +2911,7 @@ dumpDatabase(Archive *fout) i_collate = PQfnumber(res, "datcollate"); i_ctype = PQfnumber(res, "datctype"); i_daticulocale = PQfnumber(res, "daticulocale"); + i_daticurules = PQfnumber(res, "daticurules"); i_frozenxid = PQfnumber(res, "datfrozenxid"); i_minmxid = PQfnumber(res, "datminmxid"); i_datacl = PQfnumber(res, "datacl"); @@ -2926,6 +2933,10 @@ dumpDatabase(Archive *fout) iculocale = PQgetvalue(res, 0, i_daticulocale); else iculocale = NULL; + if (!PQgetisnull(res, 0, i_daticurules)) + icurules = PQgetvalue(res, 0, i_daticurules); + else + icurules = NULL; frozenxid = atooid(PQgetvalue(res, 0, i_frozenxid)); minmxid = 
atooid(PQgetvalue(res, 0, i_minmxid)); dbdacl.acl = PQgetvalue(res, 0, i_datacl); @@ -2991,6 +3002,11 @@ dumpDatabase(Archive *fout) appendPQExpBufferStr(creaQry, " ICU_LOCALE = "); appendStringLiteralAH(creaQry, iculocale, fout); } + if (icurules) + { + appendPQExpBufferStr(creaQry, " ICU_RULES = "); + appendStringLiteralAH(creaQry, icurules, fout); + } /* * For binary upgrade, carry over the collation version. For normal @@ -13154,10 +13170,12 @@ dumpCollation(Archive *fout, const CollInfo *collinfo) int i_collcollate; int i_collctype; int i_colliculocale; + int i_collicurules; const char *collprovider; const char *collcollate; const char *collctype; const char *colliculocale; + const char *collicurules; /* Do nothing in data-only dump */ if (dopt->dataOnly) @@ -13195,6 +13213,13 @@ dumpCollation(Archive *fout, const CollInfo *collinfo) appendPQExpBufferStr(query, "NULL AS colliculocale, "); + if (fout->remoteVersion >= 160000) + appendPQExpBufferStr(query, + "collicurules, "); + else + appendPQExpBufferStr(query, + "NULL AS collicurules, "); + appendPQExpBuffer(query, "collcollate, " "collctype " @@ -13209,6 +13234,7 @@ dumpCollation(Archive *fout, const CollInfo *collinfo) i_collcollate = PQfnumber(res, "collcollate"); i_collctype = PQfnumber(res, "collctype"); i_colliculocale = PQfnumber(res, "colliculocale"); + i_collicurules = PQfnumber(res, "collicurules"); collprovider = PQgetvalue(res, 0, i_collprovider); @@ -13227,6 +13253,11 @@ dumpCollation(Archive *fout, const CollInfo *collinfo) else colliculocale = NULL; + if (!PQgetisnull(res, 0, i_collicurules)) + collicurules = PQgetvalue(res, 0, i_collicurules); + else + collicurules = NULL; + appendPQExpBuffer(delq, "DROP COLLATION %s;\n", fmtQualifiedDumpable(collinfo)); @@ -13272,6 +13303,12 @@ dumpCollation(Archive *fout, const CollInfo *collinfo) } } + if (collicurules) + { + appendPQExpBufferStr(q, ", rules = "); + appendStringLiteralAH(q, collicurules, fout); + } + /* * For binary upgrade, carry over the 
collation version. For normal * dump/restore, omit the version, so that it is computed upon restore. diff --git a/src/bin/psql/describe.c b/src/bin/psql/describe.c index c8a0bb7b3a..b2455b9c47 100644 --- a/src/bin/psql/describe.c +++ b/src/bin/psql/describe.c @@ -928,38 +928,52 @@ listAllDbs(const char *pattern, bool verbose) initPQExpBuffer(&buf); printfPQExpBuffer(&buf, - "SELECT d.datname as \"%s\",\n" - " pg_catalog.pg_get_userbyid(d.datdba) as \"%s\",\n" - " pg_catalog.pg_encoding_to_char(d.encoding) as \"%s\",\n" - " d.datcollate as \"%s\",\n" - " d.datctype as \"%s\",\n", + "SELECT\n" + " d.datname as \"%s\",\n" + " pg_catalog.pg_get_userbyid(d.datdba) as \"%s\",\n" + " pg_catalog.pg_encoding_to_char(d.encoding) as \"%s\",\n", gettext_noop("Name"), gettext_noop("Owner"), - gettext_noop("Encoding"), - gettext_noop("Collate"), - gettext_noop("Ctype")); + gettext_noop("Encoding")); if (pset.sversion >= 150000) appendPQExpBuffer(&buf, - " d.daticulocale as \"%s\",\n" - " CASE d.datlocprovider WHEN 'c' THEN 'libc' WHEN 'i' THEN 'icu' END AS \"%s\",\n", - gettext_noop("ICU Locale"), + " CASE d.datlocprovider WHEN 'c' THEN 'libc' WHEN 'i' THEN 'icu' END AS \"%s\",\n", gettext_noop("Locale Provider")); else appendPQExpBuffer(&buf, - " NULL as \"%s\",\n" - " 'libc' AS \"%s\",\n", - gettext_noop("ICU Locale"), + " 'libc' AS \"%s\",\n", gettext_noop("Locale Provider")); - appendPQExpBufferStr(&buf, " "); + appendPQExpBuffer(&buf, + " d.datcollate as \"%s\",\n" + " d.datctype as \"%s\",\n", + gettext_noop("Collate"), + gettext_noop("Ctype")); + if (pset.sversion >= 150000) + appendPQExpBuffer(&buf, + " d.daticulocale as \"%s\",\n", + gettext_noop("ICU Locale")); + else + appendPQExpBuffer(&buf, + " NULL as \"%s\",\n", + gettext_noop("ICU Locale")); + if (pset.sversion >= 160000) + appendPQExpBuffer(&buf, + " d.daticurules as \"%s\",\n", + gettext_noop("ICU Rules")); + else + appendPQExpBuffer(&buf, + " NULL as \"%s\",\n", + gettext_noop("ICU Rules")); + 
appendPQExpBufferStr(&buf, " "); printACLColumn(&buf, "d.datacl"); if (verbose) appendPQExpBuffer(&buf, - ",\n CASE WHEN pg_catalog.has_database_privilege(d.datname, 'CONNECT')\n" - " THEN pg_catalog.pg_size_pretty(pg_catalog.pg_database_size(d.datname))\n" - " ELSE 'No Access'\n" - " END as \"%s\"" - ",\n t.spcname as \"%s\"" - ",\n pg_catalog.shobj_description(d.oid, 'pg_database') as \"%s\"", + ",\n CASE WHEN pg_catalog.has_database_privilege(d.datname, 'CONNECT')\n" + " THEN pg_catalog.pg_size_pretty(pg_catalog.pg_database_size(d.datname))\n" + " ELSE 'No Access'\n" + " END as \"%s\"" + ",\n t.spcname as \"%s\"" + ",\n pg_catalog.shobj_description(d.oid, 'pg_database') as \"%s\"", gettext_noop("Size"), gettext_noop("Tablespace"), gettext_noop("Description")); @@ -4854,52 +4868,64 @@ listCollations(const char *pattern, bool verbose, bool showSystem) PQExpBufferData buf; PGresult *res; printQueryOpt myopt = pset.popt; - static const bool translate_columns[] = {false, false, false, false, false, false, true, false}; + static const bool translate_columns[] = {false, false, false, false, false, false, false, true, false}; initPQExpBuffer(&buf); printfPQExpBuffer(&buf, - "SELECT n.nspname AS \"%s\",\n" - " c.collname AS \"%s\",\n" - " c.collcollate AS \"%s\",\n" - " c.collctype AS \"%s\"", + "SELECT\n" + " n.nspname AS \"%s\",\n" + " c.collname AS \"%s\",\n", gettext_noop("Schema"), - gettext_noop("Name"), + gettext_noop("Name")); + + if (pset.sversion >= 100000) + appendPQExpBuffer(&buf, + " CASE c.collprovider WHEN 'd' THEN 'default' WHEN 'c' THEN 'libc' WHEN 'i' THEN 'icu' END AS \"%s\",\n", + gettext_noop("Provider")); + else + appendPQExpBuffer(&buf, + " 'libc' AS \"%s\",\n", + gettext_noop("Provider")); + + appendPQExpBuffer(&buf, + " c.collcollate AS \"%s\",\n" + " c.collctype AS \"%s\",\n", gettext_noop("Collate"), gettext_noop("Ctype")); if (pset.sversion >= 150000) appendPQExpBuffer(&buf, - ",\n c.colliculocale AS \"%s\"", + " c.colliculocale AS \"%s\",\n", 
gettext_noop("ICU Locale")); else appendPQExpBuffer(&buf, - ",\n c.collcollate AS \"%s\"", + " c.collcollate AS \"%s\",\n", gettext_noop("ICU Locale")); - if (pset.sversion >= 100000) + if (pset.sversion >= 160000) appendPQExpBuffer(&buf, - ",\n CASE c.collprovider WHEN 'd' THEN 'default' WHEN 'c' THEN 'libc' WHEN 'i' THEN 'icu' END AS \"%s\"", - gettext_noop("Provider")); + " c.collicurules AS \"%s\",\n", + gettext_noop("ICU Rules")); else appendPQExpBuffer(&buf, - ",\n 'libc' AS \"%s\"", - gettext_noop("Provider")); + " NULL AS \"%s\",\n", + gettext_noop("ICU Rules")); if (pset.sversion >= 120000) appendPQExpBuffer(&buf, - ",\n CASE WHEN c.collisdeterministic THEN '%s' ELSE '%s' END AS \"%s\"", + " CASE WHEN c.collisdeterministic THEN '%s' ELSE '%s' END AS \"%s\"", gettext_noop("yes"), gettext_noop("no"), gettext_noop("Deterministic?")); else appendPQExpBuffer(&buf, - ",\n '%s' AS \"%s\"", + " '%s' AS \"%s\"", gettext_noop("yes"), gettext_noop("Deterministic?")); if (verbose) appendPQExpBuffer(&buf, - ",\n pg_catalog.obj_description(c.oid, 'pg_collation') AS \"%s\"", + ",\n pg_catalog.obj_description(c.oid, 'pg_collation') AS \"%s\"", gettext_noop("Description")); appendPQExpBufferStr(&buf, diff --git a/src/include/catalog/pg_collation.h b/src/include/catalog/pg_collation.h index f9d5e88faf..bfa3568451 100644 --- a/src/include/catalog/pg_collation.h +++ b/src/include/catalog/pg_collation.h @@ -43,6 +43,7 @@ CATALOG(pg_collation,3456,CollationRelationId) text collcollate BKI_DEFAULT(_null_); /* LC_COLLATE setting */ text collctype BKI_DEFAULT(_null_); /* LC_CTYPE setting */ text colliculocale BKI_DEFAULT(_null_); /* ICU locale ID */ + text collicurules BKI_DEFAULT(_null_); /* ICU collation rules */ text collversion BKI_DEFAULT(_null_); /* provider-dependent * version of collation * data */ @@ -91,6 +92,7 @@ extern Oid CollationCreate(const char *collname, Oid collnamespace, int32 collencoding, const char *collcollate, const char *collctype, const char 
*colliculocale, + const char *collicurules, const char *collversion, bool if_not_exists, bool quiet); diff --git a/src/include/catalog/pg_database.h b/src/include/catalog/pg_database.h index 3da3492e92..a5c4efe086 100644 --- a/src/include/catalog/pg_database.h +++ b/src/include/catalog/pg_database.h @@ -71,6 +71,9 @@ CATALOG(pg_database,1262,DatabaseRelationId) BKI_SHARED_RELATION BKI_ROWTYPE_OID /* ICU locale ID */ text daticulocale; + /* ICU collation rules */ + text daticurules BKI_DEFAULT(_null_); + /* provider-dependent version of collation data */ text datcollversion BKI_DEFAULT(_null_); diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h index cede43440b..bcadefbf08 100644 --- a/src/include/utils/pg_locale.h +++ b/src/include/utils/pg_locale.h @@ -95,6 +95,7 @@ typedef struct pg_locale_struct *pg_locale_t; extern PGDLLIMPORT struct pg_locale_struct default_locale; extern void make_icu_collator(const char *iculocstr, + const char *icurules, struct pg_locale_struct *resultp); extern pg_locale_t pg_newlocale_from_collation(Oid collid); diff --git a/src/test/regress/expected/collate.icu.utf8.out b/src/test/regress/expected/collate.icu.utf8.out index 4354dc07b8..2ac707b362 100644 --- a/src/test/regress/expected/collate.icu.utf8.out +++ b/src/test/regress/expected/collate.icu.utf8.out @@ -1190,6 +1190,36 @@ SELECT 'Goldmann' < 'Götz' COLLATE "de-x-icu", 'Goldmann' > 'Götz' COLLATE tes t | t (1 row) +-- rules +CREATE COLLATION testcoll_rules1 (provider = icu, locale = '', rules = '&a < g'); +CREATE TABLE test7 (a text); +-- example from https://unicode-org.github.io/icu/userguide/collation/customization/#syntax +INSERT INTO test7 VALUES ('Abernathy'), ('apple'), ('bird'), ('Boston'), ('Graham'), ('green'); +SELECT * FROM test7 ORDER BY a COLLATE "en-x-icu"; + a +----------- + Abernathy + apple + bird + Boston + Graham + green +(6 rows) + +SELECT * FROM test7 ORDER BY a COLLATE testcoll_rules1; + a +----------- + Abernathy + apple + green + 
bird + Boston + Graham +(6 rows) + +DROP TABLE test7; +CREATE COLLATION testcoll_rulesx (provider = icu, locale = '', rules = '!!wrong!!'); +ERROR: could not open collator for locale "" with rules "!!wrong!!": U_INVALID_FORMAT_ERROR -- nondeterministic collations CREATE COLLATION ctest_det (provider = icu, locale = '', deterministic = true); CREATE COLLATION ctest_nondet (provider = icu, locale = '', deterministic = false); diff --git a/src/test/regress/expected/psql.out b/src/test/regress/expected/psql.out index 8fc62cebd2..ba66b8a2c5 100644 --- a/src/test/regress/expected/psql.out +++ b/src/test/regress/expected/psql.out @@ -6170,9 +6170,9 @@ List of schemas (0 rows) \dO "no.such.collation" - List of collations - Schema | Name | Collate | Ctype | ICU Locale | Provider | Deterministic? ---------+------+---------+-------+------------+----------+---------------- + List of collations + Schema | Name | Provider | Collate | Ctype | ICU Locale | ICU Rules | Deterministic? +--------+------+----------+---------+-------+------------+-----------+---------------- (0 rows) \dp "no.such.access.privilege" @@ -6359,9 +6359,9 @@ cross-database references are not implemented: "no.such.schema"."no.such.languag (0 rows) \dO "no.such.schema"."no.such.collation" - List of collations - Schema | Name | Collate | Ctype | ICU Locale | Provider | Deterministic? ---------+------+---------+-------+------------+----------+---------------- + List of collations + Schema | Name | Provider | Collate | Ctype | ICU Locale | ICU Rules | Deterministic? +--------+------+----------+---------+-------+------------+-----------+---------------- (0 rows) \dp "no.such.schema"."no.such.access.privilege" @@ -6502,9 +6502,9 @@ List of text search templates (0 rows) \dO regression."no.such.schema"."no.such.collation" - List of collations - Schema | Name | Collate | Ctype | ICU Locale | Provider | Deterministic? 
---------+------+---------+-------+------------+----------+---------------- + List of collations + Schema | Name | Provider | Collate | Ctype | ICU Locale | ICU Rules | Deterministic? +--------+------+----------+---------+-------+------------+-----------+---------------- (0 rows) \dp regression."no.such.schema"."no.such.access.privilege" diff --git a/src/test/regress/sql/collate.icu.utf8.sql b/src/test/regress/sql/collate.icu.utf8.sql index b0ddc7db44..aa95c1ec42 100644 --- a/src/test/regress/sql/collate.icu.utf8.sql +++ b/src/test/regress/sql/collate.icu.utf8.sql @@ -472,6 +472,19 @@ CREATE COLLATION testcoll_de_phonebook (provider = icu, locale = 'de@collation=p SELECT 'Goldmann' < 'Götz' COLLATE "de-x-icu", 'Goldmann' > 'Götz' COLLATE testcoll_de_phonebook; +-- rules + +CREATE COLLATION testcoll_rules1 (provider = icu, locale = '', rules = '&a < g'); +CREATE TABLE test7 (a text); +-- example from https://unicode-org.github.io/icu/userguide/collation/customization/#syntax +INSERT INTO test7 VALUES ('Abernathy'), ('apple'), ('bird'), ('Boston'), ('Graham'), ('green'); +SELECT * FROM test7 ORDER BY a COLLATE "en-x-icu"; +SELECT * FROM test7 ORDER BY a COLLATE testcoll_rules1; +DROP TABLE test7; + +CREATE COLLATION testcoll_rulesx (provider = icu, locale = '', rules = '!!wrong!!'); + + -- nondeterministic collations CREATE COLLATION ctest_det (provider = icu, locale = '', deterministic = true); base-commit: 2ddab010c2777c6a965cea82dc1b809ddc33ecc1 -- 2.39.2