From 615763ccf5a1c18c3da1286eb4c86d19eb397ac0 Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Fri, 3 Mar 2023 11:46:56 +0100 Subject: [PATCH v7] Allow tailoring of ICU locales with custom rules This exposes the ICU facility to add custom collation rules to a standard collation. Discussion: https://www.postgresql.org/message-id/flat/821c71a4-6ef0-d366-9acf-bb8e367f739f@enterprisedb.com --- doc/src/sgml/catalogs.sgml | 18 ++++ doc/src/sgml/ref/create_collation.sgml | 22 ++++ doc/src/sgml/ref/create_database.sgml | 14 +++ doc/src/sgml/ref/createdb.sgml | 10 ++ doc/src/sgml/ref/initdb.sgml | 10 ++ src/backend/catalog/pg_collation.c | 5 + src/backend/commands/collationcmds.c | 23 +++- src/backend/commands/dbcommands.c | 51 ++++++++- src/backend/utils/adt/pg_locale.c | 41 ++++++- src/backend/utils/init/postinit.c | 11 +- src/bin/initdb/initdb.c | 15 ++- src/bin/pg_dump/pg_dump.c | 37 +++++++ src/bin/psql/describe.c | 100 +++++++++++------- src/bin/scripts/createdb.c | 11 ++ src/include/catalog/pg_collation.h | 2 + src/include/catalog/pg_database.dat | 2 +- src/include/catalog/pg_database.h | 3 + src/include/utils/pg_locale.h | 1 + .../regress/expected/collate.icu.utf8.out | 30 ++++++ src/test/regress/expected/psql.out | 18 ++-- src/test/regress/sql/collate.icu.utf8.sql | 13 +++ 21 files changed, 379 insertions(+), 58 deletions(-) diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml index c1e4048054..746baf5053 100644 --- a/doc/src/sgml/catalogs.sgml +++ b/doc/src/sgml/catalogs.sgml @@ -2428,6 +2428,15 @@ <structname>pg_collation</structname> Columns + + + collicurules text + + + ICU collation rules for this collation object + + + collversion text @@ -3106,6 +3115,15 @@ <structname>pg_database</structname> Columns + + + daticurules text + + + ICU collation rules for this database + + + datcollversion text diff --git a/doc/src/sgml/ref/create_collation.sgml b/doc/src/sgml/ref/create_collation.sgml index 136976165c..289f8147f1 100644 --- 
a/doc/src/sgml/ref/create_collation.sgml +++ b/doc/src/sgml/ref/create_collation.sgml @@ -27,6 +27,7 @@ [ LC_CTYPE = lc_ctype, ] [ PROVIDER = provider, ] [ DETERMINISTIC = boolean, ] + [ RULES = rules, ] [ VERSION = version ] ) CREATE COLLATION [ IF NOT EXISTS ] name FROM existing_collation @@ -149,6 +150,19 @@ Parameters + + rules + + + + Specifies additional collation rules to customize the behavior of the + collation. This is supported for ICU only. See + the ICU collation customization guide (https://unicode-org.github.io/icu/userguide/collation/customization/) for details on the syntax. + + + + version @@ -228,6 +242,14 @@ Examples + + To create a collation using the ICU provider, based on the English ICU + locale, with custom rules: + + + + + To create a collation from an existing collation: diff --git a/doc/src/sgml/ref/create_database.sgml b/doc/src/sgml/ref/create_database.sgml index 57d13e34c2..13793bb6b7 100644 --- a/doc/src/sgml/ref/create_database.sgml +++ b/doc/src/sgml/ref/create_database.sgml @@ -30,6 +30,7 @@ [ LC_COLLATE [=] lc_collate ] [ LC_CTYPE [=] lc_ctype ] [ ICU_LOCALE [=] icu_locale ] + [ ICU_RULES [=] icu_rules ] [ LOCALE_PROVIDER [=] locale_provider ] [ COLLATION_VERSION = collation_version ] [ TABLESPACE [=] tablespace_name ] @@ -192,6 +193,19 @@ Parameters + + icu_rules + + + Specifies additional collation rules to customize the behavior of the + default collation of this database. This is supported for ICU only. + See + the ICU collation customization guide (https://unicode-org.github.io/icu/userguide/collation/customization/) for details on the syntax. + + + + locale_provider diff --git a/doc/src/sgml/ref/createdb.sgml b/doc/src/sgml/ref/createdb.sgml index 671cd362d9..e23419ba6c 100644 --- a/doc/src/sgml/ref/createdb.sgml +++ b/doc/src/sgml/ref/createdb.sgml @@ -157,6 +157,16 @@ Options + + + + + Specifies additional collation rules to customize the behavior of the + default collation of this database. This is supported for ICU only. 
+ + + + diff --git a/doc/src/sgml/ref/initdb.sgml b/doc/src/sgml/ref/initdb.sgml index 5b2bdac101..c96164195d 100644 --- a/doc/src/sgml/ref/initdb.sgml +++ b/doc/src/sgml/ref/initdb.sgml @@ -239,6 +239,16 @@ Options + + + + + Specifies additional collation rules to customize the behavior of the + default collation. This is supported for ICU only. + + + + diff --git a/src/backend/catalog/pg_collation.c b/src/backend/catalog/pg_collation.c index 287b13725d..fd022e6fc2 100644 --- a/src/backend/catalog/pg_collation.c +++ b/src/backend/catalog/pg_collation.c @@ -50,6 +50,7 @@ CollationCreate(const char *collname, Oid collnamespace, int32 collencoding, const char *collcollate, const char *collctype, const char *colliculocale, + const char *collicurules, const char *collversion, bool if_not_exists, bool quiet) @@ -194,6 +195,10 @@ CollationCreate(const char *collname, Oid collnamespace, values[Anum_pg_collation_colliculocale - 1] = CStringGetTextDatum(colliculocale); else nulls[Anum_pg_collation_colliculocale - 1] = true; + if (collicurules) + values[Anum_pg_collation_collicurules - 1] = CStringGetTextDatum(collicurules); + else + nulls[Anum_pg_collation_collicurules - 1] = true; if (collversion) values[Anum_pg_collation_collversion - 1] = CStringGetTextDatum(collversion); else diff --git a/src/backend/commands/collationcmds.c b/src/backend/commands/collationcmds.c index eb62d285ea..c51e3afdb4 100644 --- a/src/backend/commands/collationcmds.c +++ b/src/backend/commands/collationcmds.c @@ -64,10 +64,12 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e DefElem *lcctypeEl = NULL; DefElem *providerEl = NULL; DefElem *deterministicEl = NULL; + DefElem *rulesEl = NULL; DefElem *versionEl = NULL; char *collcollate; char *collctype; char *colliculocale; + char *collicurules; bool collisdeterministic; int collencoding; char collprovider; @@ -99,6 +101,8 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e defelp = 
&providerEl; else if (strcmp(defel->defname, "deterministic") == 0) defelp = &deterministicEl; + else if (strcmp(defel->defname, "rules") == 0) + defelp = &rulesEl; else if (strcmp(defel->defname, "version") == 0) defelp = &versionEl; else @@ -161,6 +165,12 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e else colliculocale = NULL; + datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collicurules, &isnull); + if (!isnull) + collicurules = TextDatumGetCString(datum); + else + collicurules = NULL; + ReleaseSysCache(tp); /* @@ -182,6 +192,7 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e collcollate = NULL; collctype = NULL; colliculocale = NULL; + collicurules = NULL; if (providerEl) collproviderstr = defGetString(providerEl); @@ -191,6 +202,9 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e else collisdeterministic = true; + if (rulesEl) + collicurules = defGetString(rulesEl); + if (versionEl) collversion = defGetString(versionEl); @@ -297,6 +311,7 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e collcollate, collctype, colliculocale, + collicurules, collversion, if_not_exists, false); /* not quiet */ @@ -680,7 +695,7 @@ create_collation_from_locale(const char *locale, int nspid, */ collid = CollationCreate(locale, nspid, GetUserId(), COLLPROVIDER_LIBC, true, enc, - locale, locale, NULL, + locale, locale, NULL, NULL, get_collation_actual_version(COLLPROVIDER_LIBC, locale), true, true); if (OidIsValid(collid)) @@ -755,7 +770,7 @@ win32_read_locale(LPWSTR pStr, DWORD dwFlags, LPARAM lparam) collid = CollationCreate(alias, param->nspid, GetUserId(), COLLPROVIDER_LIBC, true, enc, - localebuf, localebuf, NULL, + localebuf, localebuf, NULL, NULL, get_collation_actual_version(COLLPROVIDER_LIBC, localebuf), true, true); if (OidIsValid(collid)) @@ -889,7 +904,7 @@ pg_import_system_collations(PG_FUNCTION_ARGS) collid = 
CollationCreate(alias, nspid, GetUserId(), COLLPROVIDER_LIBC, true, enc, - locale, locale, NULL, + locale, locale, NULL, NULL, get_collation_actual_version(COLLPROVIDER_LIBC, locale), true, true); if (OidIsValid(collid)) @@ -951,7 +966,7 @@ pg_import_system_collations(PG_FUNCTION_ARGS) collid = CollationCreate(psprintf("%s-x-icu", langtag), nspid, GetUserId(), COLLPROVIDER_ICU, true, -1, - NULL, NULL, iculocstr, + NULL, NULL, iculocstr, NULL, get_collation_actual_version(COLLPROVIDER_ICU, iculocstr), true, true); if (OidIsValid(collid)) diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c index a0259cc593..7063a5a7ed 100644 --- a/src/backend/commands/dbcommands.c +++ b/src/backend/commands/dbcommands.c @@ -119,6 +119,7 @@ static bool get_db_info(const char *name, LOCKMODE lockmode, int *encodingP, bool *dbIsTemplateP, bool *dbAllowConnP, TransactionId *dbFrozenXidP, MultiXactId *dbMinMultiP, Oid *dbTablespace, char **dbCollate, char **dbCtype, char **dbIculocale, + char **dbIcurules, char *dbLocProvider, char **dbCollversion); static void remove_dbtablespaces(Oid db_id); @@ -675,6 +676,7 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt) char *src_collate = NULL; char *src_ctype = NULL; char *src_iculocale = NULL; + char *src_icurules = NULL; char src_locprovider = '\0'; char *src_collversion = NULL; bool src_istemplate; @@ -698,6 +700,7 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt) DefElem *dcollate = NULL; DefElem *dctype = NULL; DefElem *diculocale = NULL; + DefElem *dicurules = NULL; DefElem *dlocprovider = NULL; DefElem *distemplate = NULL; DefElem *dallowconnections = NULL; @@ -710,6 +713,7 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt) char *dbcollate = NULL; char *dbctype = NULL; char *dbiculocale = NULL; + char *dbicurules = NULL; char dblocprovider = '\0'; char *canonname; int encoding = -1; @@ -775,6 +779,12 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt) errorConflictingDefElem(defel, 
pstate); diculocale = defel; } + else if (strcmp(defel->defname, "icu_rules") == 0) + { + if (dicurules) + errorConflictingDefElem(defel, pstate); + dicurules = defel; + } else if (strcmp(defel->defname, "locale_provider") == 0) { if (dlocprovider) @@ -893,6 +903,8 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt) dbctype = defGetString(dctype); if (diculocale && diculocale->arg) dbiculocale = defGetString(diculocale); + if (dicurules && dicurules->arg) + dbicurules = defGetString(dicurules); if (dlocprovider && dlocprovider->arg) { char *locproviderstr = defGetString(dlocprovider); @@ -958,7 +970,7 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt) &src_dboid, &src_owner, &src_encoding, &src_istemplate, &src_allowconn, &src_frozenxid, &src_minmxid, &src_deftablespace, - &src_collate, &src_ctype, &src_iculocale, &src_locprovider, + &src_collate, &src_ctype, &src_iculocale, &src_icurules, &src_locprovider, &src_collversion)) ereport(ERROR, (errcode(ERRCODE_UNDEFINED_DATABASE), @@ -1006,6 +1018,8 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt) dblocprovider = src_locprovider; if (dbiculocale == NULL && dblocprovider == COLLPROVIDER_ICU) dbiculocale = src_iculocale; + if (dbicurules == NULL && dblocprovider == COLLPROVIDER_ICU) + dbicurules = src_icurules; /* Some encodings are client only */ if (!PG_VALID_BE_ENCODING(encoding)) @@ -1097,6 +1111,9 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt) if (dblocprovider == COLLPROVIDER_ICU) { + char *val1; + char *val2; + Assert(dbiculocale); Assert(src_iculocale); if (strcmp(dbiculocale, src_iculocale) != 0) @@ -1105,6 +1122,19 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt) errmsg("new ICU locale (%s) is incompatible with the ICU locale of the template database (%s)", dbiculocale, src_iculocale), errhint("Use the same ICU locale as in the template database, or use template0 as template."))); + + val1 = dbicurules; + if (!val1) + val1 = ""; + val2 = src_icurules; + if (!val2) + 
val2 = ""; + if (strcmp(val1, val2) != 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("new ICU collation rules (%s) are incompatible with the ICU collation rules of the template database (%s)", + val1, val2), + errhint("Use the same ICU collation rules as in the template database, or use template0 as template."))); } } @@ -1313,6 +1343,10 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt) new_record[Anum_pg_database_daticulocale - 1] = CStringGetTextDatum(dbiculocale); else new_record_nulls[Anum_pg_database_daticulocale - 1] = true; + if (dbicurules) + new_record[Anum_pg_database_daticurules - 1] = CStringGetTextDatum(dbicurules); + else + new_record_nulls[Anum_pg_database_daticurules - 1] = true; if (dbcollversion) new_record[Anum_pg_database_datcollversion - 1] = CStringGetTextDatum(dbcollversion); else @@ -1526,7 +1560,7 @@ dropdb(const char *dbname, bool missing_ok, bool force) pgdbrel = table_open(DatabaseRelationId, RowExclusiveLock); if (!get_db_info(dbname, AccessExclusiveLock, &db_id, NULL, NULL, - &db_istemplate, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)) + &db_istemplate, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)) { if (!missing_ok) { @@ -1726,7 +1760,7 @@ RenameDatabase(const char *oldname, const char *newname) rel = table_open(DatabaseRelationId, RowExclusiveLock); if (!get_db_info(oldname, AccessExclusiveLock, &db_id, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)) + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)) ereport(ERROR, (errcode(ERRCODE_UNDEFINED_DATABASE), errmsg("database \"%s\" does not exist", oldname))); @@ -1836,7 +1870,7 @@ movedb(const char *dbname, const char *tblspcname) pgdbrel = table_open(DatabaseRelationId, RowExclusiveLock); if (!get_db_info(dbname, AccessExclusiveLock, &db_id, NULL, NULL, - NULL, NULL, NULL, NULL, &src_tblspcoid, NULL, NULL, NULL, NULL, NULL)) + NULL, NULL, NULL, NULL, &src_tblspcoid, NULL, NULL, NULL, 
NULL, NULL, NULL)) ereport(ERROR, (errcode(ERRCODE_UNDEFINED_DATABASE), errmsg("database \"%s\" does not exist", dbname))); @@ -2599,6 +2633,7 @@ get_db_info(const char *name, LOCKMODE lockmode, int *encodingP, bool *dbIsTemplateP, bool *dbAllowConnP, TransactionId *dbFrozenXidP, MultiXactId *dbMinMultiP, Oid *dbTablespace, char **dbCollate, char **dbCtype, char **dbIculocale, + char **dbIcurules, char *dbLocProvider, char **dbCollversion) { @@ -2715,6 +2750,14 @@ get_db_info(const char *name, LOCKMODE lockmode, else *dbIculocale = TextDatumGetCString(datum); } + if (dbIcurules) + { + datum = SysCacheGetAttr(DATABASEOID, tuple, Anum_pg_database_daticurules, &isnull); + if (isnull) + *dbIcurules = NULL; + else + *dbIcurules = TextDatumGetCString(datum); + } if (dbCollversion) { datum = SysCacheGetAttr(DATABASEOID, tuple, Anum_pg_database_datcollversion, &isnull); diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c index 4aa5eaa984..1d3d4d86d3 100644 --- a/src/backend/utils/adt/pg_locale.c +++ b/src/backend/utils/adt/pg_locale.c @@ -69,6 +69,7 @@ #ifdef USE_ICU #include +#include #endif #ifdef __GLIBC__ @@ -1421,6 +1422,7 @@ struct pg_locale_struct default_locale; void make_icu_collator(const char *iculocstr, + const char *icurules, struct pg_locale_struct *resultp) { #ifdef USE_ICU @@ -1437,6 +1439,35 @@ make_icu_collator(const char *iculocstr, if (U_ICU_VERSION_MAJOR_NUM < 54) icu_set_collation_attributes(collator, iculocstr); + /* + * If rules are specified, we extract the rules of the standard collation, + * add our own rules, and make a new collator with the combined rules. 
+ */ + if (icurules) + { + const UChar *default_rules; + UChar *agg_rules; + UChar *my_rules; + int32_t length; + + default_rules = ucol_getRules(collator, &length); + icu_to_uchar(&my_rules, icurules, strlen(icurules)); + + agg_rules = palloc_array(UChar, u_strlen(default_rules) + u_strlen(my_rules) + 1); + u_strcpy(agg_rules, default_rules); + u_strcat(agg_rules, my_rules); + + ucol_close(collator); + + status = U_ZERO_ERROR; + collator = ucol_openRules(agg_rules, u_strlen(agg_rules), + UCOL_DEFAULT, UCOL_DEFAULT_STRENGTH, NULL, &status); + if (U_FAILURE(status)) + ereport(ERROR, + (errmsg("could not open collator for locale \"%s\" with rules \"%s\": %s", + iculocstr, icurules, u_errorName(status)))); + } + /* We will leak this string if the caller errors later :-( */ resultp->info.icu.locale = MemoryContextStrdup(TopMemoryContext, iculocstr); resultp->info.icu.ucol = collator; @@ -1608,11 +1639,19 @@ pg_newlocale_from_collation(Oid collid) else if (collform->collprovider == COLLPROVIDER_ICU) { const char *iculocstr; + const char *icurules; datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_colliculocale, &isnull); Assert(!isnull); iculocstr = TextDatumGetCString(datum); - make_icu_collator(iculocstr, &result); + + datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collicurules, &isnull); + if (!isnull) + icurules = TextDatumGetCString(datum); + else + icurules = NULL; + + make_icu_collator(iculocstr, icurules, &result); } datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collversion, diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c index 2f07ca7a0e..b0e20cc635 100644 --- a/src/backend/utils/init/postinit.c +++ b/src/backend/utils/init/postinit.c @@ -421,10 +421,19 @@ CheckMyDatabase(const char *name, bool am_superuser, bool override_allow_connect if (dbform->datlocprovider == COLLPROVIDER_ICU) { + char *icurules; + datum = SysCacheGetAttr(DATABASEOID, tup, Anum_pg_database_daticulocale, &isnull); Assert(!isnull); 
iculocale = TextDatumGetCString(datum); - make_icu_collator(iculocale, &default_locale); + + datum = SysCacheGetAttr(DATABASEOID, tup, Anum_pg_database_daticurules, &isnull); + if (!isnull) + icurules = TextDatumGetCString(datum); + else + icurules = NULL; + + make_icu_collator(iculocale, icurules, &default_locale); } else iculocale = NULL; diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c index 7a58c33ace..5e3c6a27c4 100644 --- a/src/bin/initdb/initdb.c +++ b/src/bin/initdb/initdb.c @@ -135,6 +135,7 @@ static char *lc_time = NULL; static char *lc_messages = NULL; static char locale_provider = COLLPROVIDER_LIBC; static char *icu_locale = NULL; +static char *icu_rules = NULL; static const char *default_text_search_config = NULL; static char *username = NULL; static bool pwprompt = false; @@ -1312,7 +1313,10 @@ bootstrap_template1(void) escape_quotes_bki(lc_ctype)); bki_lines = replace_token(bki_lines, "ICU_LOCALE", - locale_provider == COLLPROVIDER_ICU ? escape_quotes_bki(icu_locale) : "_null_"); + icu_locale ? escape_quotes_bki(icu_locale) : "_null_"); + + bki_lines = replace_token(bki_lines, "ICU_RULES", + icu_rules ? 
escape_quotes_bki(icu_rules) : "_null_"); sprintf(buf, "%c", locale_provider); bki_lines = replace_token(bki_lines, "LOCALE_PROVIDER", buf); @@ -2107,6 +2111,7 @@ usage(const char *progname) printf(_(" -E, --encoding=ENCODING set default encoding for new databases\n")); printf(_(" -g, --allow-group-access allow group read/execute on data directory\n")); printf(_(" --icu-locale=LOCALE set ICU locale ID for new databases\n")); + printf(_(" --icu-rules=RULES set additional ICU collation rules for new databases\n")); printf(_(" -k, --data-checksums use data page checksums\n")); printf(_(" --locale=LOCALE set default locale for new databases\n")); printf(_(" --lc-collate=, --lc-ctype=, --lc-messages=LOCALE\n" @@ -2767,6 +2772,7 @@ main(int argc, char *argv[]) {"discard-caches", no_argument, NULL, 14}, {"locale-provider", required_argument, NULL, 15}, {"icu-locale", required_argument, NULL, 16}, + {"icu-rules", required_argument, NULL, 17}, {NULL, 0, NULL, 0} }; @@ -2924,6 +2930,9 @@ main(int argc, char *argv[]) case 16: icu_locale = pg_strdup(optarg); break; + case 17: + icu_rules = pg_strdup(optarg); + break; default: /* getopt_long already emitted a complaint */ pg_log_error_hint("Try \"%s --help\" for more information.", progname); @@ -2954,6 +2963,10 @@ main(int argc, char *argv[]) pg_fatal("%s cannot be specified unless locale provider \"%s\" is chosen", "--icu-locale", "icu"); + if (icu_rules && locale_provider != COLLPROVIDER_ICU) + pg_fatal("%s cannot be specified unless locale provider \"%s\" is chosen", + "--icu-rules", "icu"); + atexit(cleanup_directories_atexit); /* If we only need to fsync, just do it and exit */ diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index 74d806c77b..4217908f84 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -2843,6 +2843,7 @@ dumpDatabase(Archive *fout) i_collate, i_ctype, i_daticulocale, + i_daticurules, i_frozenxid, i_minmxid, i_datacl, @@ -2861,6 +2862,7 @@ dumpDatabase(Archive 
*fout) *collate, *ctype, *iculocale, + *icurules, *datistemplate, *datconnlimit, *tablespace; @@ -2887,6 +2889,10 @@ dumpDatabase(Archive *fout) appendPQExpBufferStr(dbQry, "datlocprovider, daticulocale, datcollversion, "); else appendPQExpBufferStr(dbQry, "'c' AS datlocprovider, NULL AS daticulocale, NULL AS datcollversion, "); + if (fout->remoteVersion >= 160000) + appendPQExpBufferStr(dbQry, "daticurules, "); + else + appendPQExpBufferStr(dbQry, "NULL AS daticurules, "); appendPQExpBufferStr(dbQry, "(SELECT spcname FROM pg_tablespace t WHERE t.oid = dattablespace) AS tablespace, " "shobj_description(oid, 'pg_database') AS description " @@ -2904,6 +2910,7 @@ dumpDatabase(Archive *fout) i_collate = PQfnumber(res, "datcollate"); i_ctype = PQfnumber(res, "datctype"); i_daticulocale = PQfnumber(res, "daticulocale"); + i_daticurules = PQfnumber(res, "daticurules"); i_frozenxid = PQfnumber(res, "datfrozenxid"); i_minmxid = PQfnumber(res, "datminmxid"); i_datacl = PQfnumber(res, "datacl"); @@ -2925,6 +2932,10 @@ dumpDatabase(Archive *fout) iculocale = PQgetvalue(res, 0, i_daticulocale); else iculocale = NULL; + if (!PQgetisnull(res, 0, i_daticurules)) + icurules = PQgetvalue(res, 0, i_daticurules); + else + icurules = NULL; frozenxid = atooid(PQgetvalue(res, 0, i_frozenxid)); minmxid = atooid(PQgetvalue(res, 0, i_minmxid)); dbdacl.acl = PQgetvalue(res, 0, i_datacl); @@ -2990,6 +3001,11 @@ dumpDatabase(Archive *fout) appendPQExpBufferStr(creaQry, " ICU_LOCALE = "); appendStringLiteralAH(creaQry, iculocale, fout); } + if (icurules) + { + appendPQExpBufferStr(creaQry, " ICU_RULES = "); + appendStringLiteralAH(creaQry, icurules, fout); + } /* * For binary upgrade, carry over the collation version. 
For normal @@ -13153,10 +13169,12 @@ dumpCollation(Archive *fout, const CollInfo *collinfo) int i_collcollate; int i_collctype; int i_colliculocale; + int i_collicurules; const char *collprovider; const char *collcollate; const char *collctype; const char *colliculocale; + const char *collicurules; /* Do nothing in data-only dump */ if (dopt->dataOnly) @@ -13194,6 +13212,13 @@ dumpCollation(Archive *fout, const CollInfo *collinfo) appendPQExpBufferStr(query, "NULL AS colliculocale, "); + if (fout->remoteVersion >= 160000) + appendPQExpBufferStr(query, + "collicurules, "); + else + appendPQExpBufferStr(query, + "NULL AS collicurules, "); + appendPQExpBuffer(query, "collcollate, " "collctype " @@ -13208,6 +13233,7 @@ dumpCollation(Archive *fout, const CollInfo *collinfo) i_collcollate = PQfnumber(res, "collcollate"); i_collctype = PQfnumber(res, "collctype"); i_colliculocale = PQfnumber(res, "colliculocale"); + i_collicurules = PQfnumber(res, "collicurules"); collprovider = PQgetvalue(res, 0, i_collprovider); @@ -13226,6 +13252,11 @@ dumpCollation(Archive *fout, const CollInfo *collinfo) else colliculocale = NULL; + if (!PQgetisnull(res, 0, i_collicurules)) + collicurules = PQgetvalue(res, 0, i_collicurules); + else + collicurules = NULL; + appendPQExpBuffer(delq, "DROP COLLATION %s;\n", fmtQualifiedDumpable(collinfo)); @@ -13271,6 +13302,12 @@ dumpCollation(Archive *fout, const CollInfo *collinfo) } } + if (collicurules) + { + appendPQExpBufferStr(q, ", rules = "); + appendStringLiteralAH(q, collicurules, fout); + } + /* * For binary upgrade, carry over the collation version. For normal * dump/restore, omit the version, so that it is computed upon restore. 
diff --git a/src/bin/psql/describe.c b/src/bin/psql/describe.c index 2084f5ccda..99e28f607e 100644 --- a/src/bin/psql/describe.c +++ b/src/bin/psql/describe.c @@ -923,38 +923,52 @@ listAllDbs(const char *pattern, bool verbose) initPQExpBuffer(&buf); printfPQExpBuffer(&buf, - "SELECT d.datname as \"%s\",\n" - " pg_catalog.pg_get_userbyid(d.datdba) as \"%s\",\n" - " pg_catalog.pg_encoding_to_char(d.encoding) as \"%s\",\n" - " d.datcollate as \"%s\",\n" - " d.datctype as \"%s\",\n", + "SELECT\n" + " d.datname as \"%s\",\n" + " pg_catalog.pg_get_userbyid(d.datdba) as \"%s\",\n" + " pg_catalog.pg_encoding_to_char(d.encoding) as \"%s\",\n", gettext_noop("Name"), gettext_noop("Owner"), - gettext_noop("Encoding"), - gettext_noop("Collate"), - gettext_noop("Ctype")); + gettext_noop("Encoding")); if (pset.sversion >= 150000) appendPQExpBuffer(&buf, - " d.daticulocale as \"%s\",\n" - " CASE d.datlocprovider WHEN 'c' THEN 'libc' WHEN 'i' THEN 'icu' END AS \"%s\",\n", - gettext_noop("ICU Locale"), + " CASE d.datlocprovider WHEN 'c' THEN 'libc' WHEN 'i' THEN 'icu' END AS \"%s\",\n", gettext_noop("Locale Provider")); else appendPQExpBuffer(&buf, - " NULL as \"%s\",\n" - " 'libc' AS \"%s\",\n", - gettext_noop("ICU Locale"), + " 'libc' AS \"%s\",\n", gettext_noop("Locale Provider")); - appendPQExpBufferStr(&buf, " "); + appendPQExpBuffer(&buf, + " d.datcollate as \"%s\",\n" + " d.datctype as \"%s\",\n", + gettext_noop("Collate"), + gettext_noop("Ctype")); + if (pset.sversion >= 150000) + appendPQExpBuffer(&buf, + " d.daticulocale as \"%s\",\n", + gettext_noop("ICU Locale")); + else + appendPQExpBuffer(&buf, + " NULL as \"%s\",\n", + gettext_noop("ICU Locale")); + if (pset.sversion >= 160000) + appendPQExpBuffer(&buf, + " d.daticurules as \"%s\",\n", + gettext_noop("ICU Rules")); + else + appendPQExpBuffer(&buf, + " NULL as \"%s\",\n", + gettext_noop("ICU Rules")); + appendPQExpBufferStr(&buf, " "); printACLColumn(&buf, "d.datacl"); if (verbose) appendPQExpBuffer(&buf, - ",\n CASE 
WHEN pg_catalog.has_database_privilege(d.datname, 'CONNECT')\n" - " THEN pg_catalog.pg_size_pretty(pg_catalog.pg_database_size(d.datname))\n" - " ELSE 'No Access'\n" - " END as \"%s\"" - ",\n t.spcname as \"%s\"" - ",\n pg_catalog.shobj_description(d.oid, 'pg_database') as \"%s\"", + ",\n CASE WHEN pg_catalog.has_database_privilege(d.datname, 'CONNECT')\n" + " THEN pg_catalog.pg_size_pretty(pg_catalog.pg_database_size(d.datname))\n" + " ELSE 'No Access'\n" + " END as \"%s\"" + ",\n t.spcname as \"%s\"" + ",\n pg_catalog.shobj_description(d.oid, 'pg_database') as \"%s\"", gettext_noop("Size"), gettext_noop("Tablespace"), gettext_noop("Description")); @@ -4849,52 +4863,64 @@ listCollations(const char *pattern, bool verbose, bool showSystem) PQExpBufferData buf; PGresult *res; printQueryOpt myopt = pset.popt; - static const bool translate_columns[] = {false, false, false, false, false, false, true, false}; + static const bool translate_columns[] = {false, false, false, false, false, false, false, true, false}; initPQExpBuffer(&buf); printfPQExpBuffer(&buf, - "SELECT n.nspname AS \"%s\",\n" - " c.collname AS \"%s\",\n" - " c.collcollate AS \"%s\",\n" - " c.collctype AS \"%s\"", + "SELECT\n" + " n.nspname AS \"%s\",\n" + " c.collname AS \"%s\",\n", gettext_noop("Schema"), - gettext_noop("Name"), + gettext_noop("Name")); + + if (pset.sversion >= 100000) + appendPQExpBuffer(&buf, + " CASE c.collprovider WHEN 'd' THEN 'default' WHEN 'c' THEN 'libc' WHEN 'i' THEN 'icu' END AS \"%s\",\n", + gettext_noop("Provider")); + else + appendPQExpBuffer(&buf, + " 'libc' AS \"%s\",\n", + gettext_noop("Provider")); + + appendPQExpBuffer(&buf, + " c.collcollate AS \"%s\",\n" + " c.collctype AS \"%s\",\n", gettext_noop("Collate"), gettext_noop("Ctype")); if (pset.sversion >= 150000) appendPQExpBuffer(&buf, - ",\n c.colliculocale AS \"%s\"", + " c.colliculocale AS \"%s\",\n", gettext_noop("ICU Locale")); else appendPQExpBuffer(&buf, - ",\n c.collcollate AS \"%s\"", + " c.collcollate AS 
\"%s\",\n", gettext_noop("ICU Locale")); - if (pset.sversion >= 100000) + if (pset.sversion >= 160000) appendPQExpBuffer(&buf, - ",\n CASE c.collprovider WHEN 'd' THEN 'default' WHEN 'c' THEN 'libc' WHEN 'i' THEN 'icu' END AS \"%s\"", - gettext_noop("Provider")); + " c.collicurules AS \"%s\",\n", + gettext_noop("ICU Rules")); else appendPQExpBuffer(&buf, - ",\n 'libc' AS \"%s\"", - gettext_noop("Provider")); + " NULL AS \"%s\",\n", + gettext_noop("ICU Rules")); if (pset.sversion >= 120000) appendPQExpBuffer(&buf, - ",\n CASE WHEN c.collisdeterministic THEN '%s' ELSE '%s' END AS \"%s\"", + " CASE WHEN c.collisdeterministic THEN '%s' ELSE '%s' END AS \"%s\"", gettext_noop("yes"), gettext_noop("no"), gettext_noop("Deterministic?")); else appendPQExpBuffer(&buf, - ",\n '%s' AS \"%s\"", + " '%s' AS \"%s\"", gettext_noop("yes"), gettext_noop("Deterministic?")); if (verbose) appendPQExpBuffer(&buf, - ",\n pg_catalog.obj_description(c.oid, 'pg_collation') AS \"%s\"", + ",\n pg_catalog.obj_description(c.oid, 'pg_collation') AS \"%s\"", gettext_noop("Description")); appendPQExpBufferStr(&buf, diff --git a/src/bin/scripts/createdb.c b/src/bin/scripts/createdb.c index 80859dadc4..b4205c4fa5 100644 --- a/src/bin/scripts/createdb.c +++ b/src/bin/scripts/createdb.c @@ -41,6 +41,7 @@ main(int argc, char *argv[]) {"maintenance-db", required_argument, NULL, 3}, {"locale-provider", required_argument, NULL, 4}, {"icu-locale", required_argument, NULL, 5}, + {"icu-rules", required_argument, NULL, 6}, {NULL, 0, NULL, 0} }; @@ -67,6 +68,7 @@ main(int argc, char *argv[]) char *locale = NULL; char *locale_provider = NULL; char *icu_locale = NULL; + char *icu_rules = NULL; PQExpBufferData sql; @@ -134,6 +136,9 @@ main(int argc, char *argv[]) case 5: icu_locale = pg_strdup(optarg); break; + case 6: + icu_rules = pg_strdup(optarg); + break; default: /* getopt_long already emitted a complaint */ pg_log_error_hint("Try \"%s --help\" for more information.", progname); @@ -231,6 +236,11 @@ 
main(int argc, char *argv[]) appendPQExpBufferStr(&sql, " ICU_LOCALE "); appendStringLiteralConn(&sql, icu_locale, conn); } + if (icu_rules) + { + appendPQExpBufferStr(&sql, " ICU_RULES "); + appendStringLiteralConn(&sql, icu_rules, conn); + } appendPQExpBufferChar(&sql, ';'); @@ -288,6 +298,7 @@ help(const char *progname) printf(_(" --lc-collate=LOCALE LC_COLLATE setting for the database\n")); printf(_(" --lc-ctype=LOCALE LC_CTYPE setting for the database\n")); printf(_(" --icu-locale=LOCALE ICU locale setting for the database\n")); + printf(_(" --icu-rules=RULES ICU rules setting for the database\n")); printf(_(" --locale-provider={libc|icu}\n" " locale provider for the database's default collation\n")); printf(_(" -O, --owner=OWNER database user to own the new database\n")); diff --git a/src/include/catalog/pg_collation.h b/src/include/catalog/pg_collation.h index f9d5e88faf..bfa3568451 100644 --- a/src/include/catalog/pg_collation.h +++ b/src/include/catalog/pg_collation.h @@ -43,6 +43,7 @@ CATALOG(pg_collation,3456,CollationRelationId) text collcollate BKI_DEFAULT(_null_); /* LC_COLLATE setting */ text collctype BKI_DEFAULT(_null_); /* LC_CTYPE setting */ text colliculocale BKI_DEFAULT(_null_); /* ICU locale ID */ + text collicurules BKI_DEFAULT(_null_); /* ICU collation rules */ text collversion BKI_DEFAULT(_null_); /* provider-dependent * version of collation * data */ @@ -91,6 +92,7 @@ extern Oid CollationCreate(const char *collname, Oid collnamespace, int32 collencoding, const char *collcollate, const char *collctype, const char *colliculocale, + const char *collicurules, const char *collversion, bool if_not_exists, bool quiet); diff --git a/src/include/catalog/pg_database.dat b/src/include/catalog/pg_database.dat index 4b1bc29363..68dcac1a6e 100644 --- a/src/include/catalog/pg_database.dat +++ b/src/include/catalog/pg_database.dat @@ -18,6 +18,6 @@ datlocprovider => 'LOCALE_PROVIDER', datistemplate => 't', datallowconn => 't', datconnlimit => '-1', 
datfrozenxid => '0', datminmxid => '1', dattablespace => 'pg_default', datcollate => 'LC_COLLATE', - datctype => 'LC_CTYPE', daticulocale => 'ICU_LOCALE', datacl => '_null_' }, + datctype => 'LC_CTYPE', daticulocale => 'ICU_LOCALE', daticurules => 'ICU_RULES', datacl => '_null_' }, ] diff --git a/src/include/catalog/pg_database.h b/src/include/catalog/pg_database.h index 3da3492e92..d004f4dc8a 100644 --- a/src/include/catalog/pg_database.h +++ b/src/include/catalog/pg_database.h @@ -71,6 +71,9 @@ CATALOG(pg_database,1262,DatabaseRelationId) BKI_SHARED_RELATION BKI_ROWTYPE_OID /* ICU locale ID */ text daticulocale; + /* ICU collation rules */ + text daticurules; + /* provider-dependent version of collation data */ text datcollversion BKI_DEFAULT(_null_); diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h index b8f22875a8..f9ce428233 100644 --- a/src/include/utils/pg_locale.h +++ b/src/include/utils/pg_locale.h @@ -95,6 +95,7 @@ typedef struct pg_locale_struct *pg_locale_t; extern PGDLLIMPORT struct pg_locale_struct default_locale; extern void make_icu_collator(const char *iculocstr, + const char *icurules, struct pg_locale_struct *resultp); extern bool pg_locale_deterministic(pg_locale_t locale); diff --git a/src/test/regress/expected/collate.icu.utf8.out b/src/test/regress/expected/collate.icu.utf8.out index 4354dc07b8..2ac707b362 100644 --- a/src/test/regress/expected/collate.icu.utf8.out +++ b/src/test/regress/expected/collate.icu.utf8.out @@ -1190,6 +1190,36 @@ SELECT 'Goldmann' < 'Götz' COLLATE "de-x-icu", 'Goldmann' > 'Götz' COLLATE tes t | t (1 row) +-- rules +CREATE COLLATION testcoll_rules1 (provider = icu, locale = '', rules = '&a < g'); +CREATE TABLE test7 (a text); +-- example from https://unicode-org.github.io/icu/userguide/collation/customization/#syntax +INSERT INTO test7 VALUES ('Abernathy'), ('apple'), ('bird'), ('Boston'), ('Graham'), ('green'); +SELECT * FROM test7 ORDER BY a COLLATE "en-x-icu"; + a +----------- + Abernathy 
+ apple + bird + Boston + Graham + green +(6 rows) + +SELECT * FROM test7 ORDER BY a COLLATE testcoll_rules1; + a +----------- + Abernathy + apple + green + bird + Boston + Graham +(6 rows) + +DROP TABLE test7; +CREATE COLLATION testcoll_rulesx (provider = icu, locale = '', rules = '!!wrong!!'); +ERROR: could not open collator for locale "" with rules "!!wrong!!": U_INVALID_FORMAT_ERROR -- nondeterministic collations CREATE COLLATION ctest_det (provider = icu, locale = '', deterministic = true); CREATE COLLATION ctest_nondet (provider = icu, locale = '', deterministic = false); diff --git a/src/test/regress/expected/psql.out b/src/test/regress/expected/psql.out index b75a74d294..c00e28361c 100644 --- a/src/test/regress/expected/psql.out +++ b/src/test/regress/expected/psql.out @@ -6199,9 +6199,9 @@ List of schemas (0 rows) \dO "no.such.collation" - List of collations - Schema | Name | Collate | Ctype | ICU Locale | Provider | Deterministic? ---------+------+---------+-------+------------+----------+---------------- + List of collations + Schema | Name | Provider | Collate | Ctype | ICU Locale | ICU Rules | Deterministic? +--------+------+----------+---------+-------+------------+-----------+---------------- (0 rows) \dp "no.such.access.privilege" @@ -6388,9 +6388,9 @@ cross-database references are not implemented: "no.such.schema"."no.such.languag (0 rows) \dO "no.such.schema"."no.such.collation" - List of collations - Schema | Name | Collate | Ctype | ICU Locale | Provider | Deterministic? ---------+------+---------+-------+------------+----------+---------------- + List of collations + Schema | Name | Provider | Collate | Ctype | ICU Locale | ICU Rules | Deterministic? 
+--------+------+----------+---------+-------+------------+-----------+---------------- (0 rows) \dp "no.such.schema"."no.such.access.privilege" @@ -6531,9 +6531,9 @@ List of text search templates (0 rows) \dO regression."no.such.schema"."no.such.collation" - List of collations - Schema | Name | Collate | Ctype | ICU Locale | Provider | Deterministic? ---------+------+---------+-------+------------+----------+---------------- + List of collations + Schema | Name | Provider | Collate | Ctype | ICU Locale | ICU Rules | Deterministic? +--------+------+----------+---------+-------+------------+-----------+---------------- (0 rows) \dp regression."no.such.schema"."no.such.access.privilege" diff --git a/src/test/regress/sql/collate.icu.utf8.sql b/src/test/regress/sql/collate.icu.utf8.sql index b0ddc7db44..aa95c1ec42 100644 --- a/src/test/regress/sql/collate.icu.utf8.sql +++ b/src/test/regress/sql/collate.icu.utf8.sql @@ -472,6 +472,19 @@ CREATE COLLATION testcoll_de_phonebook (provider = icu, locale = 'de@collation=p SELECT 'Goldmann' < 'Götz' COLLATE "de-x-icu", 'Goldmann' > 'Götz' COLLATE testcoll_de_phonebook; +-- rules + +CREATE COLLATION testcoll_rules1 (provider = icu, locale = '', rules = '&a < g'); +CREATE TABLE test7 (a text); +-- example from https://unicode-org.github.io/icu/userguide/collation/customization/#syntax +INSERT INTO test7 VALUES ('Abernathy'), ('apple'), ('bird'), ('Boston'), ('Graham'), ('green'); +SELECT * FROM test7 ORDER BY a COLLATE "en-x-icu"; +SELECT * FROM test7 ORDER BY a COLLATE testcoll_rules1; +DROP TABLE test7; + +CREATE COLLATION testcoll_rulesx (provider = icu, locale = '', rules = '!!wrong!!'); + + -- nondeterministic collations CREATE COLLATION ctest_det (provider = icu, locale = '', deterministic = true); base-commit: b6a0d469cae4410a05b5e109748278065a931b68 -- 2.39.2