From 4028980f6be3662c0302575ed92de77e941e5a9e Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Wed, 2 Feb 2022 13:54:04 +0100 Subject: [PATCH v4] Add option to use ICU as global collation provider This adds the option to use ICU as the default collation provider for either the whole cluster or a database. New options for initdb, createdb, and CREATE DATABASE are used to select this. Discussion: https://www.postgresql.org/message-id/flat/5e756dd6-0e91-d778-96fd-b1bcb06c161a%402ndquadrant.com --- doc/src/sgml/catalogs.sgml | 9 + doc/src/sgml/ref/create_database.sgml | 16 ++ doc/src/sgml/ref/createdb.sgml | 9 + doc/src/sgml/ref/initdb.sgml | 23 ++ src/backend/catalog/pg_collation.c | 18 +- src/backend/commands/collationcmds.c | 93 ++++--- src/backend/commands/dbcommands.c | 72 +++++- src/backend/utils/adt/pg_locale.c | 242 +++++++++++------- src/backend/utils/init/postinit.c | 26 ++ src/bin/initdb/Makefile | 2 + src/bin/initdb/initdb.c | 64 ++++- src/bin/initdb/t/001_initdb.pl | 18 +- src/bin/pg_dump/pg_dump.c | 19 ++ src/bin/pg_upgrade/check.c | 10 + src/bin/pg_upgrade/info.c | 18 +- src/bin/pg_upgrade/pg_upgrade.h | 2 + src/bin/psql/describe.c | 23 +- src/bin/psql/tab-complete.c | 2 +- src/bin/scripts/Makefile | 2 + src/bin/scripts/createdb.c | 9 + src/bin/scripts/t/020_createdb.pl | 20 +- src/include/catalog/pg_collation.dat | 3 +- src/include/catalog/pg_collation.h | 6 +- src/include/catalog/pg_database.dat | 4 +- src/include/catalog/pg_database.h | 6 + src/include/utils/pg_locale.h | 6 + .../regress/expected/collate.icu.utf8.out | 10 +- src/test/regress/sql/collate.icu.utf8.sql | 8 +- 28 files changed, 572 insertions(+), 168 deletions(-) diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml index 7d5b0b1656..5a5779b9a3 100644 --- a/doc/src/sgml/catalogs.sgml +++ b/doc/src/sgml/catalogs.sgml @@ -2384,6 +2384,15 @@ <structname>pg_collation</structname> Columns + + + collicucoll text + + + ICU collation string + + + collversion text diff --git 
a/doc/src/sgml/ref/create_database.sgml b/doc/src/sgml/ref/create_database.sgml index f22e28dc81..403010cddf 100644 --- a/doc/src/sgml/ref/create_database.sgml +++ b/doc/src/sgml/ref/create_database.sgml @@ -28,6 +28,7 @@ [ LOCALE [=] locale ] [ LC_COLLATE [=] lc_collate ] [ LC_CTYPE [=] lc_ctype ] + [ COLLATION_PROVIDER [=] collation_provider ] [ TABLESPACE [=] tablespace_name ] [ ALLOW_CONNECTIONS [=] allowconn ] [ CONNECTION LIMIT [=] connlimit ] @@ -158,6 +159,21 @@ Parameters + + + collation_provider + + + + Specifies the provider to use for the default collation in this + database. Possible values are: + <literal>icu</literal>,<indexterm><primary>ICU</primary></indexterm> + <literal>libc</literal>. <literal>libc</literal> is the default. The + available choices depend on the operating system and build options. + + + + tablespace_name diff --git a/doc/src/sgml/ref/createdb.sgml b/doc/src/sgml/ref/createdb.sgml index 86473455c9..4b07363fcc 100644 --- a/doc/src/sgml/ref/createdb.sgml +++ b/doc/src/sgml/ref/createdb.sgml @@ -83,6 +83,15 @@ Options + + + + + Specifies the collation provider for the database's default collation. + + + + diff --git a/doc/src/sgml/ref/initdb.sgml b/doc/src/sgml/ref/initdb.sgml index 8f71c7c962..77618d9a7a 100644 --- a/doc/src/sgml/ref/initdb.sgml +++ b/doc/src/sgml/ref/initdb.sgml @@ -166,6 +166,18 @@ Options + + + + + This option sets the collation provider for databases created in the + new cluster. It can be overridden in the CREATE + DATABASE command when new databases are subsequently + created. The default is libc. + + + + @@ -210,6 +222,17 @@ Options + + + + + Specifies the ICU locale if the ICU collation provider is used. If + this is not specified, the value from the + <option>--locale</option> option is used. 
+ + + + diff --git a/src/backend/catalog/pg_collation.c b/src/backend/catalog/pg_collation.c index bfc02d3038..5596b9be5a 100644 --- a/src/backend/catalog/pg_collation.c +++ b/src/backend/catalog/pg_collation.c @@ -49,6 +49,7 @@ CollationCreate(const char *collname, Oid collnamespace, bool collisdeterministic, int32 collencoding, const char *collcollate, const char *collctype, + const char *collicucoll, const char *collversion, bool if_not_exists, bool quiet) @@ -66,8 +67,7 @@ CollationCreate(const char *collname, Oid collnamespace, AssertArg(collname); AssertArg(collnamespace); AssertArg(collowner); - AssertArg(collcollate); - AssertArg(collctype); + AssertArg((collcollate && collctype) || collicucoll); /* * Make sure there is no existing collation of same name & encoding. @@ -161,8 +161,18 @@ CollationCreate(const char *collname, Oid collnamespace, values[Anum_pg_collation_collprovider - 1] = CharGetDatum(collprovider); values[Anum_pg_collation_collisdeterministic - 1] = BoolGetDatum(collisdeterministic); values[Anum_pg_collation_collencoding - 1] = Int32GetDatum(collencoding); - values[Anum_pg_collation_collcollate - 1] = CStringGetTextDatum(collcollate); - values[Anum_pg_collation_collctype - 1] = CStringGetTextDatum(collctype); + if (collcollate) + values[Anum_pg_collation_collcollate - 1] = CStringGetTextDatum(collcollate); + else + nulls[Anum_pg_collation_collcollate - 1] = true; + if (collctype) + values[Anum_pg_collation_collctype - 1] = CStringGetTextDatum(collctype); + else + nulls[Anum_pg_collation_collctype - 1] = true; + if (collicucoll) + values[Anum_pg_collation_collicucoll - 1] = CStringGetTextDatum(collicucoll); + else + nulls[Anum_pg_collation_collicucoll - 1] = true; if (collversion) values[Anum_pg_collation_collversion - 1] = CStringGetTextDatum(collversion); else diff --git a/src/backend/commands/collationcmds.c b/src/backend/commands/collationcmds.c index 12fc2316f9..a9b50f5d2b 100644 --- a/src/backend/commands/collationcmds.c +++ 
b/src/backend/commands/collationcmds.c @@ -65,6 +65,7 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e DefElem *versionEl = NULL; char *collcollate = NULL; char *collctype = NULL; + char *collicucoll = NULL; char *collproviderstr = NULL; bool collisdeterministic = true; int collencoding = 0; @@ -153,6 +154,12 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e else collctype = NULL; + datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collicucoll, &isnull); + if (!isnull) + collicucoll = TextDatumGetCString(datum); + else + collicucoll = NULL; + ReleaseSysCache(tp); /* @@ -168,18 +175,6 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e errmsg("collation \"default\" cannot be copied"))); } - if (localeEl) - { - collcollate = defGetString(localeEl); - collctype = defGetString(localeEl); - } - - if (lccollateEl) - collcollate = defGetString(lccollateEl); - - if (lcctypeEl) - collctype = defGetString(lcctypeEl); - if (providerEl) collproviderstr = defGetString(providerEl); @@ -204,15 +199,43 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e else if (!fromEl) collprovider = COLLPROVIDER_LIBC; - if (!collcollate) - ereport(ERROR, - (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), - errmsg("parameter \"lc_collate\" must be specified"))); + if (localeEl) + { + if (collprovider == COLLPROVIDER_LIBC) + { + collcollate = defGetString(localeEl); + collctype = defGetString(localeEl); + } + else + collicucoll = defGetString(localeEl); + } - if (!collctype) - ereport(ERROR, - (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), - errmsg("parameter \"lc_ctype\" must be specified"))); + if (lccollateEl) + collcollate = defGetString(lccollateEl); + + if (lcctypeEl) + collctype = defGetString(lcctypeEl); + + if (collprovider == COLLPROVIDER_LIBC) + { + if (!collcollate) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("parameter 
\"lc_collate\" must be specified"))); + + if (!collctype) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("parameter \"lc_ctype\" must be specified"))); + } + + if (collprovider == COLLPROVIDER_ICU) + { + if (!collicucoll) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("parameter \"locale\" must be specified"))); + } /* * Nondeterministic collations are currently only supported with ICU @@ -255,7 +278,7 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e } if (!collversion) - collversion = get_collation_actual_version(collprovider, collcollate); + collversion = get_collation_actual_version(collprovider, collprovider == COLLPROVIDER_ICU ? collicucoll : collcollate); newoid = CollationCreate(collName, collNamespace, @@ -265,6 +288,7 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e collencoding, collcollate, collctype, + collicucoll, collversion, if_not_exists, false); /* not quiet */ @@ -347,7 +371,7 @@ AlterCollation(AlterCollationStmt *stmt) datum = SysCacheGetAttr(COLLOID, tup, Anum_pg_collation_collversion, &isnull); oldversion = isnull ? NULL : TextDatumGetCString(datum); - datum = SysCacheGetAttr(COLLOID, tup, Anum_pg_collation_collcollate, &isnull); + datum = SysCacheGetAttr(COLLOID, tup, collForm->collprovider == COLLPROVIDER_ICU ? Anum_pg_collation_collicucoll : Anum_pg_collation_collcollate, &isnull); Assert(!isnull); newversion = get_collation_actual_version(collForm->collprovider, TextDatumGetCString(datum)); @@ -409,9 +433,14 @@ pg_collation_actual_version(PG_FUNCTION_ARGS) collprovider = ((Form_pg_collation) GETSTRUCT(tp))->collprovider; - datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collcollate, &isnull); - Assert(!isnull); - version = get_collation_actual_version(collprovider, TextDatumGetCString(datum)); + if (collprovider != COLLPROVIDER_DEFAULT) + { + datum = SysCacheGetAttr(COLLOID, tp, collprovider == COLLPROVIDER_ICU ? 
Anum_pg_collation_collicucoll : Anum_pg_collation_collcollate, &isnull); + Assert(!isnull); + version = get_collation_actual_version(collprovider, TextDatumGetCString(datum)); + } + else + version = NULL; ReleaseSysCache(tp); @@ -638,7 +667,7 @@ pg_import_system_collations(PG_FUNCTION_ARGS) */ collid = CollationCreate(localebuf, nspid, GetUserId(), COLLPROVIDER_LIBC, true, enc, - localebuf, localebuf, + localebuf, localebuf, NULL, get_collation_actual_version(COLLPROVIDER_LIBC, localebuf), true, true); if (OidIsValid(collid)) @@ -699,7 +728,7 @@ pg_import_system_collations(PG_FUNCTION_ARGS) collid = CollationCreate(alias, nspid, GetUserId(), COLLPROVIDER_LIBC, true, enc, - locale, locale, + locale, locale, NULL, get_collation_actual_version(COLLPROVIDER_LIBC, locale), true, true); if (OidIsValid(collid)) @@ -740,7 +769,7 @@ pg_import_system_collations(PG_FUNCTION_ARGS) const char *name; char *langtag; char *icucomment; - const char *collcollate; + const char *icucollstr; Oid collid; if (i == -1) @@ -749,20 +778,20 @@ pg_import_system_collations(PG_FUNCTION_ARGS) name = uloc_getAvailable(i); langtag = get_icu_language_tag(name); - collcollate = U_ICU_VERSION_MAJOR_NUM >= 54 ? langtag : name; + icucollstr = U_ICU_VERSION_MAJOR_NUM >= 54 ? 
langtag : name; /* * Be paranoid about not allowing any non-ASCII strings into * pg_collation */ - if (!pg_is_ascii(langtag) || !pg_is_ascii(collcollate)) + if (!pg_is_ascii(langtag) || !pg_is_ascii(icucollstr)) continue; collid = CollationCreate(psprintf("%s-x-icu", langtag), nspid, GetUserId(), COLLPROVIDER_ICU, true, -1, - collcollate, collcollate, - get_collation_actual_version(COLLPROVIDER_ICU, collcollate), + NULL, NULL, icucollstr, + get_collation_actual_version(COLLPROVIDER_ICU, icucollstr), true, true); if (OidIsValid(collid)) { diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c index e673138cbd..df7087cd7b 100644 --- a/src/backend/commands/dbcommands.c +++ b/src/backend/commands/dbcommands.c @@ -36,6 +36,7 @@ #include "catalog/indexing.h" #include "catalog/objectaccess.h" #include "catalog/pg_authid.h" +#include "catalog/pg_collation.h" #include "catalog/pg_database.h" #include "catalog/pg_db_role_setting.h" #include "catalog/pg_subscription.h" @@ -85,7 +86,8 @@ static bool get_db_info(const char *name, LOCKMODE lockmode, Oid *dbIdP, Oid *ownerIdP, int *encodingP, bool *dbIsTemplateP, bool *dbAllowConnP, TransactionId *dbFrozenXidP, MultiXactId *dbMinMultiP, - Oid *dbTablespace, char **dbCollate, char **dbCtype); + Oid *dbTablespace, char **dbCollate, char **dbCtype, char **dbIcucoll, + char *dbCollProvider); static bool have_createdb_privilege(void); static void remove_dbtablespaces(Oid db_id); static bool check_db_file_conflict(Oid db_id); @@ -105,6 +107,8 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt) int src_encoding = -1; char *src_collate = NULL; char *src_ctype = NULL; + char *src_icucoll = NULL; + char src_collprovider; bool src_istemplate; bool src_allowconn; TransactionId src_frozenxid = InvalidTransactionId; @@ -125,6 +129,7 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt) DefElem *dlocale = NULL; DefElem *dcollate = NULL; DefElem *dctype = NULL; + DefElem *dcollprovider = NULL; DefElem 
*distemplate = NULL; DefElem *dallowconnections = NULL; DefElem *dconnlimit = NULL; @@ -133,6 +138,8 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt) const char *dbtemplate = NULL; char *dbcollate = NULL; char *dbctype = NULL; + char *dbicucoll = NULL; + char dbcollprovider = '\0'; char *canonname; int encoding = -1; bool dbistemplate = false; @@ -189,6 +196,15 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt) errorConflictingDefElem(defel, pstate); dctype = defel; } + else if (strcmp(defel->defname, "collation_provider") == 0) + { + if (dcollprovider) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("conflicting or redundant options"), + parser_errposition(pstate, defel->location))); + dcollprovider = defel; + } else if (strcmp(defel->defname, "is_template") == 0) { if (distemplate) @@ -246,12 +262,6 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt) parser_errposition(pstate, defel->location))); } - if (dlocale && (dcollate || dctype)) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("conflicting or redundant options"), - errdetail("LOCALE cannot be specified together with LC_COLLATE or LC_CTYPE."))); - if (downer && downer->arg) dbowner = defGetString(downer); if (dtemplate && dtemplate->arg) @@ -288,11 +298,29 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt) { dbcollate = defGetString(dlocale); dbctype = defGetString(dlocale); + dbicucoll = defGetString(dlocale); } if (dcollate && dcollate->arg) dbcollate = defGetString(dcollate); if (dctype && dctype->arg) dbctype = defGetString(dctype); + if (dcollprovider && dcollprovider->arg) + { + char *collproviderstr = defGetString(dcollprovider); + +#ifdef USE_ICU + if (pg_strcasecmp(collproviderstr, "icu") == 0) + dbcollprovider = COLLPROVIDER_ICU; + else +#endif + if (pg_strcasecmp(collproviderstr, "libc") == 0) + dbcollprovider = COLLPROVIDER_LIBC; + else + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("unrecognized collation provider: 
%s", + collproviderstr))); + } if (distemplate && distemplate->arg) dbistemplate = defGetBoolean(distemplate); if (dallowconnections && dallowconnections->arg) @@ -342,7 +370,7 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt) &src_dboid, &src_owner, &src_encoding, &src_istemplate, &src_allowconn, &src_frozenxid, &src_minmxid, &src_deftablespace, - &src_collate, &src_ctype)) + &src_collate, &src_ctype, &src_icucoll, &src_collprovider)) ereport(ERROR, (errcode(ERRCODE_UNDEFINED_DATABASE), errmsg("template database \"%s\" does not exist", @@ -368,6 +396,10 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt) dbcollate = src_collate; if (dbctype == NULL) dbctype = src_ctype; + if (dbicucoll == NULL) + dbicucoll = src_icucoll; + if (dbcollprovider == '\0') + dbcollprovider = src_collprovider; /* Some encodings are client only */ if (!PG_VALID_BE_ENCODING(encoding)) @@ -570,6 +602,7 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt) DirectFunctionCall1(namein, CStringGetDatum(dbname)); new_record[Anum_pg_database_datdba - 1] = ObjectIdGetDatum(datdba); new_record[Anum_pg_database_encoding - 1] = Int32GetDatum(encoding); + new_record[Anum_pg_database_datcollprovider - 1] = CharGetDatum(dbcollprovider); new_record[Anum_pg_database_datistemplate - 1] = BoolGetDatum(dbistemplate); new_record[Anum_pg_database_datallowconn - 1] = BoolGetDatum(dballowconnections); new_record[Anum_pg_database_datconnlimit - 1] = Int32GetDatum(dbconnlimit); @@ -578,6 +611,10 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt) new_record[Anum_pg_database_dattablespace - 1] = ObjectIdGetDatum(dst_deftablespace); new_record[Anum_pg_database_datcollate - 1] = CStringGetTextDatum(dbcollate); new_record[Anum_pg_database_datctype - 1] = CStringGetTextDatum(dbctype); + if (dbicucoll) + new_record[Anum_pg_database_daticucoll - 1] = CStringGetTextDatum(dbicucoll); + else + new_record_nulls[Anum_pg_database_daticucoll - 1] = true; /* * We deliberately set datacl to default (NULL), 
rather than copying it @@ -844,7 +881,7 @@ dropdb(const char *dbname, bool missing_ok, bool force) pgdbrel = table_open(DatabaseRelationId, RowExclusiveLock); if (!get_db_info(dbname, AccessExclusiveLock, &db_id, NULL, NULL, - &db_istemplate, NULL, NULL, NULL, NULL, NULL, NULL)) + &db_istemplate, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)) { if (!missing_ok) { @@ -1043,7 +1080,7 @@ RenameDatabase(const char *oldname, const char *newname) rel = table_open(DatabaseRelationId, RowExclusiveLock); if (!get_db_info(oldname, AccessExclusiveLock, &db_id, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL)) + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)) ereport(ERROR, (errcode(ERRCODE_UNDEFINED_DATABASE), errmsg("database \"%s\" does not exist", oldname))); @@ -1156,7 +1193,7 @@ movedb(const char *dbname, const char *tblspcname) pgdbrel = table_open(DatabaseRelationId, RowExclusiveLock); if (!get_db_info(dbname, AccessExclusiveLock, &db_id, NULL, NULL, - NULL, NULL, NULL, NULL, &src_tblspcoid, NULL, NULL)) + NULL, NULL, NULL, NULL, &src_tblspcoid, NULL, NULL, NULL, NULL)) ereport(ERROR, (errcode(ERRCODE_UNDEFINED_DATABASE), errmsg("database \"%s\" does not exist", dbname))); @@ -1800,7 +1837,8 @@ get_db_info(const char *name, LOCKMODE lockmode, Oid *dbIdP, Oid *ownerIdP, int *encodingP, bool *dbIsTemplateP, bool *dbAllowConnP, TransactionId *dbFrozenXidP, MultiXactId *dbMinMultiP, - Oid *dbTablespace, char **dbCollate, char **dbCtype) + Oid *dbTablespace, char **dbCollate, char **dbCtype, char **dbIcucoll, + char *dbCollProvider) { bool result = false; Relation relation; @@ -1893,6 +1931,8 @@ get_db_info(const char *name, LOCKMODE lockmode, if (dbTablespace) *dbTablespace = dbform->dattablespace; /* default locale settings for this database */ + if (dbCollProvider) + *dbCollProvider = dbform->datcollprovider; if (dbCollate) { datum = SysCacheGetAttr(DATABASEOID, tuple, Anum_pg_database_datcollate, &isnull); @@ -1905,6 +1945,14 @@ get_db_info(const char 
*name, LOCKMODE lockmode, Assert(!isnull); *dbCtype = TextDatumGetCString(datum); } + if (dbIcucoll) + { + datum = SysCacheGetAttr(DATABASEOID, tuple, Anum_pg_database_daticucoll, &isnull); + if (isnull) + *dbIcucoll = NULL; + else + *dbIcucoll = TextDatumGetCString(datum); + } ReleaseSysCache(tuple); result = true; break; diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c index aefa0818d0..0334f66a23 100644 --- a/src/backend/utils/adt/pg_locale.c +++ b/src/backend/utils/adt/pg_locale.c @@ -1288,26 +1288,37 @@ lookup_collation_cache(Oid collation, bool set_flags) { /* Attempt to set the flags */ HeapTuple tp; - Datum datum; - bool isnull; - const char *collcollate; - const char *collctype; + Form_pg_collation collform; tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collation)); if (!HeapTupleIsValid(tp)) elog(ERROR, "cache lookup failed for collation %u", collation); + collform = (Form_pg_collation) GETSTRUCT(tp); - datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collcollate, &isnull); - Assert(!isnull); - collcollate = TextDatumGetCString(datum); - datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collctype, &isnull); - Assert(!isnull); - collctype = TextDatumGetCString(datum); - - cache_entry->collate_is_c = ((strcmp(collcollate, "C") == 0) || - (strcmp(collcollate, "POSIX") == 0)); - cache_entry->ctype_is_c = ((strcmp(collctype, "C") == 0) || - (strcmp(collctype, "POSIX") == 0)); + if (collform->collprovider == COLLPROVIDER_LIBC) + { + Datum datum; + bool isnull; + const char *collcollate; + const char *collctype; + + datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collcollate, &isnull); + Assert(!isnull); + collcollate = TextDatumGetCString(datum); + datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collctype, &isnull); + Assert(!isnull); + collctype = TextDatumGetCString(datum); + + cache_entry->collate_is_c = ((strcmp(collcollate, "C") == 0) || + (strcmp(collcollate, "POSIX") == 0)); + 
cache_entry->ctype_is_c = ((strcmp(collctype, "C") == 0) || + (strcmp(collctype, "POSIX") == 0)); + } + else + { + cache_entry->collate_is_c = false; + cache_entry->ctype_is_c = false; + } cache_entry->flags_valid = true; @@ -1340,6 +1351,9 @@ lc_collate_is_c(Oid collation) static int result = -1; char *localeptr; + if (default_locale.provider == COLLPROVIDER_ICU) + return false; + if (result >= 0) return (bool) result; localeptr = setlocale(LC_COLLATE, NULL); @@ -1390,6 +1404,9 @@ lc_ctype_is_c(Oid collation) static int result = -1; char *localeptr; + if (default_locale.provider == COLLPROVIDER_ICU) + return false; + if (result >= 0) return (bool) result; localeptr = setlocale(LC_CTYPE, NULL); @@ -1418,6 +1435,87 @@ lc_ctype_is_c(Oid collation) return (lookup_collation_cache(collation, true))->ctype_is_c; } +struct pg_locale_struct default_locale; + +void +make_icu_collator(const char *icucollstr, + struct pg_locale_struct *resultp) +{ +#ifdef USE_ICU + UCollator *collator; + UErrorCode status; + + status = U_ZERO_ERROR; + collator = ucol_open(icucollstr, &status); + if (U_FAILURE(status)) + ereport(ERROR, + (errmsg("could not open collator for locale \"%s\": %s", + icucollstr, u_errorName(status)))); + + if (U_ICU_VERSION_MAJOR_NUM < 54) + icu_set_collation_attributes(collator, icucollstr); + + /* We will leak this string if we get an error below :-( */ + resultp->info.icu.locale = MemoryContextStrdup(TopMemoryContext, icucollstr); + resultp->info.icu.ucol = collator; +#else /* not USE_ICU */ + /* could get here if a collation was created by a build with ICU */ + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("ICU is not supported in this build"), \ + errhint("You need to rebuild PostgreSQL using %s.", "--with-icu"))); +#endif /* not USE_ICU */ +} + +void +check_collation_version(HeapTuple colltuple) +{ + Form_pg_collation collform; + Datum datum; + bool isnull; + + collform = (Form_pg_collation) GETSTRUCT(colltuple); + + datum = 
SysCacheGetAttr(COLLOID, colltuple, Anum_pg_collation_collversion, + &isnull); + if (!isnull) + { + char *actual_versionstr; + char *collversionstr; + + collversionstr = TextDatumGetCString(datum); + + datum = SysCacheGetAttr(COLLOID, colltuple, collform->collprovider == COLLPROVIDER_ICU ? Anum_pg_collation_collicucoll : Anum_pg_collation_collcollate, &isnull); + Assert(!isnull); + + actual_versionstr = get_collation_actual_version(collform->collprovider, + TextDatumGetCString(datum)); + if (!actual_versionstr) + { + /* + * This could happen when specifying a version in CREATE + * COLLATION for a libc locale, or manually creating a mess in + * the catalogs. + */ + ereport(ERROR, + (errmsg("collation \"%s\" has no actual version, but a version was specified", + NameStr(collform->collname)))); + } + + if (strcmp(actual_versionstr, collversionstr) != 0) + ereport(WARNING, + (errmsg("collation \"%s\" has version mismatch", + NameStr(collform->collname)), + errdetail("The collation in the database was created using version %s, " + "but the operating system provides version %s.", + collversionstr, actual_versionstr), + errhint("Rebuild all objects affected by this collation and run " + "ALTER COLLATION %s REFRESH VERSION, " + "or build PostgreSQL with the right library version.", + quote_qualified_identifier(get_namespace_name(collform->collnamespace), + NameStr(collform->collname))))); + } +} /* simple subroutine for reporting errors from newlocale() */ #ifdef HAVE_LOCALE_T @@ -1475,7 +1573,12 @@ pg_newlocale_from_collation(Oid collid) Assert(OidIsValid(collid)); if (collid == DEFAULT_COLLATION_OID) - return (pg_locale_t) 0; + { + if (default_locale.provider == COLLPROVIDER_ICU) + return &default_locale; + else + return (pg_locale_t) 0; + } cache_entry = lookup_collation_cache(collid, false); @@ -1484,25 +1587,14 @@ pg_newlocale_from_collation(Oid collid) /* We haven't computed this yet in this session, so do it */ HeapTuple tp; Form_pg_collation collform; - const char 
*collcollate; - const char *collctype pg_attribute_unused(); struct pg_locale_struct result; pg_locale_t resultp; - Datum datum; - bool isnull; tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid)); if (!HeapTupleIsValid(tp)) elog(ERROR, "cache lookup failed for collation %u", collid); collform = (Form_pg_collation) GETSTRUCT(tp); - datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collcollate, &isnull); - Assert(!isnull); - collcollate = TextDatumGetCString(datum); - datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collctype, &isnull); - Assert(!isnull); - collctype = TextDatumGetCString(datum); - /* We'll fill in the result struct locally before allocating memory */ memset(&result, 0, sizeof(result)); result.provider = collform->collprovider; @@ -1511,8 +1603,19 @@ pg_newlocale_from_collation(Oid collid) if (collform->collprovider == COLLPROVIDER_LIBC) { #ifdef HAVE_LOCALE_T + Datum datum; + bool isnull; + const char *collcollate; + const char *collctype pg_attribute_unused(); locale_t loc; + datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collcollate, &isnull); + Assert(!isnull); + collcollate = TextDatumGetCString(datum); + datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collctype, &isnull); + Assert(!isnull); + collctype = TextDatumGetCString(datum); + if (strcmp(collcollate, collctype) == 0) { /* Normal case where they're the same */ @@ -1563,73 +1666,17 @@ pg_newlocale_from_collation(Oid collid) } else if (collform->collprovider == COLLPROVIDER_ICU) { -#ifdef USE_ICU - UCollator *collator; - UErrorCode status; - - if (strcmp(collcollate, collctype) != 0) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("collations with different collate and ctype values are not supported by ICU"))); - - status = U_ZERO_ERROR; - collator = ucol_open(collcollate, &status); - if (U_FAILURE(status)) - ereport(ERROR, - (errmsg("could not open collator for locale \"%s\": %s", - collcollate, u_errorName(status)))); - - if 
(U_ICU_VERSION_MAJOR_NUM < 54) - icu_set_collation_attributes(collator, collcollate); - - /* We will leak this string if we get an error below :-( */ - result.info.icu.locale = MemoryContextStrdup(TopMemoryContext, - collcollate); - result.info.icu.ucol = collator; -#else /* not USE_ICU */ - /* could get here if a collation was created by a build with ICU */ - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("ICU is not supported in this build"), \ - errhint("You need to rebuild PostgreSQL using %s.", "--with-icu"))); -#endif /* not USE_ICU */ + Datum datum; + bool isnull; + const char *icucollstr; + + datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collicucoll, &isnull); + Assert(!isnull); + icucollstr = TextDatumGetCString(datum); + make_icu_collator(icucollstr, &result); } - datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collversion, - &isnull); - if (!isnull) - { - char *actual_versionstr; - char *collversionstr; - - collversionstr = TextDatumGetCString(datum); - - actual_versionstr = get_collation_actual_version(collform->collprovider, collcollate); - if (!actual_versionstr) - { - /* - * This could happen when specifying a version in CREATE - * COLLATION for a libc locale, or manually creating a mess in - * the catalogs. 
- */ - ereport(ERROR, - (errmsg("collation \"%s\" has no actual version, but a version was specified", - NameStr(collform->collname)))); - } - - if (strcmp(actual_versionstr, collversionstr) != 0) - ereport(WARNING, - (errmsg("collation \"%s\" has version mismatch", - NameStr(collform->collname)), - errdetail("The collation in the database was created using version %s, " - "but the operating system provides version %s.", - collversionstr, actual_versionstr), - errhint("Rebuild all objects affected by this collation and run " - "ALTER COLLATION %s REFRESH VERSION, " - "or build PostgreSQL with the right library version.", - quote_qualified_identifier(get_namespace_name(collform->collnamespace), - NameStr(collform->collname))))); - } + check_collation_version(tp); ReleaseSysCache(tp); @@ -1652,6 +1699,17 @@ get_collation_actual_version(char collprovider, const char *collcollate) { char *collversion = NULL; + if (collprovider == COLLPROVIDER_DEFAULT) + { +#ifdef USE_ICU + if (default_locale.provider == COLLPROVIDER_ICU) + collversion = get_collation_actual_version(default_locale.provider, + default_locale.info.icu.locale); + else +#endif + collversion = NULL; + } + else #ifdef USE_ICU if (collprovider == COLLPROVIDER_ICU) { diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c index 5b9ed2f6f5..ffe60673d5 100644 --- a/src/backend/utils/init/postinit.c +++ b/src/backend/utils/init/postinit.c @@ -31,6 +31,7 @@ #include "catalog/catalog.h" #include "catalog/namespace.h" #include "catalog/pg_authid.h" +#include "catalog/pg_collation.h" #include "catalog/pg_database.h" #include "catalog/pg_db_role_setting.h" #include "catalog/pg_tablespace.h" @@ -414,6 +415,31 @@ CheckMyDatabase(const char *name, bool am_superuser, bool override_allow_connect " which is not recognized by setlocale().", ctype), errhint("Recreate the database with another locale or install the missing locale."))); + if (dbform->datcollprovider == COLLPROVIDER_ICU) + { + datum = 
SysCacheGetAttr(DATABASEOID, tup, Anum_pg_database_daticucoll, &isnull); + Assert(!isnull); + make_icu_collator(TextDatumGetCString(datum), &default_locale); + } + + default_locale.provider = dbform->datcollprovider; + /* + * Default locale is currently always deterministic. Nondeterministic + * locales currently don't support pattern matching, which would break a + * lot of things if applied globally. + */ + default_locale.deterministic = true; + + { + HeapTuple tp; + + tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(DEFAULT_COLLATION_OID)); + if (!HeapTupleIsValid(tp)) + elog(ERROR, "cache lookup failed for collation %u", DEFAULT_COLLATION_OID); + check_collation_version(tp); + ReleaseSysCache(tp); + } + /* Make the locale settings visible as GUC variables, too */ SetConfigOption("lc_collate", collate, PGC_INTERNAL, PGC_S_OVERRIDE); SetConfigOption("lc_ctype", ctype, PGC_INTERNAL, PGC_S_OVERRIDE); diff --git a/src/bin/initdb/Makefile b/src/bin/initdb/Makefile index eba282267a..b0dd13dfbd 100644 --- a/src/bin/initdb/Makefile +++ b/src/bin/initdb/Makefile @@ -62,6 +62,8 @@ clean distclean maintainer-clean: # ensure that changes in datadir propagate into object file initdb.o: initdb.c $(top_builddir)/src/Makefile.global +export with_icu + check: $(prove_check) diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c index d78e8e67b8..64880038ca 100644 --- a/src/bin/initdb/initdb.c +++ b/src/bin/initdb/initdb.c @@ -132,6 +132,8 @@ static char *lc_monetary = NULL; static char *lc_numeric = NULL; static char *lc_time = NULL; static char *lc_messages = NULL; +static char collation_provider[] = {COLLPROVIDER_LIBC, '\0'}; +static char *icu_locale = NULL; static const char *default_text_search_config = NULL; static char *username = NULL; static bool pwprompt = false; @@ -1405,6 +1407,12 @@ bootstrap_template1(void) bki_lines = replace_token(bki_lines, "LC_CTYPE", escape_quotes_bki(lc_ctype)); + bki_lines = replace_token(bki_lines, "ICUCOLL", + 
escape_quotes_bki(collation_provider[0] == COLLPROVIDER_ICU ? icu_locale : "_null_")); + + bki_lines = replace_token(bki_lines, "COLLPROVIDER", + collation_provider); + /* Also ensure backend isn't confused by this environment var: */ unsetenv("PGCLIENTENCODING"); @@ -1587,6 +1595,12 @@ setup_description(FILE *cmdfd) static void setup_collation(FILE *cmdfd) { + /* + * Set version of the default collation. + */ + PG_CMD_PRINTF("UPDATE pg_collation SET collversion = pg_collation_actual_version(oid) WHERE oid = %d;\n\n", + DEFAULT_COLLATION_OID); + /* * Add an SQL-standard name. We don't want to pin this, so it doesn't go * in pg_collation.h. But add it before reading system collations, so @@ -1854,9 +1868,6 @@ make_template0(FILE *cmdfd) * the new cluster should be the result of a fresh initdb.) */ static const char *const template0_setup[] = { - "CREATE DATABASE template0 IS_TEMPLATE = true ALLOW_CONNECTIONS = false OID = " - CppAsString2(Template0ObjectId) ";\n\n", - /* * Explicitly revoke public create-schema and create-temp-table * privileges in template1 and template0; else the latter would be on @@ -1874,6 +1885,10 @@ make_template0(FILE *cmdfd) NULL }; + PG_CMD_PRINTF("CREATE DATABASE template0 IS_TEMPLATE = true ALLOW_CONNECTIONS = false OID = " + CppAsString2(Template0ObjectId) " COLLATION_PROVIDER = %s;\n\n", + collation_provider[0] == COLLPROVIDER_ICU ? 
"icu" : "libc"); + for (line = template0_setup; *line; line++) PG_CMD_PUTS(*line); } @@ -2147,13 +2162,14 @@ setlocales(void) lc_monetary = locale; if (!lc_messages) lc_messages = locale; + if (!icu_locale) + icu_locale = locale; } /* * canonicalize locale names, and obtain any missing values from our * current environment */ - check_locale_name(LC_CTYPE, lc_ctype, &canonname); lc_ctype = canonname; check_locale_name(LC_COLLATE, lc_collate, &canonname); @@ -2172,6 +2188,18 @@ setlocales(void) check_locale_name(LC_CTYPE, lc_messages, &canonname); lc_messages = canonname; #endif + + /* + * If ICU is selected but no ICU locale has been given, take the + * lc_collate locale and chop off any encoding suffix. This should give + * the user a configuration that resembles their operating system's locale + * setup. + */ + if (collation_provider[0] == COLLPROVIDER_ICU && !icu_locale) + { + icu_locale = pg_strdup(lc_collate); + icu_locale[strcspn(icu_locale, ".")] = '\0'; + } } /* @@ -2187,9 +2215,12 @@ usage(const char *progname) printf(_(" -A, --auth=METHOD default authentication method for local connections\n")); printf(_(" --auth-host=METHOD default authentication method for local TCP/IP connections\n")); printf(_(" --auth-local=METHOD default authentication method for local-socket connections\n")); + printf(_(" --collation-provider={libc|icu}\n" + " set default collation provider for new databases\n")); printf(_(" [-D, --pgdata=]DATADIR location for this database cluster\n")); printf(_(" -E, --encoding=ENCODING set default encoding for new databases\n")); printf(_(" -g, --allow-group-access allow group read/execute on data directory\n")); + printf(_(" --icu-locale set ICU locale for new databases\n")); printf(_(" -k, --data-checksums use data page checksums\n")); printf(_(" --locale=LOCALE set default locale for new databases\n")); printf(_(" --lc-collate=, --lc-ctype=, --lc-messages=LOCALE\n" @@ -2364,7 +2395,8 @@ setup_locale_encoding(void) strcmp(lc_ctype, lc_time) == 
0 && strcmp(lc_ctype, lc_numeric) == 0 && strcmp(lc_ctype, lc_monetary) == 0 && - strcmp(lc_ctype, lc_messages) == 0) + strcmp(lc_ctype, lc_messages) == 0 && + (!icu_locale || strcmp(lc_ctype, icu_locale) == 0)) printf(_("The database cluster will be initialized with locale \"%s\".\n"), lc_ctype); else { @@ -2381,9 +2413,13 @@ setup_locale_encoding(void) lc_monetary, lc_numeric, lc_time); + if (icu_locale) + printf(_(" ICU: %s\n"), icu_locale); } - if (!encoding) + if (!encoding && collation_provider[0] == COLLPROVIDER_ICU) + encodingid = PG_UTF8; + else if (!encoding) { int ctype_enc; @@ -2887,6 +2923,8 @@ main(int argc, char *argv[]) {"data-checksums", no_argument, NULL, 'k'}, {"allow-group-access", no_argument, NULL, 'g'}, {"discard-caches", no_argument, NULL, 14}, + {"collation-provider", required_argument, NULL, 15}, + {"icu-locale", required_argument, NULL, 16}, {NULL, 0, NULL, 0} }; @@ -3033,6 +3071,20 @@ main(int argc, char *argv[]) extra_options, "-c debug_discard_caches=1"); break; + case 15: + if (strcmp(optarg, "icu") == 0) + collation_provider[0] = COLLPROVIDER_ICU; + else if (strcmp(optarg, "libc") == 0) + collation_provider[0] = COLLPROVIDER_LIBC; + else + { + pg_log_error("unrecognized collation provider: %s", optarg); + exit(1); + } + break; + case 16: + icu_locale = pg_strdup(optarg); + break; default: /* getopt_long already emitted a complaint */ fprintf(stderr, _("Try \"%s --help\" for more information.\n"), diff --git a/src/bin/initdb/t/001_initdb.pl b/src/bin/initdb/t/001_initdb.pl index 02bc688a3b..ee6803490c 100644 --- a/src/bin/initdb/t/001_initdb.pl +++ b/src/bin/initdb/t/001_initdb.pl @@ -11,7 +11,7 @@ use File::stat qw{lstat}; use PostgreSQL::Test::Cluster; use PostgreSQL::Test::Utils; -use Test::More tests => 22; +use Test::More tests => 24; my $tempdir = PostgreSQL::Test::Utils::tempdir; my $xlogdir = "$tempdir/pgxlog"; @@ -92,3 +92,19 @@ ok(check_mode_recursive($datadir_group, 0750, 0640), 'check PGDATA permissions'); } + +# Collation 
provider tests + +if ($ENV{with_icu} eq 'yes') +{ + command_ok(['initdb', '--no-sync', '--collation-provider=icu', "$tempdir/data2"], + 'collation provider ICU'); +} +else +{ + command_fails(['initdb', '--no-sync', '--collation-provider=icu', "$tempdir/data2"], + 'collation provider ICU fails since no ICU support'); +} + +command_fails(['initdb', '--no-sync', '--collation-provider=xyz', "$tempdir/dataX"], + 'fails for invalid collation provider'); diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index e3ddf19959..0c162f7d42 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -2753,6 +2753,7 @@ dumpDatabase(Archive *fout) i_datname, i_datdba, i_encoding, + i_datcollprovider, i_collate, i_ctype, i_frozenxid, @@ -2768,6 +2769,7 @@ dumpDatabase(Archive *fout) const char *datname, *dba, *encoding, + *datcollprovider, *collate, *ctype, *datistemplate, @@ -2792,6 +2794,10 @@ dumpDatabase(Archive *fout) appendPQExpBuffer(dbQry, "datminmxid, "); else appendPQExpBuffer(dbQry, "0 AS datminmxid, "); + if (fout->remoteVersion >= 150000) + appendPQExpBuffer(dbQry, "datcollprovider, "); + else + appendPQExpBuffer(dbQry, "'c' AS datcollprovider, "); appendPQExpBuffer(dbQry, "(SELECT spcname FROM pg_tablespace t WHERE t.oid = dattablespace) AS tablespace, " "shobj_description(oid, 'pg_database') AS description " @@ -2805,6 +2811,7 @@ dumpDatabase(Archive *fout) i_datname = PQfnumber(res, "datname"); i_datdba = PQfnumber(res, "datdba"); i_encoding = PQfnumber(res, "encoding"); + i_datcollprovider = PQfnumber(res, "datcollprovider"); i_collate = PQfnumber(res, "datcollate"); i_ctype = PQfnumber(res, "datctype"); i_frozenxid = PQfnumber(res, "datfrozenxid"); @@ -2820,6 +2827,7 @@ dumpDatabase(Archive *fout) datname = PQgetvalue(res, 0, i_datname); dba = getRoleName(PQgetvalue(res, 0, i_datdba)); encoding = PQgetvalue(res, 0, i_encoding); + datcollprovider = PQgetvalue(res, 0, i_datcollprovider); collate = PQgetvalue(res, 0, i_collate); ctype = 
PQgetvalue(res, 0, i_ctype); frozenxid = atooid(PQgetvalue(res, 0, i_frozenxid)); @@ -2853,6 +2861,17 @@ dumpDatabase(Archive *fout) appendPQExpBufferStr(creaQry, " ENCODING = "); appendStringLiteralAH(creaQry, encoding, fout); } + if (strlen(datcollprovider) > 0) + { + appendPQExpBufferStr(creaQry, " COLLATION_PROVIDER = "); + if (datcollprovider[0] == 'c') + appendPQExpBufferStr(creaQry, "libc"); + else if (datcollprovider[0] == 'i') + appendPQExpBufferStr(creaQry, "icu"); + else + fatal("unrecognized collation provider: %s", + datcollprovider); + } if (strlen(collate) > 0 && strcmp(collate, ctype) == 0) { appendPQExpBufferStr(creaQry, " LOCALE = "); diff --git a/src/bin/pg_upgrade/check.c b/src/bin/pg_upgrade/check.c index 3d218c2ad2..5c5aa78b2c 100644 --- a/src/bin/pg_upgrade/check.c +++ b/src/bin/pg_upgrade/check.c @@ -349,6 +349,16 @@ check_locale_and_encoding(DbInfo *olddb, DbInfo *newdb) if (!equivalent_locale(LC_CTYPE, olddb->db_ctype, newdb->db_ctype)) pg_fatal("lc_ctype values for database \"%s\" do not match: old \"%s\", new \"%s\"\n", olddb->db_name, olddb->db_ctype, newdb->db_ctype); + if (olddb->db_collprovider != newdb->db_collprovider) + pg_fatal("collation providers for database \"%s\" do not match: old \"%c\", new \"%c\"\n", + olddb->db_name, olddb->db_collprovider, newdb->db_collprovider); + if ((olddb->db_icucoll == NULL && newdb->db_icucoll != NULL) || + (olddb->db_icucoll != NULL && newdb->db_icucoll == NULL) || + (olddb->db_icucoll != NULL && newdb->db_icucoll != NULL && strcmp(olddb->db_icucoll, newdb->db_icucoll) != 0)) + pg_fatal("ICU collation values for database \"%s\" do not match: old \"%s\", new \"%s\"\n", + olddb->db_name, + olddb->db_icucoll ? olddb->db_icucoll : "(null)", + newdb->db_icucoll ? 
newdb->db_icucoll : "(null)"); } /* diff --git a/src/bin/pg_upgrade/info.c b/src/bin/pg_upgrade/info.c index 69ef23119f..2a9ca0e389 100644 --- a/src/bin/pg_upgrade/info.c +++ b/src/bin/pg_upgrade/info.c @@ -312,11 +312,20 @@ get_db_infos(ClusterInfo *cluster) i_encoding, i_datcollate, i_datctype, + i_datcollprovider, + i_daticucoll, i_spclocation; char query[QUERY_ALLOC]; snprintf(query, sizeof(query), - "SELECT d.oid, d.datname, d.encoding, d.datcollate, d.datctype, " + "SELECT d.oid, d.datname, d.encoding, d.datcollate, d.datctype, "); + if (GET_MAJOR_VERSION(old_cluster.major_version) <= 1500) + snprintf(query + strlen(query), sizeof(query) - strlen(query), + "'c' AS datcollprovider, NULL AS daticucoll, "); + else + snprintf(query + strlen(query), sizeof(query) - strlen(query), + "datcollprovider, daticucoll, "); + snprintf(query + strlen(query), sizeof(query) - strlen(query), "pg_catalog.pg_tablespace_location(t.oid) AS spclocation " "FROM pg_catalog.pg_database d " " LEFT OUTER JOIN pg_catalog.pg_tablespace t " @@ -331,6 +340,8 @@ get_db_infos(ClusterInfo *cluster) i_encoding = PQfnumber(res, "encoding"); i_datcollate = PQfnumber(res, "datcollate"); i_datctype = PQfnumber(res, "datctype"); + i_datcollprovider = PQfnumber(res, "datcollprovider"); + i_daticucoll = PQfnumber(res, "daticucoll"); i_spclocation = PQfnumber(res, "spclocation"); ntups = PQntuples(res); @@ -343,6 +354,11 @@ get_db_infos(ClusterInfo *cluster) dbinfos[tupnum].db_encoding = atoi(PQgetvalue(res, tupnum, i_encoding)); dbinfos[tupnum].db_collate = pg_strdup(PQgetvalue(res, tupnum, i_datcollate)); dbinfos[tupnum].db_ctype = pg_strdup(PQgetvalue(res, tupnum, i_datctype)); + dbinfos[tupnum].db_collprovider = PQgetvalue(res, tupnum, i_datcollprovider)[0]; + if (PQgetisnull(res, tupnum, i_daticucoll)) + dbinfos[tupnum].db_icucoll = NULL; + else + dbinfos[tupnum].db_icucoll = pg_strdup(PQgetvalue(res, tupnum, i_daticucoll)); snprintf(dbinfos[tupnum].db_tablespace, 
sizeof(dbinfos[tupnum].db_tablespace), "%s", PQgetvalue(res, tupnum, i_spclocation)); } diff --git a/src/bin/pg_upgrade/pg_upgrade.h b/src/bin/pg_upgrade/pg_upgrade.h index 1db8e3f0fb..5c0f256598 100644 --- a/src/bin/pg_upgrade/pg_upgrade.h +++ b/src/bin/pg_upgrade/pg_upgrade.h @@ -163,6 +163,8 @@ typedef struct * path */ char *db_collate; char *db_ctype; + char db_collprovider; + char *db_icucoll; int db_encoding; RelInfoArr rel_arr; /* array of all user relinfos */ } DbInfo; diff --git a/src/bin/psql/describe.c b/src/bin/psql/describe.c index 346cd92793..b85a7b1794 100644 --- a/src/bin/psql/describe.c +++ b/src/bin/psql/describe.c @@ -896,6 +896,18 @@ listAllDbs(const char *pattern, bool verbose) gettext_noop("Encoding"), gettext_noop("Collate"), gettext_noop("Ctype")); + if (pset.sversion >= 150000) + appendPQExpBuffer(&buf, + " d.daticucoll as \"%s\",\n" + " CASE d.datcollprovider WHEN 'c' THEN 'libc' WHEN 'i' THEN 'icu' END AS \"%s\",\n", + gettext_noop("ICU Collation"), + gettext_noop("Coll. Provider")); + else + appendPQExpBuffer(&buf, + " d.datcollate as \"%s\",\n" + " 'libc' AS \"%s\",\n", + gettext_noop("ICU Collation"), + gettext_noop("Coll. 
Provider")); appendPQExpBufferStr(&buf, " "); printACLColumn(&buf, "d.datacl"); if (verbose) @@ -4603,7 +4615,7 @@ listCollations(const char *pattern, bool verbose, bool showSystem) PQExpBufferData buf; PGresult *res; printQueryOpt myopt = pset.popt; - static const bool translate_columns[] = {false, false, false, false, false, true, false}; + static const bool translate_columns[] = {false, false, false, false, false, false, true, false}; initPQExpBuffer(&buf); @@ -4617,6 +4629,15 @@ listCollations(const char *pattern, bool verbose, bool showSystem) gettext_noop("Collate"), gettext_noop("Ctype")); + if (pset.sversion >= 150000) + appendPQExpBuffer(&buf, + ",\n c.collicucoll AS \"%s\"", + gettext_noop("ICU Collation")); + else + appendPQExpBuffer(&buf, + ",\n c.collcollate AS \"%s\"", + gettext_noop("ICU Collation")); + if (pset.sversion >= 100000) appendPQExpBuffer(&buf, ",\n CASE c.collprovider WHEN 'd' THEN 'default' WHEN 'c' THEN 'libc' WHEN 'i' THEN 'icu' END AS \"%s\"", diff --git a/src/bin/psql/tab-complete.c b/src/bin/psql/tab-complete.c index 0ee94d7362..ff05d395e0 100644 --- a/src/bin/psql/tab-complete.c +++ b/src/bin/psql/tab-complete.c @@ -2716,7 +2716,7 @@ psql_completion(const char *text, int start, int end) COMPLETE_WITH("OWNER", "TEMPLATE", "ENCODING", "TABLESPACE", "IS_TEMPLATE", "ALLOW_CONNECTIONS", "CONNECTION LIMIT", - "LC_COLLATE", "LC_CTYPE", "LOCALE", "OID"); + "LC_COLLATE", "LC_CTYPE", "LOCALE", "OID", "COLLATION_PROVIDER"); else if (Matches("CREATE", "DATABASE", MatchAny, "TEMPLATE")) COMPLETE_WITH_QUERY(Query_for_list_of_template_databases); diff --git a/src/bin/scripts/Makefile b/src/bin/scripts/Makefile index b833109da6..25e7da3d3f 100644 --- a/src/bin/scripts/Makefile +++ b/src/bin/scripts/Makefile @@ -53,6 +53,8 @@ clean distclean maintainer-clean: rm -f common.o $(WIN32RES) rm -rf tmp_check +export with_icu + check: $(prove_check) diff --git a/src/bin/scripts/createdb.c b/src/bin/scripts/createdb.c index b0c6805bc9..471cae7cca 100644 
--- a/src/bin/scripts/createdb.c +++ b/src/bin/scripts/createdb.c @@ -38,6 +38,7 @@ main(int argc, char *argv[]) {"lc-ctype", required_argument, NULL, 2}, {"locale", required_argument, NULL, 'l'}, {"maintenance-db", required_argument, NULL, 3}, + {"collation-provider", required_argument, NULL, 4}, {NULL, 0, NULL, 0} }; @@ -61,6 +62,7 @@ main(int argc, char *argv[]) char *lc_collate = NULL; char *lc_ctype = NULL; char *locale = NULL; + char *collation_provider = NULL; PQExpBufferData sql; @@ -119,6 +121,9 @@ main(int argc, char *argv[]) case 3: maintenance_db = pg_strdup(optarg); break; + case 4: + collation_provider = pg_strdup(optarg); + break; default: fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); exit(1); @@ -217,6 +222,8 @@ main(int argc, char *argv[]) appendPQExpBufferStr(&sql, " LC_CTYPE "); appendStringLiteralConn(&sql, lc_ctype, conn); } + if (collation_provider) + appendPQExpBuffer(&sql, " COLLATION_PROVIDER %s", collation_provider); appendPQExpBufferChar(&sql, ';'); @@ -267,6 +274,8 @@ help(const char *progname) printf(_("Usage:\n")); printf(_(" %s [OPTION]... 
[DBNAME] [DESCRIPTION]\n"), progname); printf(_("\nOptions:\n")); + printf(_(" --collation-provider={libc|icu}\n" + " collation provider for the database's default collation\n")); printf(_(" -D, --tablespace=TABLESPACE default tablespace for the database\n")); printf(_(" -e, --echo show the commands being sent to the server\n")); printf(_(" -E, --encoding=ENCODING encoding for the database\n")); diff --git a/src/bin/scripts/t/020_createdb.pl b/src/bin/scripts/t/020_createdb.pl index c54c291b7a..9712714a46 100644 --- a/src/bin/scripts/t/020_createdb.pl +++ b/src/bin/scripts/t/020_createdb.pl @@ -6,7 +6,7 @@ use PostgreSQL::Test::Cluster; use PostgreSQL::Test::Utils; -use Test::More tests => 25; +use Test::More tests => 28; program_help_ok('createdb'); program_version_ok('createdb'); @@ -25,9 +25,27 @@ qr/statement: CREATE DATABASE foobar2 ENCODING 'LATIN1'/, 'create database with encoding'); +if ($ENV{with_icu} eq 'yes') +{ + $node->issues_sql_like( + [ 'createdb', '-T', 'template0', '--collation-provider=icu', 'foobar4' ], + qr/statement: CREATE DATABASE foobar4 .* COLLATION_PROVIDER icu/, + 'create database with ICU'); +} +else +{ + $node->command_fails( + [ 'createdb', '-T', 'template0', '--collation-provider=icu', 'foobar4' ], + 'create database with ICU fails since no ICU support'); + pass; +} + $node->command_fails([ 'createdb', 'foobar1' ], 'fails if database already exists'); +$node->command_fails([ 'createdb', '-T', 'template0', '--collation-provider=xyz', 'foobarX' ], + 'fails for invalid collation provider'); + # Check use of templates with shared dependencies copied from the template. 
my ($ret, $stdout, $stderr) = $node->psql( 'foobar2', diff --git a/src/include/catalog/pg_collation.dat b/src/include/catalog/pg_collation.dat index 4b56825d82..f7470ead49 100644 --- a/src/include/catalog/pg_collation.dat +++ b/src/include/catalog/pg_collation.dat @@ -14,8 +14,7 @@ { oid => '100', oid_symbol => 'DEFAULT_COLLATION_OID', descr => 'database\'s default collation', - collname => 'default', collprovider => 'd', collencoding => '-1', - collcollate => '', collctype => '' }, + collname => 'default', collprovider => 'd', collencoding => '-1' }, { oid => '950', oid_symbol => 'C_COLLATION_OID', descr => 'standard C collation', collname => 'C', collprovider => 'c', collencoding => '-1', diff --git a/src/include/catalog/pg_collation.h b/src/include/catalog/pg_collation.h index 8763dd4080..590e06d1a8 100644 --- a/src/include/catalog/pg_collation.h +++ b/src/include/catalog/pg_collation.h @@ -40,8 +40,9 @@ CATALOG(pg_collation,3456,CollationRelationId) bool collisdeterministic BKI_DEFAULT(t); int32 collencoding; /* encoding for this collation; -1 = "all" */ #ifdef CATALOG_VARLEN /* variable-length fields start here */ - text collcollate BKI_FORCE_NOT_NULL; /* LC_COLLATE setting */ - text collctype BKI_FORCE_NOT_NULL; /* LC_CTYPE setting */ + text collcollate BKI_DEFAULT(_null_); /* LC_COLLATE setting */ + text collctype BKI_DEFAULT(_null_); /* LC_CTYPE setting */ + text collicucoll BKI_DEFAULT(_null_); /* ICU collation string */ text collversion BKI_DEFAULT(_null_); /* provider-dependent * version of collation * data */ @@ -75,6 +76,7 @@ extern Oid CollationCreate(const char *collname, Oid collnamespace, bool collisdeterministic, int32 collencoding, const char *collcollate, const char *collctype, + const char *collicucoll, const char *collversion, bool if_not_exists, bool quiet); diff --git a/src/include/catalog/pg_database.dat b/src/include/catalog/pg_database.dat index e7e42d6023..cce12e27bd 100644 --- a/src/include/catalog/pg_database.dat +++ 
b/src/include/catalog/pg_database.dat @@ -14,9 +14,9 @@ { oid => '1', oid_symbol => 'TemplateDbOid', descr => 'default template for new databases', - datname => 'template1', encoding => 'ENCODING', datistemplate => 't', + datname => 'template1', encoding => 'ENCODING', datcollprovider => 'COLLPROVIDER', datistemplate => 't', datallowconn => 't', datconnlimit => '-1', datfrozenxid => '0', datminmxid => '1', dattablespace => 'pg_default', datcollate => 'LC_COLLATE', - datctype => 'LC_CTYPE', datacl => '_null_' }, + datctype => 'LC_CTYPE', daticucoll => 'ICUCOLL', datacl => '_null_' }, ] diff --git a/src/include/catalog/pg_database.h b/src/include/catalog/pg_database.h index 90b43a4ecc..13dc5e1ba9 100644 --- a/src/include/catalog/pg_database.h +++ b/src/include/catalog/pg_database.h @@ -40,6 +40,9 @@ CATALOG(pg_database,1262,DatabaseRelationId) BKI_SHARED_RELATION BKI_ROWTYPE_OID /* character encoding */ int32 encoding; + /* see pg_collation.collprovider */ + char datcollprovider; + /* allowed as CREATE DATABASE template? 
*/ bool datistemplate; @@ -65,6 +68,9 @@ CATALOG(pg_database,1262,DatabaseRelationId) BKI_SHARED_RELATION BKI_ROWTYPE_OID /* LC_CTYPE setting */ text datctype BKI_FORCE_NOT_NULL; + /* ICU collation */ + text daticucoll; + /* access permissions */ aclitem datacl[1]; #endif diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h index 30e423af0e..11138e02f3 100644 --- a/src/include/utils/pg_locale.h +++ b/src/include/utils/pg_locale.h @@ -103,6 +103,12 @@ struct pg_locale_struct typedef struct pg_locale_struct *pg_locale_t; +extern struct pg_locale_struct default_locale; + +extern void make_icu_collator(const char *icucollstr, + struct pg_locale_struct *resultp); +extern void check_collation_version(HeapTuple colltuple); + extern pg_locale_t pg_newlocale_from_collation(Oid collid); extern char *get_collation_actual_version(char collprovider, const char *collcollate); diff --git a/src/test/regress/expected/collate.icu.utf8.out b/src/test/regress/expected/collate.icu.utf8.out index 70133df804..3d9647b597 100644 --- a/src/test/regress/expected/collate.icu.utf8.out +++ b/src/test/regress/expected/collate.icu.utf8.out @@ -1029,14 +1029,12 @@ CREATE COLLATION test0 FROM "C"; -- fail, duplicate name ERROR: collation "test0" already exists do $$ BEGIN - EXECUTE 'CREATE COLLATION test1 (provider = icu, lc_collate = ' || - quote_literal(current_setting('lc_collate')) || - ', lc_ctype = ' || - quote_literal(current_setting('lc_ctype')) || ');'; + EXECUTE 'CREATE COLLATION test1 (provider = icu, locale = ' || + quote_literal(current_setting('lc_collate')) || ');'; END $$; -CREATE COLLATION test3 (provider = icu, lc_collate = 'en_US.utf8'); -- fail, need lc_ctype -ERROR: parameter "lc_ctype" must be specified +CREATE COLLATION test3 (provider = icu, lc_collate = 'en_US.utf8'); -- fail, needs "locale" +ERROR: parameter "locale" must be specified CREATE COLLATION testx (provider = icu, locale = 'nonsense'); /* never fails with ICU */ DROP COLLATION testx; CREATE 
COLLATION test4 FROM nonsense; ERROR: collation "nonsense" for encoding "UTF8" does not exist diff --git a/src/test/regress/sql/collate.icu.utf8.sql b/src/test/regress/sql/collate.icu.utf8.sql index 9cee3d0042..0677ba56e4 100644 --- a/src/test/regress/sql/collate.icu.utf8.sql +++ b/src/test/regress/sql/collate.icu.utf8.sql @@ -366,13 +366,11 @@ CREATE SCHEMA test_schema; CREATE COLLATION test0 FROM "C"; -- fail, duplicate name do $$ BEGIN - EXECUTE 'CREATE COLLATION test1 (provider = icu, lc_collate = ' || - quote_literal(current_setting('lc_collate')) || - ', lc_ctype = ' || - quote_literal(current_setting('lc_ctype')) || ');'; + EXECUTE 'CREATE COLLATION test1 (provider = icu, locale = ' || + quote_literal(current_setting('lc_collate')) || ');'; END $$; -CREATE COLLATION test3 (provider = icu, lc_collate = 'en_US.utf8'); -- fail, need lc_ctype +CREATE COLLATION test3 (provider = icu, lc_collate = 'en_US.utf8'); -- fail, needs "locale" CREATE COLLATION testx (provider = icu, locale = 'nonsense'); /* never fails with ICU */ DROP COLLATION testx; CREATE COLLATION test4 FROM nonsense; base-commit: 87669de72c2249e6aec84b8c27fdc3ffb7284e13 -- 2.35.1