From e1cb130f550952d9c9c2d9ad1c52e60699a2c968 Mon Sep 17 00:00:00 2001 From: Marina Polyakova Date: Fri, 9 Feb 2018 18:57:25 +0300 Subject: [PATCH] ICU as default collation provider Now you can choose the default collation provider - libc or icu (the latter is available only if you build PostgreSQL using --with-icu). Just pass the appropriate locale options in libc format with the collation provider modifier (or without it) to initdb or createdb: [initdb|createdb] [--locale='locale'[@icu|@libc|]] [--lc-collate='locale'[@icu|@libc|]] where 'locale' is in the libc format, for example, 'en_US', 'ru_RU.UTF-8' or 'C'. You can also pass the corresponding locale options without the collation provider modifier. In this case, in initdb, the default collation provider is libc for locales 'C' and 'POSIX' and icu for others. If you did not specify the collation provider for the --locale/--lc-collate options in createdb, this will be libc for locales 'C' and 'POSIX' and the default collation provider from the template database for others. Note that, as usual, the --lc-collate option takes precedence over the --locale option regardless of whether it contains the collation provider modifier or not. Note that you can use icu as the default collation provider only for the locales that libc also has in your operating system. This was done in part because we need databases with default collation and SQL_ASCII encoding for regression tests, but ICU in PostgreSQL does not support this encoding. Also in this case, we don't need to unset the appropriate locale environment variables (because other programs don't understand new format). So in fact ICU is used as the default collation/ctype provider where there is already a choice for using the collation provider. In other places all the work is done by libc as usual. Note that to use icu as the default collation provider, lc_collate and lc_ctype must be the same. 
The default database collation with the collation provider and version (for ICU collations) is stored in pg_database.datcollate in the format 'locale'@'collprovider'[.'collversion']. Important: when you try to connect to a database, the ability to use the selected collation provider and the version of the default collation will be checked. But when you try to start a cluster server it is not checked. Important: in this commit there's no appropriate support for using pg_upgrade/pg_dump/pg_dumpall for clusters that do not support this feature. In this case pg_dump/pg_dumpall retrieve the LC_COLLATE database settings unchanged (= without mentioning the provider) from the old cluster. --- doc/src/sgml/charset.sgml | 55 ++ doc/src/sgml/ref/create_database.sgml | 8 +- doc/src/sgml/ref/createdb.sgml | 18 +- doc/src/sgml/ref/initdb.sgml | 9 +- doc/src/sgml/regress.sgml | 17 + src/backend/catalog/information_schema.sql | 2 +- src/backend/commands/collationcmds.c | 33 +- src/backend/commands/dbcommands.c | 152 +++- src/backend/main/main.c | 5 +- src/backend/regex/regc_pg_locale.c | 40 +- src/backend/utils/adt/formatting.c | 111 ++- src/backend/utils/adt/like.c | 16 +- src/backend/utils/adt/pg_locale.c | 390 +++++++--- src/backend/utils/adt/selfuncs.c | 14 +- src/backend/utils/adt/varlena.c | 270 ++++--- src/backend/utils/init/postinit.c | 118 ++- src/backend/utils/mb/encnames.c | 4 +- src/bin/initdb/Makefile | 2 +- src/bin/initdb/initdb.c | 387 +++++++++- src/bin/pg_dump/pg_dump.c | 30 +- src/bin/psql/describe.c | 10 +- src/bin/scripts/Makefile | 2 +- src/bin/scripts/createdb.c | 14 +- src/common/Makefile | 2 +- src/common/pg_collation_fn_common.c | 90 +++ src/fe_utils/.gitignore | 1 + src/fe_utils/Makefile | 11 +- src/include/commands/dbcommands.h | 3 +- src/include/common/pg_collation_fn_common.h | 22 + src/include/pg_config.h.win32 | 4 + src/include/port.h | 34 + src/include/port/win32.h | 2 +- src/include/utils/pg_locale.h | 12 +- src/interfaces/libpq/.gitignore | 1 + 
src/interfaces/libpq/Makefile | 2 +- src/port/chklocale.c | 598 +++++++++++++++ src/test/Makefile | 2 +- src/test/default_collation/Makefile | 28 + src/test/default_collation/icu.utf8/.gitignore | 2 + src/test/default_collation/icu.utf8/Makefile | 11 + .../icu.utf8/t/001_default_collation.pl | 799 +++++++++++++++++++++ src/test/default_collation/icu/.gitignore | 2 + src/test/default_collation/icu/Makefile | 11 + .../icu/t/001_default_collation.pl | 605 ++++++++++++++++ src/test/default_collation/libc.utf8/.gitignore | 2 + src/test/default_collation/libc.utf8/Makefile | 11 + .../libc.utf8/t/001_default_collation.pl | 703 ++++++++++++++++++ src/test/default_collation/libc/.gitignore | 2 + src/test/default_collation/libc/Makefile | 11 + .../libc/t/001_default_collation.pl | 355 +++++++++ src/test/regress/expected/collate.icu.utf8.out | 10 +- src/test/regress/expected/collate.linux.utf8.out | 10 +- src/test/regress/sql/collate.icu.utf8.sql | 8 +- src/test/regress/sql/collate.linux.utf8.sql | 8 +- src/tools/msvc/Mkvcbuild.pm | 26 +- 55 files changed, 4730 insertions(+), 365 deletions(-) create mode 100644 src/common/pg_collation_fn_common.c create mode 100644 src/include/common/pg_collation_fn_common.h create mode 100644 src/test/default_collation/Makefile create mode 100644 src/test/default_collation/icu.utf8/.gitignore create mode 100644 src/test/default_collation/icu.utf8/Makefile create mode 100644 src/test/default_collation/icu.utf8/t/001_default_collation.pl create mode 100644 src/test/default_collation/icu/.gitignore create mode 100644 src/test/default_collation/icu/Makefile create mode 100644 src/test/default_collation/icu/t/001_default_collation.pl create mode 100644 src/test/default_collation/libc.utf8/.gitignore create mode 100644 src/test/default_collation/libc.utf8/Makefile create mode 100644 src/test/default_collation/libc.utf8/t/001_default_collation.pl create mode 100644 src/test/default_collation/libc/.gitignore create mode 100644 
src/test/default_collation/libc/Makefile create mode 100644 src/test/default_collation/libc/t/001_default_collation.pl diff --git a/doc/src/sgml/charset.sgml b/doc/src/sgml/charset.sgml index dc3fd34..f28e0ec 100644 --- a/doc/src/sgml/charset.sgml +++ b/doc/src/sgml/charset.sgml @@ -537,6 +537,61 @@ SELECT * FROM test1 ORDER BY a || b COLLATE "fr_FR"; a database. + + You can specify the default collation provider with the + and options of the or + commands, as follows: + +--locale=locale[@provider] +--lc-collate=locale[@provider] + + where provider can take the icu + or libc value, and locale is specified + in the libc format. You can only specify a single + locale provider after the @ symbol. + The --lc-collate option overrides the + --locale setting, regardless of whether it specifies the + collation provider. + + + + If you omit the collation provider options, libc + provider is used for C and POSIX + locales. For other locales, the default providers are: + + + + icu at the cluster level + + + + Default collation provider from the template database at + the database level + + + + + + + + You can only use the icu collation provider for locales that are + supported by libc in your operating system and satisfy all + restrictions applicable to icu. + + + + + When you connect to a database, + PostgreSQL checks that the selected collation + provider and the version of the default collation are supported. + You can find the default database collation and the collation provider + in pg_database.datcollate. 
For ICU collations, collation version is + also stored: + +locale@provider[.version] + + + Standard Collations diff --git a/doc/src/sgml/ref/create_database.sgml b/doc/src/sgml/ref/create_database.sgml index b2c9e24..8b2e153 100644 --- a/doc/src/sgml/ref/create_database.sgml +++ b/doc/src/sgml/ref/create_database.sgml @@ -25,7 +25,7 @@ CREATE DATABASE name [ [ WITH ] [ OWNER [=] user_name ] [ TEMPLATE [=] template ] [ ENCODING [=] encoding ] - [ LC_COLLATE [=] lc_collate ] + [ LC_COLLATE [=] lc_collate[@provider] ] [ LC_CTYPE [=] lc_ctype ] [ TABLESPACE [=] tablespace_name ] [ ALLOW_CONNECTIONS [=] allowconn ] @@ -112,13 +112,17 @@ CREATE DATABASE name - lc_collate + lc_collate[@provider] Collation order (LC_COLLATE) to use in the new database. This affects the sort order applied to strings, e.g. in queries with ORDER BY, as well as the order used in indexes on text columns. The default is to use the collation order of the template database. + Optionally, you can specify the default collation provider after the + @ symbol, as explained in + . Supported values are icu + and libc. See below for additional restrictions. diff --git a/doc/src/sgml/ref/createdb.sgml b/doc/src/sgml/ref/createdb.sgml index 2658efe..dbf87d3 100644 --- a/doc/src/sgml/ref/createdb.sgml +++ b/doc/src/sgml/ref/createdb.sgml @@ -121,22 +121,34 @@ PostgreSQL documentation - - + + Specifies the locale to be used in this database. This is equivalent to specifying both and . + + + Optionally, you can specify the default collation provider after the + @ symbol. Supported values are icu + and libc. For details, see . + - + Specifies the LC_COLLATE setting to be used in this database. + + + Optionally, you can specify the default collation provider after the + @ symbol. Supported values are icu + and libc. For details, see . 
+ diff --git a/doc/src/sgml/ref/initdb.sgml b/doc/src/sgml/ref/initdb.sgml index 585665f..87adcd5 100644 --- a/doc/src/sgml/ref/initdb.sgml +++ b/doc/src/sgml/ref/initdb.sgml @@ -203,7 +203,7 @@ PostgreSQL documentation - + Sets the default locale for the database cluster. If this @@ -211,11 +211,16 @@ PostgreSQL documentation environment that initdb runs in. Locale support is described in . + + Optionally, you can specify the default collation provider after the + @ symbol. Supported values are icu + and libc. For details, see . + - + diff --git a/doc/src/sgml/regress.sgml b/doc/src/sgml/regress.sgml index 53716a0..fefddd8 100644 --- a/doc/src/sgml/regress.sgml +++ b/doc/src/sgml/regress.sgml @@ -280,6 +280,23 @@ make check EXTRA_TESTS='collate.icu.utf8 collate.linux.utf8' LANG=en_US.utf8 + Extra TAP Tests for Default Collations + + + To test the default collations on Linux/glibc platforms, + you can run extra TAP tests, as follows: + +make -C src/test/default_collation check-utf8 + + These tests only succeed when run in a database that uses the UTF-8 + encoding. As these tests are TAP-based, you can only run them if + PostgreSQL was configured with the + option. + For details, see . 
+ + + + Testing Hot Standby diff --git a/src/backend/catalog/information_schema.sql b/src/backend/catalog/information_schema.sql index 686528c..640a9e1 100644 --- a/src/backend/catalog/information_schema.sql +++ b/src/backend/catalog/information_schema.sql @@ -397,7 +397,7 @@ CREATE VIEW character_sets AS CAST(c.collname AS sql_identifier) AS default_collate_name FROM pg_database d LEFT JOIN (pg_collation c JOIN pg_namespace nc ON (c.collnamespace = nc.oid)) - ON (datcollate = collcollate AND datctype = collctype) + ON (datcollate = (collcollate || '@libc') AND datctype = collctype) WHERE d.datname = current_database() ORDER BY char_length(c.collname) DESC, c.collname ASC -- prefer full/canonical name LIMIT 1; diff --git a/src/backend/commands/collationcmds.c b/src/backend/commands/collationcmds.c index d0b5cdb..db4f67d 100644 --- a/src/backend/commands/collationcmds.c +++ b/src/backend/commands/collationcmds.c @@ -28,6 +28,7 @@ #include "commands/comment.h" #include "commands/dbcommands.h" #include "commands/defrem.h" +#include "common/pg_collation_fn_common.h" #include "mb/pg_wchar.h" #include "miscadmin.h" #include "utils/builtins.h" @@ -163,11 +164,8 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e if (collproviderstr) { - if (pg_strcasecmp(collproviderstr, "icu") == 0) - collprovider = COLLPROVIDER_ICU; - else if (pg_strcasecmp(collproviderstr, "libc") == 0) - collprovider = COLLPROVIDER_LIBC; - else + collprovider = get_collprovider(collproviderstr); + if (!is_valid_nondefault_collprovider(collprovider)) ereport(ERROR, (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), errmsg("unrecognized collation provider: %s", @@ -193,7 +191,8 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e else { collencoding = GetDatabaseEncoding(); - check_encoding_locale_matches(collencoding, collcollate, collctype); + check_encoding_locale_matches(collencoding, collcollate, collctype, + collprovider); } } @@ -435,26 
+434,6 @@ cmpaliases(const void *a, const void *b) #ifdef USE_ICU /* - * Get the ICU language tag for a locale name. - * The result is a palloc'd string. - */ -static char * -get_icu_language_tag(const char *localename) -{ - char buf[ULOC_FULLNAME_CAPACITY]; - UErrorCode status; - - status = U_ZERO_ERROR; - uloc_toLanguageTag(localename, buf, sizeof(buf), TRUE, &status); - if (U_FAILURE(status)) - ereport(ERROR, - (errmsg("could not convert locale name \"%s\" to language tag: %s", - localename, u_errorName(status)))); - - return pstrdup(buf); -} - -/* * Get a comment (specifically, the display name) for an ICU locale. * The result is a palloc'd string, or NULL if we can't get a comment * or find that it's not all ASCII. (We can *not* accept non-ASCII @@ -699,7 +678,7 @@ pg_import_system_collations(PG_FUNCTION_ARGS) name = uloc_getAvailable(i); langtag = get_icu_language_tag(name); - collcollate = U_ICU_VERSION_MAJOR_NUM >= 54 ? langtag : name; + collcollate = get_icu_collate(name, langtag); /* * Be paranoid about not allowing any non-ASCII strings into diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c index d2020d0..6fbd8b3 100644 --- a/src/backend/commands/dbcommands.c +++ b/src/backend/commands/dbcommands.c @@ -34,6 +34,7 @@ #include "catalog/indexing.h" #include "catalog/objectaccess.h" #include "catalog/pg_authid.h" +#include "catalog/pg_collation.h" #include "catalog/pg_database.h" #include "catalog/pg_db_role_setting.h" #include "catalog/pg_subscription.h" @@ -44,6 +45,7 @@ #include "commands/defrem.h" #include "commands/seclabel.h" #include "commands/tablespace.h" +#include "common/pg_collation_fn_common.h" #include "mb/pg_wchar.h" #include "miscadmin.h" #include "pgstat.h" @@ -141,6 +143,14 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt) int notherbackends; int npreparedxacts; createdb_failure_params fparms; + char *src_canonname; + char src_collprovider; + char *dbcanonname = NULL; + char dbcollprovider; + char 
*dbcollate_full_name; + char *icu_wincollate = NULL; + char *langtag = NULL; + const char *collate; /* Extract options from the statement node tree */ foreach(option, stmt->options) @@ -350,8 +360,28 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt) /* If encoding or locales are defaulted, use source's setting */ if (encoding < 0) encoding = src_encoding; + + check_locale_collprovider(src_collate, &src_canonname, &src_collprovider, + NULL); + + if (!is_valid_nondefault_collprovider(src_collprovider)) + /* This could happen when manually creating a mess in the catalogs. */ + ereport(FATAL, + (errmsg("could not find out the collation provider for datcollate \"%s\" of template database \"%s\"", + src_collate, dbtemplate))); + if (dbcollate == NULL) - dbcollate = src_collate; + { + dbcollate = src_canonname; + dbcollprovider = src_collprovider; + } + else + { + check_locale_collprovider(dbcollate, &dbcanonname, &dbcollprovider, + NULL); + dbcollate = dbcanonname; + } + if (dbctype == NULL) dbctype = src_ctype; @@ -362,18 +392,88 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt) errmsg("invalid server encoding %d", encoding))); /* Check that the chosen locales are valid, and get canonical spellings */ - if (!check_locale(LC_COLLATE, dbcollate, &canonname)) - ereport(ERROR, - (errcode(ERRCODE_WRONG_OBJECT_TYPE), - errmsg("invalid locale name: \"%s\"", dbcollate))); - dbcollate = canonname; - if (!check_locale(LC_CTYPE, dbctype, &canonname)) + + if (!check_locale(LC_CTYPE, dbctype, &canonname, '\0')) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("invalid locale name: \"%s\"", dbctype))); dbctype = canonname; - check_encoding_locale_matches(encoding, dbcollate, dbctype); + /* we always check lc_collate for libc */ + if (!check_locale(LC_COLLATE, dbcollate, &canonname, COLLPROVIDER_LIBC)) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("invalid locale name: \"%s\" (provider \"%s\")", + dbcollate, 
get_collprovider_name(COLLPROVIDER_LIBC)))); + dbcollate = canonname; + + /* determine the collation provider if we haven't already done it */ + if (!is_valid_nondefault_collprovider(dbcollprovider)) + { + if (locale_is_c(dbcollate)) + dbcollprovider = COLLPROVIDER_LIBC; + else + dbcollprovider = src_collprovider; + } + + Assert(is_valid_nondefault_collprovider(dbcollprovider)); + +#ifndef USE_ICU + if (dbcollprovider == COLLPROVIDER_ICU) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("ICU is not supported in this build"), + errhint("You need to rebuild PostgreSQL using --with-icu."))); +#endif + + /* check lc_collate and lc_ctype for icu if we need it */ + if (dbcollprovider == COLLPROVIDER_ICU) + { + if (!check_locale(LC_COLLATE, dbcollate, NULL, dbcollprovider)) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("invalid locale name: \"%s\" (provider \"%s\")", + dbcollate, get_collprovider_name(dbcollprovider)))); + + if (strcmp(dbcollate, dbctype) != 0) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("collations with different collate and ctype values are not supported by ICU"))); + } + + check_encoding_locale_matches(encoding, dbcollate, dbctype, dbcollprovider); + + /* get the collation version */ + +#ifdef USE_ICU + if (dbcollprovider == COLLPROVIDER_ICU) + { + collate = (const char *) dbcollate; +#ifdef WIN32 + if (!locale_is_c(collate)) + { + icu_wincollate = check_icu_winlocale(collate); + collate = (const char *) icu_wincollate; + } +#endif /* WIN32 */ + langtag = get_icu_language_tag(collate); + collate = get_icu_collate(collate, langtag); + } + else +#endif /* USE_ICU */ + { + /* COLLPROVIDER_LIBC */ + collate = (const char *) dbcollate; + } + + dbcollate_full_name = get_full_collation_name( + dbcollate, dbcollprovider, + get_collation_actual_version(dbcollprovider, collate)); + + if (strlen(dbcollate_full_name) >= NAMEDATALEN) + ereport(ERROR, + (errmsg("the full database collation name \"%s\" 
is too long", + dbcollate_full_name))); /* * Check that the new encoding and locale settings match the source @@ -395,11 +495,11 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt) pg_encoding_to_char(src_encoding)), errhint("Use the same encoding as in the template database, or use template0 as template."))); - if (strcmp(dbcollate, src_collate) != 0) + if (strcmp(dbcollate_full_name, src_collate) != 0) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("new collation (%s) is incompatible with the collation of the template database (%s)", - dbcollate, src_collate), + dbcollate_full_name, src_collate), errhint("Use the same collation as in the template database, or use template0 as template."))); if (strcmp(dbctype, src_ctype) != 0) @@ -522,7 +622,7 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt) new_record[Anum_pg_database_datdba - 1] = ObjectIdGetDatum(datdba); new_record[Anum_pg_database_encoding - 1] = Int32GetDatum(encoding); new_record[Anum_pg_database_datcollate - 1] = - DirectFunctionCall1(namein, CStringGetDatum(dbcollate)); + DirectFunctionCall1(namein, CStringGetDatum(dbcollate_full_name)); new_record[Anum_pg_database_datctype - 1] = DirectFunctionCall1(namein, CStringGetDatum(dbctype)); new_record[Anum_pg_database_datistemplate - 1] = BoolGetDatum(dbistemplate); @@ -690,6 +790,16 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt) */ ForceSyncCommit(); } + + pfree(src_canonname); + pfree(dbcollate_full_name); + if (dbcanonname) + pfree(dbcanonname); + if (langtag) + pfree(langtag); + if (icu_wincollate) + pfree(icu_wincollate); + PG_END_ENSURE_ERROR_CLEANUP(createdb_failure_callback, PointerGetDatum(&fparms)); @@ -719,7 +829,8 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt) * Note: if you change this policy, fix initdb to match. 
*/ void -check_encoding_locale_matches(int encoding, const char *collate, const char *ctype) +check_encoding_locale_matches(int encoding, const char *collate, const char *ctype, + char collprovider) { int ctype_encoding = pg_get_encoding_from_locale(ctype, true); int collate_encoding = pg_get_encoding_from_locale(collate, true); @@ -753,6 +864,23 @@ check_encoding_locale_matches(int encoding, const char *collate, const char *cty collate), errdetail("The chosen LC_COLLATE setting requires encoding \"%s\".", pg_encoding_to_char(collate_encoding)))); + + if (collprovider == COLLPROVIDER_ICU) + { +#ifdef USE_ICU + if (!(is_encoding_supported_by_icu(encoding) || + (encoding == PG_SQL_ASCII && superuser()))) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("encoding \"%s\" is not supported for ICU locales", + pg_encoding_to_char(encoding)))); +#else + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("ICU is not supported in this build"), + errhint("You need to rebuild PostgreSQL using --with-icu."))); +#endif + } } /* Error cleanup callback for createdb */ diff --git a/src/backend/main/main.c b/src/backend/main/main.c index 38853e3..cb27d62 100644 --- a/src/backend/main/main.c +++ b/src/backend/main/main.c @@ -32,6 +32,7 @@ #endif #include "bootstrap/bootstrap.h" +#include "catalog/pg_collation.h" #include "common/username.h" #include "port/atomics.h" #include "postmaster/postmaster.h" @@ -306,8 +307,8 @@ startup_hacks(const char *progname) static void init_locale(const char *categoryname, int category, const char *locale) { - if (pg_perm_setlocale(category, locale) == NULL && - pg_perm_setlocale(category, "C") == NULL) + if (pg_perm_setlocale(category, locale, COLLPROVIDER_LIBC) == NULL && + pg_perm_setlocale(category, "C", COLLPROVIDER_LIBC) == NULL) elog(FATAL, "could not adopt \"%s\" locale nor C locale for %s", locale, categoryname); } diff --git a/src/backend/regex/regc_pg_locale.c b/src/backend/regex/regc_pg_locale.c index 
acbed2e..e836553 100644 --- a/src/backend/regex/regc_pg_locale.c +++ b/src/backend/regex/regc_pg_locale.c @@ -16,6 +16,7 @@ */ #include "catalog/pg_collation.h" +#include "common/pg_collation_fn_common.h" #include "utils/pg_locale.h" /* @@ -240,8 +241,13 @@ pg_set_regex_collation(Oid collation) } else { + char collprovider; + if (collation == DEFAULT_COLLATION_OID) + { pg_regex_locale = 0; + collprovider = get_default_collprovider(); + } else if (OidIsValid(collation)) { /* @@ -250,6 +256,7 @@ pg_set_regex_collation(Oid collation) * have to be considered below. */ pg_regex_locale = pg_newlocale_from_collation(collation); + collprovider = pg_regex_locale->provider; } else { @@ -263,24 +270,35 @@ pg_set_regex_collation(Oid collation) errhint("Use the COLLATE clause to set the collation explicitly."))); } + Assert(is_valid_nondefault_collprovider(collprovider)); + + if (collprovider == COLLPROVIDER_ICU) + { #ifdef USE_ICU - if (pg_regex_locale && pg_regex_locale->provider == COLLPROVIDER_ICU) pg_regex_strategy = PG_REGEX_LOCALE_ICU; - else +#else + /* shouldn't happen */ + elog(ERROR, "unsupported collprovider: %c", collprovider); #endif - if (GetDatabaseEncoding() == PG_UTF8) - { - if (pg_regex_locale) - pg_regex_strategy = PG_REGEX_LOCALE_WIDE_L; - else - pg_regex_strategy = PG_REGEX_LOCALE_WIDE; } else { - if (pg_regex_locale) - pg_regex_strategy = PG_REGEX_LOCALE_1BYTE_L; + /* COLLPROVIDER_LIBC */ + + if (GetDatabaseEncoding() == PG_UTF8) + { + if (pg_regex_locale) + pg_regex_strategy = PG_REGEX_LOCALE_WIDE_L; + else + pg_regex_strategy = PG_REGEX_LOCALE_WIDE; + } else - pg_regex_strategy = PG_REGEX_LOCALE_1BYTE; + { + if (pg_regex_locale) + pg_regex_strategy = PG_REGEX_LOCALE_1BYTE_L; + else + pg_regex_strategy = PG_REGEX_LOCALE_1BYTE; + } } pg_regex_collation = collation; diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c index b8bd4ca..af38e72 100644 --- a/src/backend/utils/adt/formatting.c +++ 
b/src/backend/utils/adt/formatting.c @@ -1452,7 +1452,7 @@ typedef int32_t (*ICU_Convert_Func) (UChar *dest, int32_t destCapacity, UErrorCode *pErrorCode); static int32_t -icu_convert_case(ICU_Convert_Func func, pg_locale_t mylocale, +icu_convert_case(ICU_Convert_Func func, const char *locale, UChar **buff_dest, UChar *buff_source, int32_t len_source) { UErrorCode status; @@ -1462,7 +1462,7 @@ icu_convert_case(ICU_Convert_Func func, pg_locale_t mylocale, *buff_dest = palloc(len_dest * sizeof(**buff_dest)); status = U_ZERO_ERROR; len_dest = func(*buff_dest, len_dest, buff_source, len_source, - mylocale->info.icu.locale, &status); + locale, &status); if (status == U_BUFFER_OVERFLOW_ERROR) { /* try again with adjusted length */ @@ -1470,7 +1470,7 @@ icu_convert_case(ICU_Convert_Func func, pg_locale_t mylocale, *buff_dest = palloc(len_dest * sizeof(**buff_dest)); status = U_ZERO_ERROR; len_dest = func(*buff_dest, len_dest, buff_source, len_source, - mylocale->info.icu.locale, &status); + locale, &status); } if (U_FAILURE(status)) ereport(ERROR, @@ -1528,8 +1528,15 @@ str_tolower(const char *buff, size_t nbytes, Oid collid) else { pg_locale_t mylocale = 0; + char collprovider; + bool use_libc PG_USED_FOR_ASSERTS_ONLY, + use_icu; - if (collid != DEFAULT_COLLATION_OID) + if (collid == DEFAULT_COLLATION_OID) + { + collprovider = get_default_collprovider(); + } + else { if (!OidIsValid(collid)) { @@ -1543,25 +1550,43 @@ str_tolower(const char *buff, size_t nbytes, Oid collid) errhint("Use the COLLATE clause to set the collation explicitly."))); } mylocale = pg_newlocale_from_collation(collid); + collprovider = mylocale->provider; } -#ifdef USE_ICU - if (mylocale && mylocale->provider == COLLPROVIDER_ICU) + use_icu = (collprovider == COLLPROVIDER_ICU && + GetDatabaseEncoding() != PG_SQL_ASCII); + use_libc = (collprovider == COLLPROVIDER_LIBC || + GetDatabaseEncoding() == PG_SQL_ASCII); + Assert(use_libc || use_icu); + + if (use_icu) { +#ifdef USE_ICU int32_t len_uchar; 
int32_t len_conv; UChar *buff_uchar; UChar *buff_conv; + const char *locale; + + if (mylocale) + locale = mylocale->info.icu.locale; + else + locale = get_icu_default_collate(); len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes); - len_conv = icu_convert_case(u_strToLower, mylocale, + len_conv = icu_convert_case(u_strToLower, locale, &buff_conv, buff_uchar, len_uchar); icu_from_uchar(&result, buff_conv, len_conv); pfree(buff_uchar); +#else /* not USE_ICU */ + /* shouldn't happen */ + elog(ERROR, "unsupported collprovider: %c", collprovider); +#endif /* not USE_ICU */ } else -#endif { + /* use_libc */ + if (pg_database_encoding_max_length() > 1) { wchar_t *workspace; @@ -1650,8 +1675,15 @@ str_toupper(const char *buff, size_t nbytes, Oid collid) else { pg_locale_t mylocale = 0; + char collprovider; + bool use_libc PG_USED_FOR_ASSERTS_ONLY, + use_icu; - if (collid != DEFAULT_COLLATION_OID) + if (collid == DEFAULT_COLLATION_OID) + { + collprovider = get_default_collprovider(); + } + else { if (!OidIsValid(collid)) { @@ -1665,25 +1697,43 @@ str_toupper(const char *buff, size_t nbytes, Oid collid) errhint("Use the COLLATE clause to set the collation explicitly."))); } mylocale = pg_newlocale_from_collation(collid); + collprovider = mylocale->provider; } -#ifdef USE_ICU - if (mylocale && mylocale->provider == COLLPROVIDER_ICU) + use_icu = (collprovider == COLLPROVIDER_ICU && + GetDatabaseEncoding() != PG_SQL_ASCII); + use_libc = (collprovider == COLLPROVIDER_LIBC || + GetDatabaseEncoding() == PG_SQL_ASCII); + Assert(use_libc || use_icu); + + if (use_icu) { +#ifdef USE_ICU int32_t len_uchar, len_conv; UChar *buff_uchar; UChar *buff_conv; + const char *locale; + + if (mylocale) + locale = mylocale->info.icu.locale; + else + locale = get_icu_default_collate(); len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes); - len_conv = icu_convert_case(u_strToUpper, mylocale, + len_conv = icu_convert_case(u_strToUpper, locale, &buff_conv, buff_uchar, len_uchar); 
icu_from_uchar(&result, buff_conv, len_conv); pfree(buff_uchar); +#else /* not USE_ICU */ + /* shouldn't happen */ + elog(ERROR, "unsupported collprovider: %c", collprovider); +#endif /* not USE_ICU */ } else -#endif { + /* use_libc */ + if (pg_database_encoding_max_length() > 1) { wchar_t *workspace; @@ -1773,8 +1823,15 @@ str_initcap(const char *buff, size_t nbytes, Oid collid) else { pg_locale_t mylocale = 0; + char collprovider; + bool use_libc PG_USED_FOR_ASSERTS_ONLY, + use_icu; - if (collid != DEFAULT_COLLATION_OID) + if (collid == DEFAULT_COLLATION_OID) + { + collprovider = get_default_collprovider(); + } + else { if (!OidIsValid(collid)) { @@ -1788,25 +1845,43 @@ str_initcap(const char *buff, size_t nbytes, Oid collid) errhint("Use the COLLATE clause to set the collation explicitly."))); } mylocale = pg_newlocale_from_collation(collid); + collprovider = mylocale->provider; } -#ifdef USE_ICU - if (mylocale && mylocale->provider == COLLPROVIDER_ICU) + use_icu = (collprovider == COLLPROVIDER_ICU && + GetDatabaseEncoding() != PG_SQL_ASCII); + use_libc = (collprovider == COLLPROVIDER_LIBC || + GetDatabaseEncoding() == PG_SQL_ASCII); + Assert(use_libc || use_icu); + + if (use_icu) { +#ifdef USE_ICU int32_t len_uchar, len_conv; UChar *buff_uchar; UChar *buff_conv; + const char *locale; + + if (mylocale) + locale = mylocale->info.icu.locale; + else + locale = get_icu_default_collate(); len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes); - len_conv = icu_convert_case(u_strToTitle_default_BI, mylocale, + len_conv = icu_convert_case(u_strToTitle_default_BI, locale, &buff_conv, buff_uchar, len_uchar); icu_from_uchar(&result, buff_conv, len_conv); pfree(buff_uchar); +#else /* not USE_ICU */ + /* shouldn't happen */ + elog(ERROR, "unsupported collprovider: %c", collprovider); +#endif /* not USE_ICU */ } else -#endif { + /* use_libc */ + if (pg_database_encoding_max_length() > 1) { wchar_t *workspace; diff --git a/src/backend/utils/adt/like.c 
b/src/backend/utils/adt/like.c index ff716c5..28ea64f 100644 --- a/src/backend/utils/adt/like.c +++ b/src/backend/utils/adt/like.c @@ -167,6 +167,9 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation) plen; pg_locale_t locale = 0; bool locale_is_c = false; + char collprovider = COLLPROVIDER_LIBC; + bool use_libc PG_USED_FOR_ASSERTS_ONLY; + bool use_icu; if (lc_ctype_is_c(collation)) locale_is_c = true; @@ -184,7 +187,18 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation) errhint("Use the COLLATE clause to set the collation explicitly."))); } locale = pg_newlocale_from_collation(collation); + collprovider = locale->provider; } + else + { + collprovider = get_default_collprovider(); + } + + use_icu = (collprovider == COLLPROVIDER_ICU && + GetDatabaseEncoding() != PG_SQL_ASCII); + use_libc = (collprovider == COLLPROVIDER_LIBC || + GetDatabaseEncoding() == PG_SQL_ASCII); + Assert(use_libc || use_icu); /* * For efficiency reasons, in the single byte case we don't call lower() @@ -194,7 +208,7 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation) * way. 
*/ - if (pg_database_encoding_max_length() > 1 || (locale && locale->provider == COLLPROVIDER_ICU)) + if (pg_database_encoding_max_length() > 1 || use_icu) { /* lower's result is never packed, so OK to use old macros here */ pat = DatumGetTextPP(DirectFunctionCall1Coll(lower, collation, diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c index a3dc3be..5d7c66b 100644 --- a/src/backend/utils/adt/pg_locale.c +++ b/src/backend/utils/adt/pg_locale.c @@ -56,7 +56,10 @@ #include "access/htup_details.h" #include "catalog/pg_collation.h" #include "catalog/pg_control.h" +#include "catalog/pg_database.h" +#include "common/pg_collation_fn_common.h" #include "mb/pg_wchar.h" +#include "miscadmin.h" #include "utils/builtins.h" #include "utils/hsearch.h" #include "utils/lsyscache.h" @@ -132,6 +135,10 @@ static HTAB *collation_cache = NULL; static char *IsoLocaleName(const char *); /* MSVC specific */ #endif +#ifdef USE_ICU +static char *check_icu_locale(const char *locale); +#endif + /* * pg_perm_setlocale @@ -146,13 +153,45 @@ static char *IsoLocaleName(const char *); /* MSVC specific */ * also be unset to fully ensure that, but that has to be done elsewhere after * all the individual LC_XXX variables have been set correctly. (Thank you * Perl for making this kluge necessary.) + * + * Set collprovider to '\0' if category is not LC_COLLATE. 
*/ -char * -pg_perm_setlocale(int category, const char *locale) +const char * +pg_perm_setlocale(int category, const char *locale, char collprovider) { - char *result; + const char *result; const char *envvar; char *envbuf; + bool use_libc PG_USED_FOR_ASSERTS_ONLY = + category != LC_COLLATE || collprovider == COLLPROVIDER_LIBC; + bool use_icu = + category == LC_COLLATE && collprovider == COLLPROVIDER_ICU; + + Assert(use_libc || use_icu); + + if (use_icu) + { +#ifdef USE_ICU + UErrorCode status = U_ZERO_ERROR; + char *icu_locale = check_icu_locale(locale); + + if (icu_locale == NULL && locale != NULL) + return NULL; /* fall out immediately on failure */ + + uloc_setDefault(icu_locale, &status); + if (U_FAILURE(status)) + return NULL; /* fall out immediately on failure */ + + result = uloc_getDefault(); + if (icu_locale) + pfree(icu_locale); + return result; +#else /* not USE_ICU */ + return NULL; /* fall out immediately on failure */ +#endif /* not USE_ICU */ + } + + /* use libc */ #ifndef WIN32 result = setlocale(category, locale); @@ -167,7 +206,7 @@ pg_perm_setlocale(int category, const char *locale) #ifdef LC_MESSAGES if (category == LC_MESSAGES) { - result = (char *) locale; + result = locale; if (locale == NULL || locale[0] == '\0') return result; } @@ -218,7 +257,7 @@ pg_perm_setlocale(int category, const char *locale) #ifdef WIN32 result = IsoLocaleName(locale); if (result == NULL) - result = (char *) locale; + result = locale; #endif /* WIN32 */ break; #endif /* LC_MESSAGES */ @@ -259,34 +298,102 @@ pg_perm_setlocale(int category, const char *locale) * it seems that on most implementations that's the only thing it's good for; * we could wish that setlocale gave back a canonically spelled version of * the locale name, but typically it doesn't.) + * + * Set collprovider to '\0' if category is not LC_COLLATE. 
*/ bool -check_locale(int category, const char *locale, char **canonname) +check_locale(int category, const char *locale, char **canonname, + char collprovider) { - char *save; - char *res; + const char *save; + const char *res; + char *save_dup; + bool use_libc PG_USED_FOR_ASSERTS_ONLY = + category != LC_COLLATE || collprovider == COLLPROVIDER_LIBC; + bool use_icu = + category == LC_COLLATE && collprovider == COLLPROVIDER_ICU; +#ifdef USE_ICU + UErrorCode status; + char *icu_locale; +#endif + + Assert(use_libc || use_icu); if (canonname) *canonname = NULL; /* in case of failure */ - save = setlocale(category, NULL); - if (!save) - return false; /* won't happen, we hope */ +#ifndef USE_ICU + /* cannot use icu functions */ + if (use_icu) + return false; +#endif + +#ifdef USE_ICU + if (use_icu) + { + save = uloc_getDefault(); + if (!save) + return false; /* won't happen, we hope */ + } + else +#endif + { + /* use_libc */ + save = setlocale(category, NULL); + if (!save) + return false; /* won't happen, we hope */ + } /* save may be pointing at a modifiable scratch variable, see above. */ - save = pstrdup(save); + save_dup = pstrdup(save); /* set the locale with setlocale, to see if it accepts it. */ - res = setlocale(category, locale); +#ifdef USE_ICU + if (use_icu) + { + icu_locale = check_icu_locale(locale); + + if (icu_locale == NULL && locale != NULL) + return false; /* locale could not be resolved for ICU */ + + status = U_ZERO_ERROR; + uloc_setDefault(icu_locale, &status); + if (U_FAILURE(status)) + return false; /* ICU did not accept the locale */ + + res = uloc_getDefault(); + if (icu_locale) + pfree(icu_locale); + } + else +#endif + { + /* use_libc */ + res = setlocale(category, locale); + } /* save canonical name if requested. */ if (res && canonname) *canonname = pstrdup(res); /* restore old value.
*/ - if (!setlocale(category, save)) - elog(WARNING, "failed to restore old locale \"%s\"", save); - pfree(save); +#ifdef USE_ICU + if (use_icu) + { + status = U_ZERO_ERROR; + uloc_setDefault(save_dup, &status); + if (U_FAILURE(status)) + elog(WARNING, "ICU error: failed to restore old locale \"%s\"", + save_dup); + } + else +#endif + { + /* use_libc */ + if (!setlocale(category, save_dup)) + elog(WARNING, "failed to restore old locale \"%s\"", save_dup); + } + pfree(save_dup); return (res != NULL); } @@ -306,7 +413,7 @@ check_locale(int category, const char *locale, char **canonname) bool check_locale_monetary(char **newval, void **extra, GucSource source) { - return check_locale(LC_MONETARY, *newval, NULL); + return check_locale(LC_MONETARY, *newval, NULL, '\0'); } void @@ -318,7 +425,7 @@ assign_locale_monetary(const char *newval, void *extra) bool check_locale_numeric(char **newval, void **extra, GucSource source) { - return check_locale(LC_NUMERIC, *newval, NULL); + return check_locale(LC_NUMERIC, *newval, NULL, '\0'); } void @@ -330,7 +437,7 @@ assign_locale_numeric(const char *newval, void *extra) bool check_locale_time(char **newval, void **extra, GucSource source) { - return check_locale(LC_TIME, *newval, NULL); + return check_locale(LC_TIME, *newval, NULL, '\0'); } void @@ -366,7 +473,7 @@ check_locale_messages(char **newval, void **extra, GucSource source) * On Windows, we can't even check the value, so accept blindly */ #if defined(LC_MESSAGES) && !defined(WIN32) - return check_locale(LC_MESSAGES, *newval, NULL); + return check_locale(LC_MESSAGES, *newval, NULL, '\0'); #else return true; #endif @@ -380,7 +487,7 @@ assign_locale_messages(const char *newval, void *extra) * We ignore failure, as per comment above. 
*/ #ifdef LC_MESSAGES - (void) pg_perm_setlocale(LC_MESSAGES, newval); + (void) pg_perm_setlocale(LC_MESSAGES, newval, '\0'); #endif } @@ -1096,21 +1203,14 @@ lookup_collation_cache(Oid collation, bool set_flags) /* Attempt to set the flags */ HeapTuple tp; Form_pg_collation collform; - const char *collcollate; - const char *collctype; tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collation)); if (!HeapTupleIsValid(tp)) elog(ERROR, "cache lookup failed for collation %u", collation); collform = (Form_pg_collation) GETSTRUCT(tp); - collcollate = NameStr(collform->collcollate); - collctype = NameStr(collform->collctype); - - cache_entry->collate_is_c = ((strcmp(collcollate, "C") == 0) || - (strcmp(collcollate, "POSIX") == 0)); - cache_entry->ctype_is_c = ((strcmp(collctype, "C") == 0) || - (strcmp(collctype, "POSIX") == 0)); + cache_entry->collate_is_c = locale_is_c(NameStr(collform->collcollate)); + cache_entry->ctype_is_c = locale_is_c(NameStr(collform->collctype)); cache_entry->flags_valid = true; @@ -1141,20 +1241,28 @@ lc_collate_is_c(Oid collation) if (collation == DEFAULT_COLLATION_OID) { static int result = -1; - char *localeptr; + char collprovider; if (result >= 0) return (bool) result; - localeptr = setlocale(LC_COLLATE, NULL); - if (!localeptr) - elog(ERROR, "invalid LC_COLLATE setting"); - - if (strcmp(localeptr, "C") == 0) - result = true; - else if (strcmp(localeptr, "POSIX") == 0) - result = true; - else + + collprovider = get_default_collprovider(); + Assert(is_valid_nondefault_collprovider(collprovider)); + + if (collprovider == COLLPROVIDER_ICU) + { result = false; + } + else + { + /* COLLPROVIDER_LIBC */ + char *localeptr = setlocale(LC_COLLATE, NULL); + + if (!localeptr) + elog(ERROR, "invalid LC_COLLATE setting"); + + result = locale_is_c(localeptr); + } return (bool) result; } @@ -1191,20 +1299,28 @@ lc_ctype_is_c(Oid collation) if (collation == DEFAULT_COLLATION_OID) { static int result = -1; - char *localeptr; + char collprovider; if (result 
>= 0) return (bool) result; - localeptr = setlocale(LC_CTYPE, NULL); - if (!localeptr) - elog(ERROR, "invalid LC_CTYPE setting"); - - if (strcmp(localeptr, "C") == 0) - result = true; - else if (strcmp(localeptr, "POSIX") == 0) - result = true; - else + + collprovider = get_default_collprovider(); + Assert(is_valid_nondefault_collprovider(collprovider)); + + if (collprovider == COLLPROVIDER_ICU) + { result = false; + } + else + { + /* COLLPROVIDER_LIBC */ + char *localeptr = setlocale(LC_CTYPE, NULL); + + if (!localeptr) + elog(ERROR, "invalid LC_CTYPE setting"); + + result = locale_is_c(localeptr); + } return (bool) result; } @@ -1365,25 +1481,15 @@ pg_newlocale_from_collation(Oid collid) else if (collform->collprovider == COLLPROVIDER_ICU) { #ifdef USE_ICU - UCollator *collator; - UErrorCode status; - if (strcmp(collcollate, collctype) != 0) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("collations with different collate and ctype values are not supported by ICU"))); - status = U_ZERO_ERROR; - collator = ucol_open(collcollate, &status); - if (U_FAILURE(status)) - ereport(ERROR, - (errmsg("could not open collator for locale \"%s\": %s", - collcollate, u_errorName(status)))); - /* We will leak this string if we get an error below :-( */ result.info.icu.locale = MemoryContextStrdup(TopMemoryContext, collcollate); - result.info.icu.ucol = collator; + result.info.icu.ucol = open_collator(collcollate); #else /* not USE_ICU */ /* could get here if a collation was created by a build with ICU */ ereport(ERROR, @@ -1440,46 +1546,6 @@ pg_newlocale_from_collation(Oid collid) return cache_entry->locale; } -/* - * Get provider-specific collation version string for the given collation from - * the operating system/library. - * - * A particular provider must always either return a non-NULL string or return - * NULL (if it doesn't support versions). It must not return NULL for some - * collcollate and not NULL for others. 
- */ -char * -get_collation_actual_version(char collprovider, const char *collcollate) -{ - char *collversion; - -#ifdef USE_ICU - if (collprovider == COLLPROVIDER_ICU) - { - UCollator *collator; - UErrorCode status; - UVersionInfo versioninfo; - char buf[U_MAX_VERSION_STRING_LENGTH]; - - status = U_ZERO_ERROR; - collator = ucol_open(collcollate, &status); - if (U_FAILURE(status)) - ereport(ERROR, - (errmsg("could not open collator for locale \"%s\": %s", - collcollate, u_errorName(status)))); - ucol_getVersion(collator, versioninfo); - ucol_close(collator); - - u_versionToString(versioninfo, buf); - collversion = pstrdup(buf); - } - else -#endif - collversion = NULL; - - return collversion; -} - #ifdef USE_ICU /* @@ -1761,3 +1827,125 @@ char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen, return result; } + +#ifdef USE_ICU +/* + * If locale is "" return the environment value from setlocale(). + * + * Otherwise return a palloc'd copy of locale if it is not NULL. + */ +static char * +check_icu_locale(const char *locale) +{ + char *canonname = NULL; + char *winlocale = NULL; + char *result; + + /* Windows locales can be in the format ".codepage" */ + if (locale && (strlen(locale) == 0 || locale[0] == '.')) + { + check_locale(LC_COLLATE, locale, &canonname, COLLPROVIDER_LIBC); + locale = (const char *) canonname; + } + +#ifdef WIN32 + if (!locale_is_c(locale)) + { + winlocale = check_icu_winlocale(locale); + locale = (const char *) winlocale; + } +#endif + + result = locale ? pstrdup(locale) : NULL; + + if (canonname) + pfree(canonname); + if (winlocale) + pfree(winlocale); + + return result; +} + +/* + * Get the default icu collation. + */ +const char * +get_icu_default_collate(void) +{ + /* Cache the result so we only have to compute it once.
*/ + static char result[NAMEDATALEN]; + static bool cached = false; + const char *locale, + *collate; + char *langtag; + + if (cached) + return result; + + locale = uloc_getDefault(); + if (!locale) + ereport(ERROR, (errmsg("ICU error: uloc_getDefault() failed"))); + + langtag = get_icu_language_tag(locale); + collate = get_icu_collate(locale, langtag); + + if (strlen(collate) >= NAMEDATALEN) + ereport(FATAL, + (errmsg("the default ICU collation name \"%s\" is too long", collate))); + + strcpy(result, collate); + cached = true; + + pfree(langtag); + return result; +} + +/* + * Get the collator for the default ICU collation. + */ +UCollator * +get_default_collation_collator(void) +{ + /* Cache the result so we only have to compute it once. */ + static UCollator *collator = NULL; + + if (collator) + return collator; + + collator = open_collator(get_icu_default_collate()); + return collator; +} +#endif /* USE_ICU */ + +/* + * Get the default collation provider. + */ +char +get_default_collprovider(void) +{ + /* Cache the result so we only have to compute it once. */ + static char result = '\0'; + HeapTuple tp; + Form_pg_database dbform; + char *datcollate; + + if (result) + return result; + + tp = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(MyDatabaseId)); + if (!HeapTupleIsValid(tp)) + elog(ERROR, "cache lookup failed for database %u", MyDatabaseId); + + dbform = (Form_pg_database) GETSTRUCT(tp); + datcollate = NameStr(dbform->datcollate); + check_locale_collprovider(datcollate, NULL, &result, NULL); + + if (!is_valid_nondefault_collprovider(result)) + /* This could happen when manually creating a mess in the catalogs. 
*/ + ereport(FATAL, + (errmsg("could not find out the collation provider for datcollate \"%s\" of database \"%s\"", + datcollate, NameStr(dbform->datname)))); + + ReleaseSysCache(tp); + return result; +} diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index fcc8323..807311a 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -5689,13 +5689,14 @@ find_join_input_rel(PlannerInfo *root, Relids relids) */ static int pattern_char_isalpha(char c, bool is_multibyte, - pg_locale_t locale, bool locale_is_c) + pg_locale_t locale, char collprovider, bool locale_is_c) { if (locale_is_c) return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'); else if (is_multibyte && IS_HIGHBIT_SET(c)) return true; - else if (locale && locale->provider == COLLPROVIDER_ICU) + else if (collprovider == COLLPROVIDER_ICU && + GetDatabaseEncoding() != PG_SQL_ASCII) return IS_HIGHBIT_SET(c) ? true : false; #ifdef HAVE_LOCALE_T else if (locale && locale->provider == COLLPROVIDER_LIBC) @@ -5731,6 +5732,7 @@ like_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation, bool is_multibyte = (pg_database_encoding_max_length() > 1); pg_locale_t locale = 0; bool locale_is_c = false; + char collprovider = COLLPROVIDER_LIBC; /* the right-hand const is type text or bytea */ Assert(typeid == BYTEAOID || typeid == TEXTOID); @@ -5759,6 +5761,11 @@ like_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation, errhint("Use the COLLATE clause to set the collation explicitly."))); } locale = pg_newlocale_from_collation(collation); + collprovider = locale->provider; + } + else + { + collprovider = get_default_collprovider(); } } @@ -5796,7 +5803,8 @@ like_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation, /* Stop if case-varying character (it's sort of a wildcard) */ if (case_insensitive && - pattern_char_isalpha(patt[pos], is_multibyte, locale, locale_is_c)) + pattern_char_isalpha(patt[pos], is_multibyte, 
locale, + collprovider, locale_is_c)) break; match[match_pos++] = patt[pos]; diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c index 304cb26..e413e8b 100644 --- a/src/backend/utils/adt/varlena.c +++ b/src/backend/utils/adt/varlena.c @@ -1402,8 +1402,15 @@ varstr_cmp(const char *arg1, int len1, const char *arg2, int len2, Oid collid) char *a1p, *a2p; pg_locale_t mylocale = 0; + char collprovider; + bool use_libc PG_USED_FOR_ASSERTS_ONLY, + use_icu; - if (collid != DEFAULT_COLLATION_OID) + if (collid == DEFAULT_COLLATION_OID) + { + collprovider = get_default_collprovider(); + } + else { if (!OidIsValid(collid)) { @@ -1417,8 +1424,15 @@ varstr_cmp(const char *arg1, int len1, const char *arg2, int len2, Oid collid) errhint("Use the COLLATE clause to set the collation explicitly."))); } mylocale = pg_newlocale_from_collation(collid); + collprovider = mylocale->provider; } + use_icu = (collprovider == COLLPROVIDER_ICU && + GetDatabaseEncoding() != PG_SQL_ASCII); + use_libc = (collprovider == COLLPROVIDER_LIBC || + GetDatabaseEncoding() == PG_SQL_ASCII); + Assert(use_libc || use_icu); + /* * memcmp() can't tell us which of two unequal strings sorts first, * but it's a cheap way to tell if they're equal. 
Testing shows that @@ -1433,8 +1447,7 @@ varstr_cmp(const char *arg1, int len1, const char *arg2, int len2, Oid collid) #ifdef WIN32 /* Win32 does not have UTF-8, so we need to map to UTF-16 */ - if (GetDatabaseEncoding() == PG_UTF8 - && (!mylocale || mylocale->provider == COLLPROVIDER_LIBC)) + if (GetDatabaseEncoding() == PG_UTF8 && use_libc) { int a1len; int a2len; @@ -1536,60 +1549,67 @@ varstr_cmp(const char *arg1, int len1, const char *arg2, int len2, Oid collid) memcpy(a2p, arg2, len2); a2p[len2] = '\0'; - if (mylocale) + if (use_icu) { - if (mylocale->provider == COLLPROVIDER_ICU) - { #ifdef USE_ICU + UCollator *collator; + + if (mylocale) + collator = mylocale->info.icu.ucol; + else + collator = get_default_collation_collator(); + #ifdef HAVE_UCOL_STRCOLLUTF8 - if (GetDatabaseEncoding() == PG_UTF8) - { - UErrorCode status; + if (GetDatabaseEncoding() == PG_UTF8) + { + UErrorCode status; - status = U_ZERO_ERROR; - result = ucol_strcollUTF8(mylocale->info.icu.ucol, - arg1, len1, - arg2, len2, - &status); - if (U_FAILURE(status)) - ereport(ERROR, - (errmsg("collation failed: %s", u_errorName(status)))); - } - else + status = U_ZERO_ERROR; + result = ucol_strcollUTF8(collator, + arg1, len1, + arg2, len2, + &status); + if (U_FAILURE(status)) + ereport(ERROR, + (errmsg("collation failed: %s", u_errorName(status)))); + } + else #endif - { - int32_t ulen1, - ulen2; - UChar *uchar1, - *uchar2; + { + int32_t ulen1, + ulen2; + UChar *uchar1, + *uchar2; - ulen1 = icu_to_uchar(&uchar1, arg1, len1); - ulen2 = icu_to_uchar(&uchar2, arg2, len2); + ulen1 = icu_to_uchar(&uchar1, arg1, len1); + ulen2 = icu_to_uchar(&uchar2, arg2, len2); - result = ucol_strcoll(mylocale->info.icu.ucol, - uchar1, ulen1, - uchar2, ulen2); + result = ucol_strcoll(collator, + uchar1, ulen1, + uchar2, ulen2); - pfree(uchar1); - pfree(uchar2); - } + pfree(uchar1); + pfree(uchar2); + } #else /* not USE_ICU */ - /* shouldn't happen */ - elog(ERROR, "unsupported collprovider: %c", mylocale->provider); 
+ /* shouldn't happen */ + elog(ERROR, "unsupported collprovider: %c", collprovider); #endif /* not USE_ICU */ - } - else - { + } + else + { + /* use_libc */ + + if (mylocale) #ifdef HAVE_LOCALE_T result = strcoll_l(a1p, a2p, mylocale->info.lt); #else /* shouldn't happen */ - elog(ERROR, "unsupported collprovider: %c", mylocale->provider); + elog(ERROR, "unsupported collprovider: %c", collprovider); #endif - } + else + result = strcoll(a1p, a2p); } - else - result = strcoll(a1p, a2p); /* * In some locales strcoll() can claim that nonidentical strings are @@ -1811,6 +1831,9 @@ varstr_sortsupport(SortSupport ssup, Oid collid, bool bpchar) bool collate_c = false; VarStringSortSupport *sss; pg_locale_t locale = 0; + char collprovider = '\0'; + bool use_libc PG_USED_FOR_ASSERTS_ONLY = false; + bool use_icu = false; /* * If possible, set ssup->comparator to a function which can be used to @@ -1840,7 +1863,11 @@ varstr_sortsupport(SortSupport ssup, Oid collid, bool bpchar) * we'll figure out the collation based on the locale id and cache the * result. */ - if (collid != DEFAULT_COLLATION_OID) + if (collid == DEFAULT_COLLATION_OID) + { + collprovider = get_default_collprovider(); + } + else { if (!OidIsValid(collid)) { @@ -1854,8 +1881,15 @@ varstr_sortsupport(SortSupport ssup, Oid collid, bool bpchar) errhint("Use the COLLATE clause to set the collation explicitly."))); } locale = pg_newlocale_from_collation(collid); + collprovider = locale->provider; } + use_icu = (collprovider == COLLPROVIDER_ICU && + GetDatabaseEncoding() != PG_SQL_ASCII); + use_libc = (collprovider == COLLPROVIDER_LIBC || + GetDatabaseEncoding() == PG_SQL_ASCII); + Assert(use_libc || use_icu); + /* * There is a further exception on Windows. When the database * encoding is UTF-8 and we are not using the C collation, complex @@ -1865,8 +1899,7 @@ varstr_sortsupport(SortSupport ssup, Oid collid, bool bpchar) * trampoline. ICU locales work just the same on Windows, however. 
*/ #ifdef WIN32 - if (GetDatabaseEncoding() == PG_UTF8 && - !(locale && locale->provider == COLLPROVIDER_ICU)) + if (GetDatabaseEncoding() == PG_UTF8 && use_libc) return; #endif @@ -1895,7 +1928,7 @@ varstr_sortsupport(SortSupport ssup, Oid collid, bool bpchar) * platforms. */ #ifndef TRUST_STRXFRM - if (!collate_c && !(locale && locale->provider == COLLPROVIDER_ICU)) + if (!collate_c && !use_icu) abbreviate = false; #endif @@ -2037,6 +2070,9 @@ varstrfastcmp_locale(Datum x, Datum y, SortSupport ssup) VarString *arg2 = DatumGetVarStringPP(y); bool arg1_match; VarStringSortSupport *sss = (VarStringSortSupport *) ssup->ssup_extra; + char collprovider; + bool use_libc PG_USED_FOR_ASSERTS_ONLY, + use_icu; /* working state */ char *a1p, @@ -2130,59 +2166,77 @@ varstrfastcmp_locale(Datum x, Datum y, SortSupport ssup) } if (sss->locale) + collprovider = sss->locale->provider; + else + collprovider = get_default_collprovider(); + + use_icu = (collprovider == COLLPROVIDER_ICU && + GetDatabaseEncoding() != PG_SQL_ASCII); + use_libc = (collprovider == COLLPROVIDER_LIBC || + GetDatabaseEncoding() == PG_SQL_ASCII); + Assert(use_libc || use_icu); + + if (use_icu) { - if (sss->locale->provider == COLLPROVIDER_ICU) - { #ifdef USE_ICU -#ifdef HAVE_UCOL_STRCOLLUTF8 - if (GetDatabaseEncoding() == PG_UTF8) - { - UErrorCode status; + UCollator *collator; - status = U_ZERO_ERROR; - result = ucol_strcollUTF8(sss->locale->info.icu.ucol, - a1p, len1, - a2p, len2, - &status); - if (U_FAILURE(status)) - ereport(ERROR, - (errmsg("collation failed: %s", u_errorName(status)))); - } - else + if (sss->locale) + collator = sss->locale->info.icu.ucol; + else + collator = get_default_collation_collator(); + +#ifdef HAVE_UCOL_STRCOLLUTF8 + if (GetDatabaseEncoding() == PG_UTF8) + { + UErrorCode status; + + status = U_ZERO_ERROR; + result = ucol_strcollUTF8(collator, + a1p, len1, + a2p, len2, + &status); + if (U_FAILURE(status)) + ereport(ERROR, + (errmsg("collation failed: %s", u_errorName(status)))); 
+ } + else #endif - { - int32_t ulen1, - ulen2; - UChar *uchar1, - *uchar2; + { + int32_t ulen1, + ulen2; + UChar *uchar1, + *uchar2; - ulen1 = icu_to_uchar(&uchar1, a1p, len1); - ulen2 = icu_to_uchar(&uchar2, a2p, len2); + ulen1 = icu_to_uchar(&uchar1, a1p, len1); + ulen2 = icu_to_uchar(&uchar2, a2p, len2); - result = ucol_strcoll(sss->locale->info.icu.ucol, - uchar1, ulen1, - uchar2, ulen2); + result = ucol_strcoll(collator, + uchar1, ulen1, + uchar2, ulen2); - pfree(uchar1); - pfree(uchar2); - } + pfree(uchar1); + pfree(uchar2); + } #else /* not USE_ICU */ - /* shouldn't happen */ - elog(ERROR, "unsupported collprovider: %c", sss->locale->provider); + /* shouldn't happen */ + elog(ERROR, "unsupported collprovider: %c", collprovider); #endif /* not USE_ICU */ - } - else - { + } + else + { + /* use_libc */ + + if (sss->locale) #ifdef HAVE_LOCALE_T result = strcoll_l(sss->buf1, sss->buf2, sss->locale->info.lt); #else /* shouldn't happen */ - elog(ERROR, "unsupported collprovider: %c", sss->locale->provider); + elog(ERROR, "unsupported collprovider: %c", collprovider); #endif - } + else + result = strcoll(sss->buf1, sss->buf2); } - else - result = strcoll(sss->buf1, sss->buf2); /* * In some locales strcoll() can claim that nonidentical strings are @@ -2287,6 +2341,9 @@ varstr_abbrev_convert(Datum original, SortSupport ssup) else { Size bsize; + char collprovider; + bool use_libc PG_USED_FOR_ASSERTS_ONLY, + use_icu; #ifdef USE_ICU int32_t ulen = -1; UChar *uchar = NULL; @@ -2323,10 +2380,20 @@ varstr_abbrev_convert(Datum original, SortSupport ssup) sss->buf1[len] = '\0'; sss->last_len1 = len; + if (sss->locale) + collprovider = sss->locale->provider; + else + collprovider = get_default_collprovider(); + + use_icu = (collprovider == COLLPROVIDER_ICU && + GetDatabaseEncoding() != PG_SQL_ASCII); + use_libc = (collprovider == COLLPROVIDER_LIBC || + GetDatabaseEncoding() == PG_SQL_ASCII); + Assert(use_libc || use_icu); + #ifdef USE_ICU /* When using ICU and not UTF8, 
convert string to UChar. */ - if (sss->locale && sss->locale->provider == COLLPROVIDER_ICU && - GetDatabaseEncoding() != PG_UTF8) + if (use_icu && GetDatabaseEncoding() != PG_UTF8) ulen = icu_to_uchar(&uchar, sss->buf1, len); #endif @@ -2340,9 +2407,15 @@ varstr_abbrev_convert(Datum original, SortSupport ssup) */ for (;;) { -#ifdef USE_ICU - if (sss->locale && sss->locale->provider == COLLPROVIDER_ICU) + if (use_icu) { +#ifdef USE_ICU + UCollator *collator; + + if (sss->locale) + collator = sss->locale->info.icu.ucol; + else + collator = get_default_collation_collator(); /* * When using UTF8, use the iteration interface so we only * need to produce as many bytes as we actually need. @@ -2356,7 +2429,7 @@ varstr_abbrev_convert(Datum original, SortSupport ssup) uiter_setUTF8(&iter, sss->buf1, len); state[0] = state[1] = 0; /* won't need that again */ status = U_ZERO_ERROR; - bsize = ucol_nextSortKeyPart(sss->locale->info.icu.ucol, + bsize = ucol_nextSortKeyPart(collator, &iter, state, (uint8_t *) sss->buf2, @@ -2368,19 +2441,26 @@ varstr_abbrev_convert(Datum original, SortSupport ssup) u_errorName(status)))); } else - bsize = ucol_getSortKey(sss->locale->info.icu.ucol, + bsize = ucol_getSortKey(collator, uchar, ulen, (uint8_t *) sss->buf2, sss->buflen2); +#else /* not USE_ICU */ + /* shouldn't happen */ + elog(ERROR, "unsupported collprovider: %c", collprovider); +#endif /* not USE_ICU */ } else -#endif + { + /* use_libc */ + #ifdef HAVE_LOCALE_T - if (sss->locale && sss->locale->provider == COLLPROVIDER_LIBC) - bsize = strxfrm_l(sss->buf2, sss->buf1, - sss->buflen2, sss->locale->info.lt); - else + if (sss->locale) + bsize = strxfrm_l(sss->buf2, sss->buf1, + sss->buflen2, sss->locale->info.lt); + else #endif - bsize = strxfrm(sss->buf2, sss->buf1, sss->buflen2); + bsize = strxfrm(sss->buf2, sss->buf1, sss->buflen2); + } sss->last_len2 = bsize; if (bsize < sss->buflen2) diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c index 
4846289..9f588ba 100644 --- a/src/backend/utils/init/postinit.c +++ b/src/backend/utils/init/postinit.c @@ -29,9 +29,11 @@ #include "catalog/indexing.h" #include "catalog/namespace.h" #include "catalog/pg_authid.h" +#include "catalog/pg_collation.h" #include "catalog/pg_database.h" #include "catalog/pg_db_role_setting.h" #include "catalog/pg_tablespace.h" +#include "common/pg_collation_fn_common.h" #include "libpq/auth.h" #include "libpq/libpq-be.h" #include "mb/pg_wchar.h" @@ -296,6 +298,13 @@ CheckMyDatabase(const char *name, bool am_superuser) Form_pg_database dbform; char *collate; char *ctype; + char *datcollate; + char collprovider; + char *collversion; + char *wincollate = NULL; + char *langtag = NULL; + const char *collcollate; + char *actual_versionstr; /* Fetch our pg_database row normally, via syscache */ tup = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(MyDatabaseId)); @@ -377,27 +386,124 @@ CheckMyDatabase(const char *name, bool am_superuser) PGC_BACKEND, PGC_S_DYNAMIC_DEFAULT); /* assign locale variables */ - collate = NameStr(dbform->datcollate); ctype = NameStr(dbform->datctype); + datcollate = NameStr(dbform->datcollate); + check_locale_collprovider(datcollate, &collate, &collprovider, + &collversion); - if (pg_perm_setlocale(LC_COLLATE, collate) == NULL) + if (!is_valid_nondefault_collprovider(collprovider)) + /* This could happen when manually creating a mess in the catalogs. 
*/ + ereport(FATAL, + (errmsg("could not find out the collation provider for datcollate \"%s\" of database \"%s\"", + datcollate, name))); + +#ifndef USE_ICU + if (collprovider == COLLPROVIDER_ICU) + ereport(FATAL, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("ICU is not supported in this build"), \ + errhint("Recreate the database with libc locale or rebuild PostgreSQL using --with-icu."))); +#endif + + /* we always check lc_collate for libc */ + if (pg_perm_setlocale(LC_COLLATE, collate, COLLPROVIDER_LIBC) == NULL) ereport(FATAL, (errmsg("database locale is incompatible with operating system"), - errdetail("The database was initialized with LC_COLLATE \"%s\", " - " which is not recognized by setlocale().", collate), + errdetail("The database was initialized with LC_COLLATE \"%s\" (provider \"%s\"), " + " which is not recognized by setlocale().", + collate, get_collprovider_name(COLLPROVIDER_LIBC)), errhint("Recreate the database with another locale or install the missing locale."))); - if (pg_perm_setlocale(LC_CTYPE, ctype) == NULL) + /* check lc_collate and lc_ctype for icu if we need it */ + if (collprovider == COLLPROVIDER_ICU) + { + if (pg_perm_setlocale(LC_COLLATE, collate, collprovider) == NULL) + ereport(FATAL, + (errmsg("database locale is incompatible with operating system"), + errdetail("The database was initialized with LC_COLLATE \"%s\" (provider \"%s\"), " + " which is not recognized by uloc_setDefault().", + collate, get_collprovider_name(collprovider)), + errhint("Recreate the database with another locale or install the missing locale."))); + + /* This could happen when manually creating a mess in the catalogs. 
*/ + if (strcmp(collate, ctype) != 0) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("collations with different collate and ctype values are not supported by ICU"))); + } + + if (pg_perm_setlocale(LC_CTYPE, ctype, '\0') == NULL) ereport(FATAL, (errmsg("database locale is incompatible with operating system"), errdetail("The database was initialized with LC_CTYPE \"%s\", " " which is not recognized by setlocale().", ctype), errhint("Recreate the database with another locale or install the missing locale."))); + /* get the actual version of the collation */ + +#ifdef USE_ICU + if (collprovider == COLLPROVIDER_ICU) + { + collcollate = (const char *) collate; +#ifdef WIN32 + if (!locale_is_c(collcollate)) + { + wincollate = check_icu_winlocale(collcollate); + collcollate = (const char *) wincollate; + } +#endif /* WIN32 */ + langtag = get_icu_language_tag(collcollate); + collcollate = get_icu_collate(collcollate, langtag); + } + else +#endif /* USE_ICU */ + { + /* COLLPROVIDER_LIBC */ + collcollate = (const char *) collate; + } + + actual_versionstr = get_collation_actual_version(collprovider, collcollate); + + /* + * Check the collation version (this matches the version checking in the + * function pg_newlocale_from_collation()) + */ + if (collversion) + { + if (!actual_versionstr) + { + /* + * This could happen when manually creating a mess in the catalogs. 
+ */ + ereport(ERROR, + (errmsg("collation \"%s\" (provider \"%s\") has no actual version, but a version was specified", + collate, get_collprovider_name(collprovider)))); + } + + if (strcmp(actual_versionstr, collversion) != 0) + ereport(ERROR, + (errmsg("collation \"%s\" (provider \"%s\") has version mismatch", + collate, get_collprovider_name(collprovider)), + errdetail("The collation in the database was created using version %s, " + "but the operating system provides version %s.", + collversion, actual_versionstr), + errhint("Build PostgreSQL with the right library version."))); + } + /* Make the locale settings visible as GUC variables, too */ - SetConfigOption("lc_collate", collate, PGC_INTERNAL, PGC_S_OVERRIDE); + SetConfigOption("lc_collate", datcollate, PGC_INTERNAL, PGC_S_OVERRIDE); SetConfigOption("lc_ctype", ctype, PGC_INTERNAL, PGC_S_OVERRIDE); + pfree(collate); + if (collversion) + pfree(collversion); + if (langtag) + pfree(langtag); + if (actual_versionstr) + pfree(actual_versionstr); + if (wincollate) + pfree(wincollate); + check_strxfrm_bug(); ReleaseSysCache(tup); diff --git a/src/backend/utils/mb/encnames.c b/src/backend/utils/mb/encnames.c index 12b61cd..1e75257 100644 --- a/src/backend/utils/mb/encnames.c +++ b/src/backend/utils/mb/encnames.c @@ -403,8 +403,6 @@ const pg_enc2gettext pg_enc2gettext_tbl[] = }; -#ifndef FRONTEND - /* * Table of encoding names for ICU * @@ -457,6 +455,7 @@ is_encoding_supported_by_icu(int encoding) return (pg_enc2icu_tbl[encoding] != NULL); } +#ifndef FRONTEND const char * get_encoding_name_for_icu(int encoding) { @@ -475,7 +474,6 @@ get_encoding_name_for_icu(int encoding) return icu_encoding_name; } - #endif /* not FRONTEND */ diff --git a/src/bin/initdb/Makefile b/src/bin/initdb/Makefile index dae3daf..27415b8 100644 --- a/src/bin/initdb/Makefile +++ b/src/bin/initdb/Makefile @@ -19,7 +19,7 @@ include $(top_builddir)/src/Makefile.global override CPPFLAGS := -DFRONTEND -I$(libpq_srcdir) 
-I$(top_srcdir)/src/timezone $(CPPFLAGS) # note: we need libpq only because fe_utils does -override LDFLAGS := -L$(top_builddir)/src/fe_utils -lpgfeutils $(libpq_pgport) $(LDFLAGS) +override LDFLAGS := -L$(top_builddir)/src/fe_utils -lpgfeutils $(libpq_pgport) $(ICU_LIBS) $(LDFLAGS) # use system timezone data? ifneq (,$(with_system_tzdata)) diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c index 2efd3b7..2d3b90d 100644 --- a/src/bin/initdb/initdb.c +++ b/src/bin/initdb/initdb.c @@ -55,6 +55,10 @@ #include #include +#ifdef USE_ICU +#include +#endif + #ifdef HAVE_SHM_OPEN #include "sys/mman.h" #endif @@ -65,6 +69,7 @@ #include "catalog/pg_class.h" #include "catalog/pg_collation.h" #include "common/file_utils.h" +#include "common/pg_collation_fn_common.h" #include "common/restricted_token.h" #include "common/username.h" #include "fe_utils/string_utils.h" @@ -144,6 +149,8 @@ static bool data_checksums = false; static char *xlog_dir = NULL; static char *str_wal_segment_size_mb = NULL; static int wal_segment_size_mb; +static char collprovider = '\0'; +static char *collversion = NULL; /* internal vars */ @@ -267,10 +274,15 @@ static void check_ok(void); static char *escape_quotes(const char *src); static int locale_date_order(const char *locale); static void check_locale_name(int category, const char *locale, - char **canonname); -static bool check_locale_encoding(const char *locale, int encoding); + char **canonname, char collprovider); +static bool check_locale_encoding(const char *locale, int encoding, + char collprovider); static void setlocales(void); static void usage(const char *progname); +#ifdef USE_ICU +static char *check_icu_locale_name(const char *locale); +#endif +static void set_collation_version(void); void setup_pgdata(void); void setup_bin_paths(const char *argv0); void setup_data_file_paths(void); @@ -1317,10 +1329,27 @@ bootstrap_template1(void) char **bki_lines; char headerline[MAXPGPATH]; char buf[64]; + char *lc_collate_full_name; 
printf(_("running bootstrap script ... ")); fflush(stdout); + Assert(lc_collate); + + lc_collate_full_name = get_full_collation_name(lc_collate, collprovider, + collversion); + + if (!lc_collate_full_name) + exit(1); /* get_full_collation_name printed the error */ + + if (strlen(lc_collate_full_name) >= NAMEDATALEN) + { + fprintf(stderr, + _("%s: the full collation name \"%s\" is too long\n"), + progname, lc_collate_full_name); + exit(1); + } + bki_lines = readfile(bki_file); /* Check that bki file appears to be of the right version */ @@ -1359,7 +1388,8 @@ bootstrap_template1(void) bki_lines = replace_token(bki_lines, "ENCODING", encodingid_to_string(encodingid)); - bki_lines = replace_token(bki_lines, "LC_COLLATE", escape_quotes(lc_collate)); + bki_lines = replace_token(bki_lines, "LC_COLLATE", + escape_quotes(lc_collate_full_name)); bki_lines = replace_token(bki_lines, "LC_CTYPE", escape_quotes(lc_ctype)); @@ -1400,6 +1430,7 @@ bootstrap_template1(void) PG_CMD_CLOSE; free(bki_lines); + free(lc_collate_full_name); check_ok(); } @@ -2143,53 +2174,143 @@ locale_date_order(const char *locale) * the locale name, but typically it doesn't.) * * this should match the backend's check_locale() function + * + * Set collprovider to '\0' if category is not LC_COLLATE. 
*/ static void -check_locale_name(int category, const char *locale, char **canonname) +check_locale_name(int category, const char *locale, char **canonname, + char collprovider) { - char *save; - char *res; + const char *save; + const char *res; + char *save_dup; + bool use_libc PG_USED_FOR_ASSERTS_ONLY = + category != LC_COLLATE || collprovider == COLLPROVIDER_LIBC; + bool use_icu = + category == LC_COLLATE && collprovider == COLLPROVIDER_ICU; + bool failure = false; +#ifdef USE_ICU + UErrorCode status; + char *icu_locale; +#endif - if (canonname) - *canonname = NULL; /* in case of failure */ + Assert(use_libc || use_icu); - save = setlocale(category, NULL); - if (!save) +#ifndef USE_ICU + if (use_icu) { - fprintf(stderr, _("%s: setlocale() failed\n"), + fprintf(stderr, + _("%s: ICU is not supported in this build\n" + "You need to rebuild PostgreSQL using --with-icu.\n"), progname); exit(1); } +#endif + + if (canonname) + *canonname = NULL; /* in case of failure */ + +#ifdef USE_ICU + if (use_icu) + { + save = uloc_getDefault(); + if (!save) + { + fprintf(stderr, _("%s: ICU error: uloc_getDefault() failed\n"), + progname); + exit(1); + } + } + else +#endif + { + /* use_libc */ + save = setlocale(category, NULL); + if (!save) + { + fprintf(stderr, _("%s: setlocale() failed\n"), + progname); + exit(1); + } + } /* save may be pointing at a modifiable scratch variable, so copy it. */ - save = pg_strdup(save); + save_dup = pg_strdup(save); /* for setlocale() call */ if (!locale) locale = ""; /* set the locale with setlocale, to see if it accepts it. 
*/ - res = setlocale(category, locale); +#ifdef USE_ICU + if (use_icu) + { + icu_locale = check_icu_locale_name(locale); + if (icu_locale == NULL && locale != NULL) + { + failure = true; + res = NULL; + } + else + { + status = U_ZERO_ERROR; + uloc_setDefault(icu_locale, &status); + res = uloc_getDefault(); + failure = (U_FAILURE(status) || res == NULL); + if (icu_locale) + pfree(icu_locale); + } + } + else +#endif + { + /* use_libc */ + res = setlocale(category, locale); + failure = (res == NULL); + } /* save canonical name if requested. */ if (res && canonname) *canonname = pg_strdup(res); /* restore old value. */ - if (!setlocale(category, save)) +#ifdef USE_ICU + if (use_icu) { - fprintf(stderr, _("%s: failed to restore old locale \"%s\"\n"), - progname, save); - exit(1); + status = U_ZERO_ERROR; + uloc_setDefault(save_dup, &status); + if (U_FAILURE(status)) + { + fprintf(stderr, _("%s: ICU error: failed to restore old locale \"%s\"\n"), + progname, save_dup); + exit(1); + } } - free(save); + else +#endif + { + /* use_libc */ + if (!setlocale(category, save_dup)) + { + fprintf(stderr, _("%s: failed to restore old locale \"%s\"\n"), + progname, save_dup); + exit(1); + } + } + free(save_dup); /* complain if locale wasn't valid */ - if (res == NULL) + if (failure) { if (*locale) - fprintf(stderr, _("%s: invalid locale name \"%s\"\n"), - progname, locale); + { + if (category == LC_COLLATE) + fprintf(stderr, _("%s: invalid locale name \"%s\" (provider \"%s\")\n"), + progname, locale, get_collprovider_name(collprovider)); + else + fprintf(stderr, _("%s: invalid locale name \"%s\"\n"), + progname, locale); + } else { /* @@ -2211,9 +2332,11 @@ check_locale_name(int category, const char *locale, char **canonname) * check if the chosen encoding matches the encoding required by the locale * * this should match the similar check in the backend createdb() function + * + * Set collprovider to '\0' if category is not LC_COLLATE. 
*/ static bool -check_locale_encoding(const char *locale, int user_enc) +check_locale_encoding(const char *locale, int user_enc, char collprovider) { int locale_enc; @@ -2240,6 +2363,25 @@ check_locale_encoding(const char *locale, int user_enc) progname); return false; } + + if (collprovider == COLLPROVIDER_ICU) + { +#ifdef USE_ICU + if (!is_encoding_supported_by_icu(user_enc)) + { + fprintf(stderr, _("%s: selected encoding (%s) is not supported for ICU locales\n"), + progname, pg_encoding_to_char(user_enc)); + return false; + } +#else /* not USE_ICU */ + fprintf(stderr, + _("%s: ICU is not supported in this build\n" + "You need to rebuild PostgreSQL using --with-icu.\n"), + progname); + exit(1); +#endif /* not USE_ICU */ + } + return true; } @@ -2251,16 +2393,22 @@ check_locale_encoding(const char *locale, int user_enc) static void setlocales(void) { - char *canonname; - - /* set empty lc_* values to locale config if set */ + char *canonname = NULL; if (locale) { + /* + * Set up the collation provider if possible and canonicalize the locale + * name. + */ + check_locale_collprovider(locale, &canonname, &collprovider, NULL); + if (!canonname) + exit(1); /* check_locale_collprovider printed the error */ + locale = canonname; + + /* set empty lc_* values to locale config if set */ if (!lc_ctype) lc_ctype = locale; - if (!lc_collate) - lc_collate = locale; if (!lc_numeric) lc_numeric = locale; if (!lc_time) @@ -2271,29 +2419,83 @@ setlocales(void) lc_messages = locale; } + if (lc_collate) + { + /* + * Set up the collation provider if possible and canonicalize the locale + * name. 
+ */ + check_locale_collprovider(lc_collate, &canonname, &collprovider, NULL); + if (!canonname) + exit(1); /* check_locale_collprovider printed the error */ + lc_collate = canonname; + } + else if (canonname) + { + /* we have already canonicalized the locale name */ + lc_collate = pstrdup(canonname); + } + /* * canonicalize locale names, and obtain any missing values from our * current environment */ - check_locale_name(LC_CTYPE, lc_ctype, &canonname); + check_locale_name(LC_CTYPE, lc_ctype, &canonname, '\0'); lc_ctype = canonname; - check_locale_name(LC_COLLATE, lc_collate, &canonname); + + /* we always check lc_collate for libc */ + check_locale_name(LC_COLLATE, lc_collate, &canonname, COLLPROVIDER_LIBC); + if (lc_collate) + pfree(lc_collate); lc_collate = canonname; - check_locale_name(LC_NUMERIC, lc_numeric, &canonname); + + /* determine the collation provider if we haven't already done it */ + if (!is_valid_nondefault_collprovider(collprovider)) + { +#ifdef USE_ICU + if (!locale_is_c(lc_collate)) + { + collprovider = COLLPROVIDER_ICU; + } + else +#endif + { + collprovider = COLLPROVIDER_LIBC; + } + } + + Assert(is_valid_nondefault_collprovider(collprovider)); + + /* check lc_collate and lc_ctype for icu if we need it */ + if (collprovider == COLLPROVIDER_ICU) + { + check_locale_name(LC_COLLATE, lc_collate, NULL, collprovider); + if (strcmp(lc_collate, lc_ctype) != 0) + { + fprintf(stderr, + _("%s: collations with different collate and ctype values are not supported by ICU\n"), + progname); + exit(1); + } + } + + check_locale_name(LC_NUMERIC, lc_numeric, &canonname, '\0'); lc_numeric = canonname; - check_locale_name(LC_TIME, lc_time, &canonname); + check_locale_name(LC_TIME, lc_time, &canonname, '\0'); lc_time = canonname; - check_locale_name(LC_MONETARY, lc_monetary, &canonname); + check_locale_name(LC_MONETARY, lc_monetary, &canonname, '\0'); lc_monetary = canonname; #if defined(LC_MESSAGES) && !defined(WIN32) - check_locale_name(LC_MESSAGES, lc_messages, 
&canonname); + check_locale_name(LC_MESSAGES, lc_messages, &canonname, '\0'); lc_messages = canonname; #else /* when LC_MESSAGES is not available, use the LC_CTYPE setting */ - check_locale_name(LC_CTYPE, lc_messages, &canonname); + check_locale_name(LC_CTYPE, lc_messages, &canonname, '\0'); lc_messages = canonname; #endif + + set_collation_version(); } /* @@ -2510,6 +2712,9 @@ setup_locale_encoding(void) lc_time); } + printf(_("The default collation provider is \"%s\".\n"), + get_collprovider_name(collprovider)); + if (!encoding) { int ctype_enc; @@ -2560,8 +2765,8 @@ setup_locale_encoding(void) else encodingid = get_encoding_id(encoding); - if (!check_locale_encoding(lc_ctype, encodingid) || - !check_locale_encoding(lc_collate, encodingid)) + if (!check_locale_encoding(lc_ctype, encodingid, '\0') || + !check_locale_encoding(lc_collate, encodingid, collprovider)) exit(1); /* check_locale_encoding printed the error */ } @@ -3321,3 +3526,113 @@ main(int argc, char *argv[]) return 0; } + +#ifdef USE_ICU +/* + * If locale is "" return the environment value from setlocale(). + * + * Otherwise return a malloc'd copy of locale if it is not NULL. + * + * This should match the backend's check_icu_locale() function. + */ +static char * +check_icu_locale_name(const char *locale) +{ + char *canonname = NULL; + char *winlocale = NULL; + char *result; + + /* Windows locales can be in the format ".codepage" */ + if (locale && (strlen(locale) == 0 || locale[0] == '.')) + { + check_locale_name(LC_COLLATE, locale, &canonname, COLLPROVIDER_LIBC); + locale = (const char *) canonname; + } + +#ifdef WIN32 + if (!locale_is_c(locale)) + { + winlocale = check_icu_winlocale(locale); + + if (winlocale == NULL && locale != NULL) + exit(1); /* check_icu_winlocale printed the error */ + else + locale = winlocale; + } +#endif + + result = locale ? 
pstrdup(locale) : NULL; + + if (canonname) + pfree(canonname); + if (winlocale) + pfree(winlocale); + + return result; +} +#endif /* USE_ICU */ + +/* + * Setup the lc_collate version (get it from the collation provider). + */ +static void +set_collation_version(void) +{ + char *wincollate = NULL; + char *langtag = NULL; + const char *collate; + bool failure; + + Assert(lc_collate); + Assert(is_valid_nondefault_collprovider(collprovider)); + + if (collprovider == COLLPROVIDER_ICU) + { +#ifdef USE_ICU + collate = (const char *) lc_collate; + +#ifdef WIN32 + if (!locale_is_c(collate)) + { + wincollate = check_icu_winlocale(collate); + + if (wincollate == NULL && collate != NULL) + exit(1); /* check_icu_winlocale printed the error */ + else + collate = (const char *) wincollate; + } +#endif /* WIN32 */ + + langtag = get_icu_language_tag(collate); + if (!langtag) + { + /* get_icu_language_tag printed the main error message */ + fprintf(stderr, _("Rerun %s with a different locale selection.\n"), + progname); + exit(1); + } + collate = get_icu_collate(collate, langtag); +#else /* not USE_ICU */ + fprintf(stderr, + _("%s: ICU is not supported in this build\n" + "You need to rebuild PostgreSQL using --with-icu.\n"), + progname); + exit(1); +#endif /* not USE_ICU */ + } + else + { + /* COLLPROVIDER_LIBC */ + collate = (const char *) lc_collate; + } + + get_collation_actual_version(collprovider, collate, &collversion, &failure); + if (failure) + /* get_collation_actual_version printed the error */ + exit(1); + + if (langtag) + free(langtag); + if (wincollate) + free(wincollate); +} diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index 8ca83c0..ca3b138 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -47,12 +47,14 @@ #include "catalog/pg_attribute.h" #include "catalog/pg_cast.h" #include "catalog/pg_class.h" +#include "catalog/pg_collation.h" #include "catalog/pg_default_acl.h" #include "catalog/pg_largeobject.h" #include 
"catalog/pg_largeobject_metadata.h" #include "catalog/pg_proc.h" #include "catalog/pg_trigger.h" #include "catalog/pg_type.h" +#include "common/pg_collation_fn_common.h" #include "libpq/libpq-fs.h" #include "dumputils.h" @@ -13420,9 +13422,10 @@ dumpCollation(Archive *fout, CollInfo *collinfo) int i_collprovider; int i_collcollate; int i_collctype; - const char *collprovider; + const char *collproviderstr; const char *collcollate; const char *collctype; + const char *collprovider_name; /* Skip if not to be dumped */ if (!collinfo->dobj.dump || dopt->dataOnly) @@ -13462,11 +13465,21 @@ dumpCollation(Archive *fout, CollInfo *collinfo) i_collcollate = PQfnumber(res, "collcollate"); i_collctype = PQfnumber(res, "collctype"); - collprovider = PQgetvalue(res, 0, i_collprovider); + collproviderstr = PQgetvalue(res, 0, i_collprovider); collcollate = PQgetvalue(res, 0, i_collcollate); collctype = PQgetvalue(res, 0, i_collctype); /* + * Use COLLPROVIDER_DEFAULT to allow dumping pg_catalog; not accepted on + * input + */ + collprovider_name = get_collprovider_name(collproviderstr[0]); + if (!collprovider_name) + exit_horribly(NULL, + "unrecognized collation provider: %s\n", + collproviderstr); + + /* * DROP must be fully qualified in case same name appears in pg_catalog */ appendPQExpBuffer(delq, "DROP COLLATION %s", @@ -13477,18 +13490,7 @@ dumpCollation(Archive *fout, CollInfo *collinfo) appendPQExpBuffer(q, "CREATE COLLATION %s (", fmtId(collinfo->dobj.name)); - appendPQExpBufferStr(q, "provider = "); - if (collprovider[0] == 'c') - appendPQExpBufferStr(q, "libc"); - else if (collprovider[0] == 'i') - appendPQExpBufferStr(q, "icu"); - else if (collprovider[0] == 'd') - /* to allow dumping pg_catalog; not accepted on input */ - appendPQExpBufferStr(q, "default"); - else - exit_horribly(NULL, - "unrecognized collation provider: %s\n", - collprovider); + appendPQExpBuffer(q, "provider = %s", collprovider_name); if (strcmp(collcollate, collctype) == 0) { diff --git 
a/src/bin/psql/describe.c b/src/bin/psql/describe.c index 466a780..1581bf8 100644 --- a/src/bin/psql/describe.c +++ b/src/bin/psql/describe.c @@ -16,7 +16,9 @@ #include "catalog/pg_attribute.h" #include "catalog/pg_class.h" +#include "catalog/pg_collation.h" #include "catalog/pg_default_acl.h" +#include "common/pg_collation_fn_common.h" #include "fe_utils/string_utils.h" #include "common.h" @@ -3967,7 +3969,13 @@ listCollations(const char *pattern, bool verbose, bool showSystem) if (pset.sversion >= 100000) appendPQExpBuffer(&buf, - ",\n CASE c.collprovider WHEN 'd' THEN 'default' WHEN 'c' THEN 'libc' WHEN 'i' THEN 'icu' END AS \"%s\"", + ",\n CASE c.collprovider WHEN '%c' THEN '%s' WHEN '%c' THEN '%s' WHEN '%c' THEN '%s' END AS \"%s\"", + COLLPROVIDER_DEFAULT, + get_collprovider_name(COLLPROVIDER_DEFAULT), + COLLPROVIDER_LIBC, + get_collprovider_name(COLLPROVIDER_LIBC), + COLLPROVIDER_ICU, + get_collprovider_name(COLLPROVIDER_ICU), gettext_noop("Provider")); if (verbose) diff --git a/src/bin/scripts/Makefile b/src/bin/scripts/Makefile index 0cc528e..35c7ff9 100644 --- a/src/bin/scripts/Makefile +++ b/src/bin/scripts/Makefile @@ -19,7 +19,7 @@ include $(top_builddir)/src/Makefile.global PROGRAMS = createdb createuser dropdb dropuser clusterdb vacuumdb reindexdb pg_isready override CPPFLAGS := -I$(libpq_srcdir) $(CPPFLAGS) -override LDFLAGS := -L$(top_builddir)/src/fe_utils -lpgfeutils $(libpq_pgport) $(LDFLAGS) +override LDFLAGS := -L$(top_builddir)/src/fe_utils -lpgfeutils $(libpq_pgport) $(ICU_LIBS) $(LDFLAGS) all: $(PROGRAMS) diff --git a/src/bin/scripts/createdb.c b/src/bin/scripts/createdb.c index 81a8192..8d89d2b 100644 --- a/src/bin/scripts/createdb.c +++ b/src/bin/scripts/createdb.c @@ -58,6 +58,7 @@ main(int argc, char *argv[]) char *lc_collate = NULL; char *lc_ctype = NULL; char *locale = NULL; + char *canonname = NULL; PQExpBufferData sql; @@ -153,7 +154,15 @@ main(int argc, char *argv[]) progname); exit(1); } - lc_ctype = locale; + + /* + * remove the 
collation provider modifier from the locale for lc_ctype + */ + check_locale_collprovider(locale, &canonname, NULL, NULL); + if (!canonname) + exit(1); /* check_locale_collprovider printed the error */ + lc_ctype = canonname; + lc_collate = locale; } @@ -241,6 +250,9 @@ main(int argc, char *argv[]) PQfinish(conn); + if (canonname) + pfree(canonname); + exit(0); } diff --git a/src/common/Makefile b/src/common/Makefile index 80e78d7..4fbe0f0 100644 --- a/src/common/Makefile +++ b/src/common/Makefile @@ -43,7 +43,7 @@ override CPPFLAGS += -DVAL_LIBS="\"$(LIBS)\"" OBJS_COMMON = base64.o config_info.o controldata_utils.o exec.o ip.o \ keywords.o md5.o pg_lzcompress.o pgfnames.o psprintf.o relpath.o \ rmtree.o saslprep.o scram-common.o string.o unicode_norm.o \ - username.o wait_error.o + username.o wait_error.o pg_collation_fn_common.o ifeq ($(with_openssl),yes) OBJS_COMMON += sha2_openssl.o diff --git a/src/common/pg_collation_fn_common.c b/src/common/pg_collation_fn_common.c new file mode 100644 index 0000000..a3ba3a3 --- /dev/null +++ b/src/common/pg_collation_fn_common.c @@ -0,0 +1,90 @@ +/*------------------------------------------------------------------------- + * + * pg_collation_fn_common.c + * common routines to support manipulation of the pg_collation relation + * + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/common/pg_collation_fn_common.c + * + *------------------------------------------------------------------------- + */ +#ifdef FRONTEND +#include "postgres_fe.h" +#else +#include "postgres.h" +#endif + +#include "catalog/pg_collation.h" +#include "common/pg_collation_fn_common.h" + + +/* + * Note that we search the table with pg_strcasecmp(), so variant + * capitalizations don't need their own entries.
+ */ +typedef struct collprovider_name +{ + char collprovider; + const char *name; +} collprovider_name; + +static const collprovider_name collprovider_name_tbl[] = +{ + {COLLPROVIDER_DEFAULT, "default"}, + {COLLPROVIDER_LIBC, "libc"}, + {COLLPROVIDER_ICU, "icu"}, + {'\0', NULL} /* end marker */ +}; + +/* + * Get the collation provider from the given collation provider name. + * + * Return '\0' if we can't determine it. + */ +char +get_collprovider(const char *name) +{ + int i; + + if (!name) + return '\0'; + + /* Check the table */ + for (i = 0; collprovider_name_tbl[i].name; ++i) + if (pg_strcasecmp(name, collprovider_name_tbl[i].name) == 0) + return collprovider_name_tbl[i].collprovider; + + return '\0'; +} + +/* + * Get the name of the given collation provider. + * + * Return NULL if we can't determine it. + */ +const char * +get_collprovider_name(char collprovider) +{ + int i; + + /* Check the table */ + for (i = 0; collprovider_name_tbl[i].collprovider; ++i) + if (collprovider_name_tbl[i].collprovider == collprovider) + return collprovider_name_tbl[i].name; + + return NULL; +} + +/* + * Return true if collation provider is nondefault and valid, and false otherwise. 
+ */ +bool +is_valid_nondefault_collprovider(char collprovider) +{ + return (collprovider == COLLPROVIDER_LIBC || + collprovider == COLLPROVIDER_ICU); +} diff --git a/src/fe_utils/.gitignore b/src/fe_utils/.gitignore index 37f5f75..b14041b 100644 --- a/src/fe_utils/.gitignore +++ b/src/fe_utils/.gitignore @@ -1 +1,2 @@ /psqlscan.c +/pg_collation_fn_common.c diff --git a/src/fe_utils/Makefile b/src/fe_utils/Makefile index 3f4ba8b..4bdfd17 100644 --- a/src/fe_utils/Makefile +++ b/src/fe_utils/Makefile @@ -19,7 +19,8 @@ include $(top_builddir)/src/Makefile.global override CPPFLAGS := -DFRONTEND -I$(libpq_srcdir) $(CPPFLAGS) -OBJS = mbprint.o print.o psqlscan.o simple_list.o string_utils.o +OBJS = mbprint.o print.o psqlscan.o simple_list.o string_utils.o \ + pg_collation_fn_common.o all: libpgfeutils.a @@ -33,6 +34,13 @@ psqlscan.c: FLEX_FIX_WARNING=yes distprep: psqlscan.c +# Pull in pg_collation_fn_common.c from src/common. That exposes us to +# risks of version skew if we link to a shared library. Do it the +# hard way, instead, so that we're statically linked. + +pg_collation_fn_common.c: % : $(top_srcdir)/src/common/% + rm -f $@ && $(LN_S) $< . 
+ # libpgfeutils could be useful to contrib, so install it install: all installdirs $(INSTALL_STLIB) libpgfeutils.a '$(DESTDIR)$(libdir)/libpgfeutils.a' @@ -45,6 +53,7 @@ uninstall: clean distclean: rm -f libpgfeutils.a $(OBJS) lex.backup + rm -f pg_collation_fn_common.c # psqlscan.c is supposed to be in the distribution tarball, # so do not clean it in the clean/distclean rules diff --git a/src/include/commands/dbcommands.h b/src/include/commands/dbcommands.h index 677c7fc..d1b2776 100644 --- a/src/include/commands/dbcommands.h +++ b/src/include/commands/dbcommands.h @@ -29,6 +29,7 @@ extern ObjectAddress AlterDatabaseOwner(const char *dbname, Oid newOwnerId); extern Oid get_database_oid(const char *dbname, bool missingok); extern char *get_database_name(Oid dbid); -extern void check_encoding_locale_matches(int encoding, const char *collate, const char *ctype); +extern void check_encoding_locale_matches(int encoding, const char *collate, const char *ctype, + char collprovider); #endif /* DBCOMMANDS_H */ diff --git a/src/include/common/pg_collation_fn_common.h b/src/include/common/pg_collation_fn_common.h new file mode 100644 index 0000000..f05778d --- /dev/null +++ b/src/include/common/pg_collation_fn_common.h @@ -0,0 +1,22 @@ +/*------------------------------------------------------------------------- + * + * pg_collation_fn_common.h + * prototypes for functions in common/pg_collation_fn_common.c + * + * + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/common/pg_collation_fn_common.h + * + *------------------------------------------------------------------------- + */ + +#ifndef PG_COLLATION_FN_COMMON_H +#define PG_COLLATION_FN_COMMON_H + +extern char get_collprovider(const char *name); +extern const char *get_collprovider_name(char collprovider); +extern bool is_valid_nondefault_collprovider(char collprovider); + +#endif /* 
PG_COLLATION_FN_COMMON_H */ diff --git a/src/include/pg_config.h.win32 b/src/include/pg_config.h.win32 index 22d19ed..de91567 100644 --- a/src/include/pg_config.h.win32 +++ b/src/include/pg_config.h.win32 @@ -623,6 +623,10 @@ /* Define to use /dev/urandom for random number generation */ /* #undef USE_DEV_URANDOM */ +/* Define to build with ICU support. (--with-icu) */ +/* #undef USE_ICU */ + + /* Define to 1 to build with LDAP support. (--with-ldap) */ /* #undef USE_LDAP */ diff --git a/src/include/port.h b/src/include/port.h index 3e528fa..3ed7d11 100644 --- a/src/include/port.h +++ b/src/include/port.h @@ -419,6 +419,40 @@ extern int pg_get_encoding_from_locale(const char *ctype, bool write_message); extern int pg_codepage_to_encoding(UINT cp); #endif +/* do not make libpq with icu */ +#ifndef LIBPQ_MAKE + +extern void check_locale_collprovider(const char *locale, char **canonname, + char *collprovider, char **collversion); +extern bool locale_is_c(const char *locale); +extern char *get_full_collation_name(const char *locale, char collprovider, + const char *collversion); + +#ifdef FRONTEND +extern void get_collation_actual_version(char collprovider, + const char *collcollate, + char **collversion, bool *failure); +#else +extern char *get_collation_actual_version(char collprovider, + const char *collcollate); +#endif + +#ifdef USE_ICU +#define ICU_ROOT_LOCALE "root" + +/* Users of this must import unicode/ucol.h too. 
*/ +struct UCollator; +extern struct UCollator *open_collator(const char *collate); + +extern char * get_icu_language_tag(const char *localename); +extern const char *get_icu_collate(const char *locale, const char *langtag); +#ifdef WIN32 +extern char * check_icu_winlocale(const char *winlocale); +#endif /* WIN32 */ +#endif /* USE_ICU */ + +#endif /* not LIBPQ_MAKE */ + /* port/inet_net_ntop.c */ extern char *inet_net_ntop(int af, const void *src, int bits, char *dst, size_t size); diff --git a/src/include/port/win32.h b/src/include/port/win32.h index 9f48a58..7e3e7e5 100644 --- a/src/include/port/win32.h +++ b/src/include/port/win32.h @@ -16,7 +16,7 @@ * get support for GetLocaleInfoEx() with locales. For everything else * the minimum version is Windows XP (0x0501). */ -#if defined(_MSC_VER) && _MSC_VER >= 1900 +#if defined(_MSC_VER) && _MSC_VER >= 1800 #define MIN_WINNT 0x0600 #else #define MIN_WINNT 0x0501 diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h index 88a3134..161a14e 100644 --- a/src/include/utils/pg_locale.h +++ b/src/include/utils/pg_locale.h @@ -57,8 +57,10 @@ extern void assign_locale_numeric(const char *newval, void *extra); extern bool check_locale_time(char **newval, void **extra, GucSource source); extern void assign_locale_time(const char *newval, void *extra); -extern bool check_locale(int category, const char *locale, char **canonname); -extern char *pg_perm_setlocale(int category, const char *locale); +extern bool check_locale(int category, const char *locale, char **canonname, + char collprovider); +extern const char *pg_perm_setlocale(int category, const char *locale, + char collprovider); extern void check_strxfrm_bug(void); extern bool lc_collate_is_c(Oid collation); @@ -102,11 +104,11 @@ typedef struct pg_locale_struct *pg_locale_t; extern pg_locale_t pg_newlocale_from_collation(Oid collid); -extern char *get_collation_actual_version(char collprovider, const char *collcollate); - #ifdef USE_ICU extern int32_t 
icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes); extern int32_t icu_from_uchar(char **result, const UChar *buff_uchar, int32_t len_uchar); +extern const char *get_icu_default_collate(void); +extern UCollator *get_default_collation_collator(void); #endif /* These functions convert from/to libc's wchar_t, *not* pg_wchar_t */ @@ -115,4 +117,6 @@ extern size_t wchar2char(char *to, const wchar_t *from, size_t tolen, extern size_t char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen, pg_locale_t locale); +extern char get_default_collprovider(void); + #endif /* _PG_LOCALE_ */ diff --git a/src/interfaces/libpq/.gitignore b/src/interfaces/libpq/.gitignore index 5c232ae..212edd9 100644 --- a/src/interfaces/libpq/.gitignore +++ b/src/interfaces/libpq/.gitignore @@ -32,3 +32,4 @@ /unicode_norm.c /encnames.c /wchar.c +/pg_collation_fn_common.c diff --git a/src/interfaces/libpq/Makefile b/src/interfaces/libpq/Makefile index abe0a50..32a5d43 100644 --- a/src/interfaces/libpq/Makefile +++ b/src/interfaces/libpq/Makefile @@ -19,7 +19,7 @@ NAME= pq SO_MAJOR_VERSION= 5 SO_MINOR_VERSION= $(MAJORVERSION) -override CPPFLAGS := -DFRONTEND -DUNSAFE_STAT_OK -I$(srcdir) $(CPPFLAGS) -I$(top_builddir)/src/port -I$(top_srcdir)/src/port +override CPPFLAGS := -DFRONTEND -DUNSAFE_STAT_OK -I$(srcdir) $(CPPFLAGS) -I$(top_builddir)/src/port -I$(top_srcdir)/src/port -DLIBPQ_MAKE ifneq ($(PORTNAME), win32) override CFLAGS += $(PTHREAD_CFLAGS) endif diff --git a/src/port/chklocale.c b/src/port/chklocale.c index dde9130..a30bded 100644 --- a/src/port/chklocale.c +++ b/src/port/chklocale.c @@ -23,8 +23,26 @@ #include #endif +#ifdef USE_ICU +#include +#endif + +#include "catalog/pg_collation.h" +#include "common/pg_collation_fn_common.h" #include "mb/pg_wchar.h" +/* + * In backend, we will use palloc/pfree. In frontend, use malloc/free. 
+ */ +#ifndef FRONTEND +#define STRDUP(s) pstrdup(s) +#define ALLOC(size) palloc(size) +#define FREE(s) pfree(s) +#else +#define STRDUP(s) strdup(s) +#define ALLOC(size) malloc(size) +#define FREE(s) free(s) +#endif /* * This table needs to recognize all the CODESET spellings for supported @@ -436,3 +454,583 @@ pg_get_encoding_from_locale(const char *ctype, bool write_message) } #endif /* (HAVE_LANGINFO_H && CODESET) || WIN32 */ + +/* do not make libpq with icu */ +#ifndef LIBPQ_MAKE + +/* + * Check if the locale contains the modifier of the collation provider. + * + * Set up the collation provider according to the appropriate modifier or '\0'. + * Set up the collation version to NULL if we don't find it after the collation + * provider modifier. + * + * The malloc'd copy of the locale's canonical name without the modifier of the + * collation provider and the collation version is stored in the canonname if + * locale is not NULL. The canonname can have zero length. + */ +void +check_locale_collprovider(const char *locale, char **canonname, + char *collprovider, char **collversion) +{ + const char *modifier_sign, + *dot_sign, + *cur_collprovider_end; + char cur_collprovider_name[NAMEDATALEN]; + int cur_collprovider_len; + char cur_collprovider; + + /* in case of failure or if we don't find them in the locale name */ + if (canonname) + *canonname = NULL; + if (collprovider) + *collprovider = '\0'; + if (collversion) + *collversion = NULL; + + if (!locale) + return; + + /* find the last occurrence of the modifier sign '@' in the locale */ + modifier_sign = strrchr(locale, '@'); + + if (!modifier_sign) + { + /* just copy all the name */ + if (canonname) + *canonname = STRDUP(locale); + return; + } + + /* check if there's a version after the collation provider modifier */ + if ((dot_sign = strchr(modifier_sign, '.')) == NULL) + cur_collprovider_end = &locale[strlen(locale)]; + else + cur_collprovider_end = dot_sign; + + cur_collprovider_len = cur_collprovider_end -
modifier_sign - 1; + if (cur_collprovider_len + 1 > NAMEDATALEN) + { +#ifdef FRONTEND + fprintf(stderr, _("collation provider name is too long: %s"), locale); + /* keep newline separate so there's only one translatable string */ + fputc('\n', stderr); +#else /* not FRONTEND */ + ereport(ERROR, + (errmsg("collation provider name is too long: %s", locale))); +#endif /* not FRONTEND */ + return; + } + + strncpy(cur_collprovider_name, modifier_sign + 1, cur_collprovider_len); + cur_collprovider_name[cur_collprovider_len] = '\0'; + + /* check if this is a valid collprovider name */ + cur_collprovider = get_collprovider(cur_collprovider_name); + if (is_valid_nondefault_collprovider(cur_collprovider)) + { + if (collprovider) + *collprovider = cur_collprovider; + + if (canonname) + { + int canonname_len = modifier_sign - locale; + + *canonname = ALLOC((canonname_len + 1) * sizeof(char)); + if (*canonname) + { + strncpy(*canonname, locale, canonname_len); + (*canonname)[canonname_len] = '\0'; + } + else + { +#ifdef FRONTEND + fprintf(stderr, _("out of memory")); + /* + * keep newline separate so there's only one translatable string + */ + fputc('\n', stderr); +#else /* not FRONTEND */ + ereport(ERROR, (errmsg("out of memory"))); +#endif /* not FRONTEND */ + } + } + + if (dot_sign && collversion) + *collversion = STRDUP(dot_sign + 1); + } + else + { + /* just copy all the name */ + if (canonname) + *canonname = STRDUP(locale); + } +} + +/* + * Return true if locale is "C" or "POSIX". + */ +bool +locale_is_c(const char *locale) +{ + return locale && (strcmp(locale, "C") == 0 || strcmp(locale, "POSIX") == 0); +} + +/* + * Return the locale name with the collation provider modifier and, if given, + * the collation version appended. + * + * Return NULL if locale is NULL.
+ */
+char *
+get_full_collation_name(const char *locale, char collprovider,
+                        const char *collversion)
+{
+    const char *provider_name;
+    size_t      locale_len,
+                provider_len,
+                version_len;
+    char       *result,
+               *pos;
+
+    if (!locale)
+        return NULL;
+
+    provider_name = get_collprovider_name(collprovider);
+    Assert(provider_name);
+
+    /* an empty version is treated like a missing one */
+    locale_len = strlen(locale);
+    provider_len = strlen(provider_name);
+    version_len = (collversion && *collversion) ? strlen(collversion) : 0;
+
+    /* "locale" + '@' + "provider" [+ '.' + "version"] + terminator */
+    result = ALLOC((locale_len + 1 + provider_len +
+                    (version_len > 0 ? 1 + version_len : 0) + 1) * sizeof(char));
+    if (!result)
+    {
+#ifdef FRONTEND
+        fprintf(stderr, _("out of memory"));
+        /* keep newline separate so there's only one translatable string */
+        fputc('\n', stderr);
+#else                           /* not FRONTEND */
+        ereport(ERROR, (errmsg("out of memory")));
+#endif                          /* not FRONTEND */
+
+        return NULL;
+    }
+
+    /* assemble the pieces left to right */
+    pos = result;
+    memcpy(pos, locale, locale_len);
+    pos += locale_len;
+
+    /* add the collation provider modifier */
+    *pos++ = '@';
+    memcpy(pos, provider_name, provider_len);
+    pos += provider_len;
+
+    /* add the collation version if needed */
+    if (version_len > 0)
+    {
+        *pos++ = '.';
+        memcpy(pos, collversion, version_len);
+        pos += version_len;
+    }
+
+    *pos = '\0';
+
+    return result;
+}
+
+/*
+ * Get provider-specific collation version string for the given collation from
+ * the operating system/library.
+ *
+ * A particular provider must always either return a non-NULL string or return
+ * NULL (if it doesn't support versions). It must not return NULL for some
+ * collcollate and not NULL for others.
+ */
+#ifdef FRONTEND
+/*
+ * Frontend variant.
+ *
+ * *collversion receives a malloc'd version string, or NULL if the provider
+ * has no version or in case of failure.  *failure is set to true if the
+ * collator could not be opened or the version string could not be copied,
+ * and to false otherwise.  Both output pointers may be NULL.
+ */
+void
+get_collation_actual_version(char collprovider, const char *collcollate,
+                             char **collversion, bool *failure)
+{
+    /* start from "no version, no failure" */
+    if (collversion)
+        *collversion = NULL;
+    if (failure)
+        *failure = false;
+
+#ifdef USE_ICU
+    if (collprovider == COLLPROVIDER_ICU)
+    {
+        UCollator  *collator = open_collator(collcollate);
+        UVersionInfo versioninfo;
+        char        buf[U_MAX_VERSION_STRING_LENGTH];
+
+        if (!collator)
+        {
+            /* open_collator() has already printed the reason */
+            if (failure)
+                *failure = true;
+            return;
+        }
+
+        ucol_getVersion(collator, versioninfo);
+        ucol_close(collator);
+
+        u_versionToString(versioninfo, buf);
+        if (collversion)
+        {
+            *collversion = STRDUP(buf);
+            if (!*collversion)
+            {
+                /*
+                 * Do not let an allocation failure look like "this provider
+                 * has no versions".
+                 */
+                fprintf(stderr, _("out of memory"));
+                /* keep newline separate so there's only one translatable string */
+                fputc('\n', stderr);
+                if (failure)
+                    *failure = true;
+            }
+        }
+    }
+#endif
+}
+#else                           /* not FRONTEND */
+/*
+ * Backend variant: return the version string allocated with STRDUP(), or
+ * NULL if the provider is not ICU (or ICU support is not compiled in).
+ *
+ * NOTE(review): the collator is used without a NULL check; this relies on
+ * open_collator() raising an error instead of returning NULL in the backend.
+ */
+char *
+get_collation_actual_version(char collprovider, const char *collcollate)
+{
+    char       *collversion;
+
+#ifdef USE_ICU
+    if (collprovider == COLLPROVIDER_ICU)
+    {
+        UCollator  *collator = open_collator(collcollate);
+        UVersionInfo versioninfo;
+        char        buf[U_MAX_VERSION_STRING_LENGTH];
+
+        ucol_getVersion(collator, versioninfo);
+        ucol_close(collator);
+
+        u_versionToString(versioninfo, buf);
+        collversion = STRDUP(buf);
+    }
+    else
+#endif
+        collversion = NULL;
+
+    return collversion;
+}
+#endif                          /* not FRONTEND */
+
+#ifdef USE_ICU
+/*
+ * Open the collator for this icu locale. Return NULL in case of failure.
+ *
+ * The ICU default locale is temporarily switched to ICU_ROOT_LOCALE while
+ * the collator is opened and is restored afterwards.  The saved copy of the
+ * previous default locale is released on every exit path, and a collator that
+ * was opened is closed again if the default locale cannot be restored, so
+ * that a NULL result never leaves resources behind.
+ */
+UCollator *
+open_collator(const char *collate)
+{
+    UCollator  *collator;
+    UErrorCode  status;
+    const char *save = uloc_getDefault();
+    char       *save_dup;
+
+    if (!save)
+    {
+#ifdef FRONTEND
+        fprintf(stderr, _("ICU error: uloc_getDefault() failed"));
+        /* keep newline separate so there's only one translatable string */
+        fputc('\n', stderr);
+#else
+        ereport(ERROR, (errmsg("ICU error: uloc_getDefault() failed")));
+#endif
+        return NULL;
+    }
+
+    /* save may be pointing at a modifiable scratch variable, so copy it. */
+    save_dup = STRDUP(save);
+    if (!save_dup)
+    {
+        /* only possible where STRDUP() is strdup(), i.e. in frontend code */
+#ifdef FRONTEND
+        fprintf(stderr, _("out of memory"));
+        /* keep newline separate so there's only one translatable string */
+        fputc('\n', stderr);
+#else
+        ereport(ERROR, (errmsg("out of memory")));
+#endif
+        return NULL;
+    }
+
+    /* set the default locale to root */
+    status = U_ZERO_ERROR;
+    uloc_setDefault(ICU_ROOT_LOCALE, &status);
+    if (U_FAILURE(status))
+    {
+#ifdef FRONTEND
+        fprintf(stderr, _("ICU error: failed to set the default locale to \"%s\": %s"),
+                ICU_ROOT_LOCALE, u_errorName(status));
+        /* keep newline separate so there's only one translatable string */
+        fputc('\n', stderr);
+#else
+        ereport(ERROR,
+                (errmsg("ICU error: failed to set the default locale to \"%s\": %s",
+                        ICU_ROOT_LOCALE, u_errorName(status))));
+#endif
+        FREE(save_dup);
+        return NULL;
+    }
+
+    /* get a collator for this collate */
+    status = U_ZERO_ERROR;
+    collator = ucol_open(collate, &status);
+    if (U_FAILURE(status))
+    {
+#ifdef FRONTEND
+        fprintf(stderr, _("ICU error: could not open collator for locale \"%s\": %s"),
+                collate, u_errorName(status));
+        /* keep newline separate so there's only one translatable string */
+        fputc('\n', stderr);
+#else
+        ereport(ERROR,
+                (errmsg("ICU error: could not open collator for locale \"%s\": %s",
+                        collate, u_errorName(status))));
+#endif
+        /* fall through: the default locale still has to be restored */
+        collator = NULL;
+    }
+
+    /* restore old value of the default locale. */
+    status = U_ZERO_ERROR;
+    uloc_setDefault(save_dup, &status);
+    if (U_FAILURE(status))
+    {
+#ifdef FRONTEND
+        fprintf(stderr, _("ICU error: failed to restore old locale \"%s\": %s"),
+                save_dup, u_errorName(status));
+        /* keep newline separate so there's only one translatable string */
+        fputc('\n', stderr);
+#else
+        ereport(ERROR,
+                (errmsg("ICU error: failed to restore old locale \"%s\": %s",
+                        save_dup, u_errorName(status))));
+#endif
+        /* we report failure, so do not leak what was opened or copied */
+        if (collator)
+            ucol_close(collator);
+        FREE(save_dup);
+        return NULL;
+    }
+    FREE(save_dup);
+
+    return collator;
+}
+
+/*
+ * Get the ICU language tag for a locale name.
+ * The result is allocated with STRDUP() (palloc'd in the backend, malloc'd in
+ * frontend code).
+ * Return NULL in case of failure or if localename is NULL.
+ */
+char *
+get_icu_language_tag(const char *localename)
+{
+    char        buf[ULOC_FULLNAME_CAPACITY];
+    UErrorCode  status = U_ZERO_ERROR;
+
+    if (!localename)
+        return NULL;
+
+    uloc_toLanguageTag(localename, buf, sizeof(buf), TRUE, &status);
+
+    /*
+     * U_STRING_NOT_TERMINATED_WARNING is a warning, so U_FAILURE() does not
+     * catch it, but it means that buf was filled completely without a
+     * terminating NUL.  Treat it as a failure rather than copying an
+     * unterminated buffer below.
+     */
+    if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING)
+    {
+#ifdef FRONTEND
+        fprintf(stderr,
+                _("ICU error: could not convert locale name \"%s\" to language tag: %s"),
+                localename, u_errorName(status));
+        /* keep newline separate so there's only one translatable string */
+        fputc('\n', stderr);
+#else
+        ereport(ERROR,
+                (errmsg("ICU error: could not convert locale name \"%s\" to language tag: %s",
+                        localename, u_errorName(status))));
+#endif
+        return NULL;
+    }
+    return STRDUP(buf);
+}
+
+/*
+ * Get the icu collation name.
+ *
+ * Returns langtag when compiled against ICU major version 54 or later, and
+ * locale otherwise.  The returned pointer is one of the arguments, not a
+ * copy.
+ */
+const char *
+get_icu_collate(const char *locale, const char *langtag)
+{
+    return U_ICU_VERSION_MAJOR_NUM >= 54 ? langtag : locale;
+}
+
+#ifdef WIN32
+/*
+ * Get the Language Code Identifier (LCID) for the Windows locale.
+ *
+ * Return zero in case of failure.
+ */
+static uint32
+get_lcid(const wchar_t *winlocale)
+{
+    /*
+     * The second argument to the LocaleNameToLCID function is:
+     * - Prior to Windows 7: reserved; should always be 0.
+     * - Beginning in Windows 7: use LOCALE_ALLOW_NEUTRAL_NAMES to allow the
+     *   return of lcids of locales without regions.
+     */
+#if (NTDDI_VERSION >= NTDDI_WIN7)
+    return LocaleNameToLCID(winlocale, LOCALE_ALLOW_NEUTRAL_NAMES);
+#else
+    return LocaleNameToLCID(winlocale, 0);
+#endif
+}
+
+/*
+ * char2wchar_ascii --- convert multibyte characters to wide characters
+ *
+ * This is a simplified version of the char2wchar() function from backend.
+ */
+static size_t
+char2wchar_ascii(wchar_t *to, size_t tolen, const char *from, size_t fromlen)
+{
+    size_t      result;
+
+    if (tolen == 0)
+        return 0;
+
+    /* Win32 API does not work for zero-length input */
+    if (fromlen == 0)
+        result = 0;
+    else
+    {
+        /* leave room for the terminator that is appended below */
+        result = MultiByteToWideChar(CP_ACP, 0, from, fromlen, to, tolen - 1);
+        /* A zero return is failure */
+        if (result == 0)
+            result = -1;
+    }
+
+    /* failure is reported to the caller as (size_t) -1 */
+    if (result != -1)
+    {
+        Assert(result < tolen);
+        /* Append trailing null wchar (MultiByteToWideChar() does not) */
+        to[result] = 0;
+    }
+
+    return result;
+}
+
+/*
+ * Report that no LCID could be determined for the Windows locale name.
+ *
+ * Frontend code prints the message to stderr and returns; the backend raises
+ * an error.
+ */
+static void
+report_lcid_failure(const char *winlocale)
+{
+#ifdef FRONTEND
+    fprintf(stderr,
+            _("failed to get the Language Code Identifier (LCID) for locale \"%s\""),
+            winlocale);
+    /* keep newline separate so there's only one translatable string */
+    fputc('\n', stderr);
+#else                           /* not FRONTEND */
+    ereport(ERROR,
+            (errmsg("failed to get the Language Code Identifier (LCID) for locale \"%s\"",
+                    winlocale)));
+#endif                          /* not FRONTEND */
+}
+
+/*
+ * Get the canonical ICU name for the Windows locale.
+ *
+ * Return a string allocated with STRDUP() (malloc'd in frontend code), or
+ * NULL in case of failure or if winlocale is NULL.
+ *
+ * Temporary resources (the _locale_t and the wide-character copy of the
+ * name) are released before an error is reported, so that they are not left
+ * behind if the report does not return.
+ */
+char *
+check_icu_winlocale(const char *winlocale)
+{
+    uint32      lcid;
+    char        canonname_buf[ULOC_FULLNAME_CAPACITY];
+    UErrorCode  status = U_ZERO_ERROR;
+#if (_MSC_VER >= 1400)          /* VC8.0 or later */
+    _locale_t   loct = NULL;
+#endif
+
+    if (winlocale == NULL)
+        return NULL;
+
+    /* Get the Language Code Identifier (LCID). */
+
+#if (_MSC_VER >= 1400)          /* VC8.0 or later */
+    loct = _create_locale(LC_COLLATE, winlocale);
+
+    if (loct != NULL)
+    {
+#if (_MSC_VER >= 1700)          /* Visual Studio 2012 or later */
+        lcid = get_lcid(loct->locinfo->locale_name[LC_COLLATE]);
+#else                           /* _MSC_VER >= 1400 && _MSC_VER < 1700 */
+        lcid = loct->locinfo->lc_handle[LC_COLLATE];
+#endif                          /* _MSC_VER >= 1400 && _MSC_VER < 1700 */
+
+        /* the locale object is not needed any more, whatever the outcome */
+        _free_locale(loct);
+
+        if (lcid == 0)
+        {
+            /* there's an error */
+            report_lcid_failure(winlocale);
+            return NULL;
+        }
+    }
+    else
+#endif                          /* VC8.0 or later */
+    {
+        if (strlen(winlocale) == 0)
+        {
+            lcid = LOCALE_USER_DEFAULT;
+        }
+        else
+        {
+            size_t      locale_len = strlen(winlocale);
+            size_t      locale_wlen;
+            wchar_t    *wlocale;
+
+            wlocale = (wchar_t *) ALLOC((locale_len + 1) * sizeof(wchar_t));
+            if (wlocale == NULL)
+            {
+                /* only possible where ALLOC() is malloc(), i.e. in frontend */
+#ifdef FRONTEND
+                fprintf(stderr, _("out of memory"));
+                /* keep newline separate so there's only one translatable string */
+                fputc('\n', stderr);
+#else
+                ereport(ERROR, (errmsg("out of memory")));
+#endif
+                return NULL;
+            }
+
+            /* Locale names use only ASCII */
+            locale_wlen = char2wchar_ascii(wlocale, locale_len + 1,
+                                           winlocale, locale_len);
+            if (locale_wlen == (size_t) -1)
+            {
+                /* there's an error */
+                FREE(wlocale);
+#ifdef FRONTEND
+                fprintf(stderr,
+                        _("failed to convert locale \"%s\" to wide characters"),
+                        winlocale);
+                /* keep newline separate so there's only one translatable string */
+                fputc('\n', stderr);
+#else
+                ereport(ERROR,
+                        (errmsg("failed to convert locale \"%s\" to wide characters",
+                                winlocale)));
+#endif
+                return NULL;
+            }
+
+            lcid = get_lcid(wlocale);
+            FREE(wlocale);
+
+            if (lcid == 0)
+            {
+                /* there's an error */
+                report_lcid_failure(winlocale);
+                return NULL;
+            }
+        }
+    }
+
+    /* Get the ICU canonical name. */
+
+    uloc_getLocaleForLCID(lcid, canonname_buf, sizeof(canonname_buf), &status);
+
+    /*
+     * U_STRING_NOT_TERMINATED_WARNING is not a failure for U_FAILURE(), but
+     * it means the buffer has no terminating NUL and must not be copied.
+     */
+    if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING)
+    {
+#ifdef FRONTEND
+        fprintf(stderr,
+                _("ICU error: failed to get the locale name for LCID 0x%04x: %s"),
+                lcid, u_errorName(status));
+        /* keep newline separate so there's only one translatable string */
+        fputc('\n', stderr);
+#else
+        ereport(ERROR,
+                (errmsg("ICU error: failed to get the locale name for LCID 0x%04x: %s",
+                        lcid, u_errorName(status))));
+#endif
+        return NULL;
+    }
+
+    return STRDUP(canonname_buf);
+}
+#endif                          /* WIN32 */
+#endif                          /* USE_ICU */
+
+#endif                          /* not LIBPQ_MAKE */
diff --git a/src/test/Makefile b/src/test/Makefile
index 73abf16..259bb1f 100644
--- a/src/test/Makefile
+++ b/src/test/Makefile
@@ -12,7 +12,7 @@ subdir = src/test
 top_builddir = ../..
 include $(top_builddir)/src/Makefile.global
 
-SUBDIRS = perl regress isolation modules authentication recovery subscription
+SUBDIRS = perl regress isolation modules authentication recovery subscription default_collation
 
 # We don't build or execute examples/, locale/, or thread/ by default,
 # but we do want "make clean" etc to recurse into them. 
Likewise for diff --git a/src/test/default_collation/Makefile b/src/test/default_collation/Makefile new file mode 100644 index 0000000..2efe8be --- /dev/null +++ b/src/test/default_collation/Makefile @@ -0,0 +1,28 @@ +# src/test/default_collation/Makefile + +subdir = src/test/default_collation +top_builddir = ../../.. +include $(top_builddir)/src/Makefile.global + +ifeq ($(with_icu),yes) +check: + $(MAKE) -C icu check +check-utf8: + $(MAKE) -C icu.utf8 check + $(MAKE) -C libc.utf8 check +else +check: + $(MAKE) -C libc check +check-utf8: + $(MAKE) -C libc.utf8 check +endif + +# We don't check libc/ if with_icu or vice versa, but we do want "make clean" to +# recurse into it. The same goes for libc.utf8/ or icu.utf8/, which we don't +# check by default. +ALWAYS_SUBDIRS = libc libc.utf8 icu icu.utf8 + +clean distclean maintainer-clean: + for d in $(ALWAYS_SUBDIRS); do \ + $(MAKE) -C $$d clean || exit; \ + done diff --git a/src/test/default_collation/icu.utf8/.gitignore b/src/test/default_collation/icu.utf8/.gitignore new file mode 100644 index 0000000..871e943 --- /dev/null +++ b/src/test/default_collation/icu.utf8/.gitignore @@ -0,0 +1,2 @@ +# Generated by test suite +/tmp_check/ diff --git a/src/test/default_collation/icu.utf8/Makefile b/src/test/default_collation/icu.utf8/Makefile new file mode 100644 index 0000000..7adecfd --- /dev/null +++ b/src/test/default_collation/icu.utf8/Makefile @@ -0,0 +1,11 @@ +# src/test/default_collation/icu.utf8/Makefile + +subdir = src/test/default_collation/icu.utf8 +top_builddir = ../../../.. 
+include $(top_builddir)/src/Makefile.global
+
+check:
+	$(prove_check)
+
+clean distclean maintainer-clean:
+	rm -rf tmp_check
diff --git a/src/test/default_collation/icu.utf8/t/001_default_collation.pl b/src/test/default_collation/icu.utf8/t/001_default_collation.pl
new file mode 100644
index 0000000..617c06d
--- /dev/null
+++ b/src/test/default_collation/icu.utf8/t/001_default_collation.pl
@@ -0,0 +1,799 @@
+use strict;
+use warnings;
+
+use Config;
+use PostgresNode;
+use TestLib;
+use Test::More tests => 188;
+
+my $tempdir = TestLib::tempdir;
+my $datadir = "$tempdir/data";
+
+# test initdb
+
+# Run initdb with the given space-separated options in a scratch data
+# directory.  If $error_message is non-empty, stderr must match it (as a
+# regular expression); otherwise initdb must succeed silently and announce
+# $expected_collprovider as the default collation provider.
+sub test_initdb
+{
+    my ($test_name, $options, $expected_collprovider, $error_message) = @_;
+    my ($in_initdb, $out_initdb, $err_initdb);
+
+    mkdir $datadir;
+
+    my @command = (qw(initdb -A trust -N -D), $datadir, split(" ", $options));
+    print "# Running: " . join(" ", @command) . "\n";
+    my $result = IPC::Run::run \@command, \$in_initdb, \$out_initdb,
+      \$err_initdb;
+
+    if ($error_message)
+    {
+        like($err_initdb,
+            qr{$error_message},
+            "initdb: $test_name: check error message");
+    }
+    else
+    {
+        ok($result, "\"@command\" exit code 0");
+        is($err_initdb, "", "\"@command\" no stderr");
+        like($out_initdb,
+            qr{The default collation provider is \"$expected_collprovider\"\.},
+            "initdb: $test_name: check output");
+    }
+
+    File::Path::rmtree $datadir;
+}
+
+# Run one SQL command through psql (optionally against database $db) and
+# return (success flag, stdout, stderr).
+sub psql
+{
+    my ($command, $db) = @_;
+    my ($result, $in, $out, $err);
+    my @psql = ('psql', '-X', '-c', $command);
+    if (defined($db))
+    {
+        push(@psql, $db);
+    }
+    print "# Running: " . join(" ", @psql) . "\n";
+    $result = IPC::Run::run \@psql, \$in, \$out, \$err;
+    return ($result, $out, $err);
+}
+
+# --locale
+
+test_initdb(
+    "en_US.utf8 locale",
+    "--locale=en_US.utf8",
+    "icu",
+    "");
+
+test_initdb(
+    "en_US.utf8 locale with C ctype",
+    "--locale=en_US.utf8 --lc-ctype=C",
+    "",
+    "collations with different collate and ctype values are not supported by "
+    . "ICU");
+
+test_initdb(
+    "be_BY\@latin icu locale",
+    "--locale=be_BY\@latin\@icu",
+    "icu",
+    "");
+
+test_initdb(
+    "be_BY\@latin icu locale invalid modifier order",
+    "--locale=be_BY\@icu\@latin",
+    "",
+    "invalid locale name \"be_BY\@icu\@latin\"");
+
+# --lc-collate with the same --lc-ctype if needed
+
+test_initdb(
+    "en_US.utf8 lc_collate",
+    "--lc-collate=en_US.utf8 --lc-ctype=en_US.utf8",
+    "icu",
+    "");
+
+test_initdb(
+    "en_US.utf8 lc_collate with C ctype",
+    "--lc-collate=en_US.utf8 --lc-ctype=C",
+    "",
+    "collations with different collate and ctype values are not supported by "
+    . "ICU");
+
+test_initdb(
+    "be_BY\@latin icu lc_collate",
+    "--lc-collate=be_BY\@latin\@icu --lc-ctype=be_BY\@latin",
+    "icu",
+    "");
+
+test_initdb(
+    "be_BY\@latin icu lc_collate invalid modifier order",
+    "--lc-collate=be_BY\@icu\@latin",
+    "",
+    "invalid locale name \"be_BY\@icu\@latin\" \\(provider \"libc\"\\)");
+
+# test createdb and CREATE DATABASE
+
+# Drop the scratch database "mydb".  The list form of system() is used so
+# that no shell is involved.
+sub drop_mydb
+{
+    my @command = ("dropdb", "mydb");
+    print "# Running: " . join(" ", @command) . "\n";
+    system(@command);
+    return;
+}
+
+# Check that the pg_database.datcollate value printed by psql ends with the
+# expected provider modifier (followed by an optional version for icu).  No
+# check is made for any other value of $expected_collprovider.
+sub check_datcollate
+{
+    my ($datcollate_output, $expected_collprovider, $check_name) = @_;
+
+    # report failures at the caller's line, not inside this helper
+    local $Test::Builder::Level = $Test::Builder::Level + 1;
+
+    if ($expected_collprovider eq "libc")
+    {
+        like($datcollate_output,
+            qr{\@$expected_collprovider\n},
+            $check_name);
+    }
+    elsif ($expected_collprovider eq "icu")
+    {
+        like($datcollate_output,
+            qr{\@$expected_collprovider([\.\d]+)?\n},
+            $check_name);
+    }
+    return;
+}
+
+# Create "mydb" from template0 with createdb and the given space-separated
+# options.  If $error_message is non-empty, stderr must match it; otherwise
+# createdb must succeed silently, pg_database.datcollate must name the
+# expected provider, and the database is dropped again.
+sub test_createdb
+{
+    my ($test_name, $options, $expected_collprovider, $error_message) = @_;
+    my ($result, $in_command, $out_command, $err_command);
+
+    my @command = ("createdb",
+        split(" ", $options),
+        "--template=template0",
+        "mydb");
+
+    print "# Running: " . join(" ", @command) . "\n";
+    $result = IPC::Run::run \@command, \$in_command, \$out_command,
+      \$err_command;
+
+    if ($error_message)
+    {
+        like($err_command,
+            qr{$error_message},
+            "createdb: $test_name: check error message");
+
+        # If the command unexpectedly succeeded, do not leave the database
+        # behind: every later test creates "mydb" again.
+        drop_mydb() if $result;
+        return;
+    }
+
+    ok($result, "\"@command\" exit code 0");
+    is($err_command, "", "\"@command\" no stderr");
+
+    ($result, $out_command, $err_command) = psql(
+        "select datcollate from pg_database where datname = 'mydb';");
+
+    check_datcollate($out_command, $expected_collprovider,
+        "createdb: $test_name: check pg_database.datcollate");
+
+    drop_mydb();
+    return;
+}
+
+# Same as test_createdb, but through the SQL command CREATE DATABASE; $options
+# is inserted verbatim between "create database mydb" and the template clause.
+sub test_create_database
+{
+    my ($test_name,
+        $options,
+        $expected_collprovider,
+        $error_message) = @_;
+    my ($result, $out_command, $err_command);
+
+    # Keep the statement in a variable so that the test names below can show
+    # what was actually run.
+    my $sql = "create database mydb "
+      . $options
+      . " template = template0;";
+
+    ($result, $out_command, $err_command) = psql($sql);
+
+    if ($error_message)
+    {
+        like($err_command,
+            qr{$error_message},
+            "CREATE DATABASE: $test_name: check error message");
+
+        # see test_createdb
+        drop_mydb() if $result;
+        return;
+    }
+
+    ok($result, "\"$sql\" exit code 0");
+    is($err_command, "", "\"$sql\" no stderr");
+    like($out_command, qr{CREATE DATABASE}, "\"$sql\" check output");
+
+    ($result, $out_command, $err_command) = psql(
+        "select datcollate from pg_database where datname = 'mydb';");
+
+    check_datcollate($out_command, $expected_collprovider,
+        "CREATE DATABASE: $test_name: check pg_database.datcollate");
+
+    drop_mydb();
+    return;
+}
+
+# Create "mydb" with the given createdb options, check its default collation
+# provider, then run each [SQL text, expected psql output] pair of @commands
+# in it.  The output is compared only when psql printed something.  The
+# database is dropped at the end.
+sub test_default_collation
+{
+    my ($createdb_options, $collation, $expected_collprovider, @commands) = @_;
+    my ($result, $in_command, $out_command, $err_command);
+
+    my @command = ("createdb", split(" ", $createdb_options), "mydb");
+    print "# Running: " . join(" ", @command) . "\n";
+    $result = IPC::Run::run \@command, \$in_command, \$out_command,
+      \$err_command;
+
+    ok($result, "\"@command\" exit code 0");
+    is($err_command, "", "\"@command\" no stderr");
+
+    ($result, $out_command, $err_command) = psql(
+        "select datcollate from pg_database where datname = 'mydb';");
+
+    check_datcollate($out_command, $expected_collprovider,
+        "\"@command\" check output");
+
+    foreach my $step (@commands)
+    {
+        my ($command_text, $expected) = @{$step};
+        ($result, $out_command, $err_command) = psql($command_text, "mydb");
+
+        # name the checks after the SQL that was run, not after createdb
+        ok($result, "\"$command_text\" exit code 0");
+        is($err_command, "", "\"$command_text\" no stderr");
+        if ($out_command)
+        {
+            is(
+                $out_command,
+                $expected,
+                "default collation "
+                  . $collation
+                  . ": \""
+                  . $command_text
+                  . "\" check output");
+        }
+    }
+
+    drop_mydb();
+    return;
+}
+
+my $node = get_new_node('main');
+$node->init;
+$node->start;
+local $ENV{PGPORT} = $node->port;
+
+my @command = ("createuser --createdb --no-superuser non_superuser");
+print "# Running: " . join(" ", @command) . 
"\n"; +system(@command); + +# test createdb + +# --locale + +test_createdb( + "en_US.utf8 locale", + "--locale=en_US.utf8", + "icu", + ""); + +test_createdb( + "be_BY\@latin icu locale", + "--locale=be_BY\@latin\@icu", + "icu", + ""); + +test_createdb( + "be_BY\@latin icu locale invalid modifier order", + "--locale=be_BY\@icu\@latin", + "", + "invalid locale name: \"be_BY\@icu\@latin\""); + +# --lc-collate with the same --lc-ctype if needed + +test_createdb( + "en_US.utf8 lc_collate", + "--lc-collate=en_US.utf8 --lc-ctype=en_US.utf8", + "icu", + ""); + +test_createdb( + "en_US.utf8 lc_collate with C ctype", + "--lc-collate=en_US.utf8 --lc-ctype=C", + "", + "collations with different collate and ctype values are not supported by " + . "ICU"); + +test_createdb( + "be_BY\@latin icu lc_collate", + "--lc-collate=be_BY\@latin\@icu --lc-ctype=be_BY\@latin", + "icu", + ""); + +test_createdb( + "be_BY\@latin icu lc_collate invalid modifier order", + "--lc-collate=be_BY\@icu\@latin", + "", + "invalid locale name: \"be_BY\@icu\@latin\" \\(provider \"libc\"\\)"); + +# test CREATE DATABASE + +test_create_database( + "en_US.utf8 lc_collate", + "LC_COLLATE = 'en_US.utf8' LC_CTYPE = 'en_US.utf8'", + "icu", + ""); + +test_create_database( + "en_US.utf8 lc_collate with C ctype", + "LC_COLLATE = 'en_US.utf8' LC_CTYPE = 'C'", + "", + "collations with different collate and ctype values are not supported by " + . 
"ICU"); + +test_create_database( + "be_BY\@latin icu lc_collate", + "LC_COLLATE = 'be_BY\@latin' LC_CTYPE = 'be_BY\@latin'", + "icu", + ""); + +test_create_database( + "be_BY\@latin icu lc_collate invalid modifier order", + "LC_COLLATE = 'be_BY\@icu\@latin'", + "", + "invalid locale name: \"be_BY\@icu\@latin\" \\(provider \"libc\"\\)"); + +# test default collation behaviour +# use commands and outputs from the regression test collate.icu.utf8 + +test_default_collation( + "--lc-collate=en_US.utf8 --lc-ctype=en_US.utf8 --template=template0", + "en_US.utf8\@icu", + "icu", + ( + [ + "CREATE TABLE collate_test1 (a int, b text NOT NULL);", + "CREATE TABLE\n" + ], + [ + "INSERT INTO collate_test1 VALUES " + . "(1, 'abc'), (2, 'äbc'), (3, 'bbc'), (4, 'ABC');", + "INSERT 0 4\n"], + [ + "SELECT * FROM collate_test1 WHERE b >= 'bbc';", + " a | b \n" + . "---+-----\n" + . " 3 | bbc\n" + . "(1 row)\n" + . "\n" + ], + [ + "SELECT a, b FROM collate_test1 ORDER BY b;", + " a | b \n" + . "---+-----\n" + . " 1 | abc\n" + . " 4 | ABC\n" + . " 2 | äbc\n" + . " 3 | bbc\n" + . "(4 rows)\n" + . "\n" + ], + # star expansion + [ + "SELECT * FROM collate_test1 ORDER BY b;", + " a | b \n" + . "---+-----\n" + . " 1 | abc\n" + . " 4 | ABC\n" + . " 2 | äbc\n" + . " 3 | bbc\n" + . "(4 rows)\n" + . "\n" + ], + # upper/lower + ["CREATE TABLE collate_test10 (a int, x text);", "CREATE TABLE\n", ""], + [ + "INSERT INTO collate_test10 VALUES (1, 'hij'), (2, 'HIJ');", + "INSERT 0 2\n" + ], + [ + "SELECT a, lower(x), upper(x), initcap(x) FROM collate_test10;", + " a | lower | upper | initcap \n" + . "---+-------+-------+---------\n" + . " 1 | hij | HIJ | Hij\n" + . " 2 | hij | HIJ | Hij\n" + . "(2 rows)\n" + . "\n" + ], + # LIKE/ILIKE + [ + "SELECT * FROM collate_test1 WHERE b LIKE 'abc';", + " a | b \n" + . "---+-----\n" + . " 1 | abc\n" + . "(1 row)\n" + . "\n" + ], + [ + "SELECT * FROM collate_test1 WHERE b LIKE 'abc%';", + " a | b \n" + . "---+-----\n" + . " 1 | abc\n" + . "(1 row)\n" + . 
"\n" + ], + [ + "SELECT * FROM collate_test1 WHERE b LIKE '%bc%';", + " a | b \n" + . "---+-----\n" + . " 1 | abc\n" + . " 2 | äbc\n" + . " 3 | bbc\n" + . "(3 rows)\n" + . "\n" + ], + [ + "SELECT * FROM collate_test1 WHERE b ILIKE 'abc';", + " a | b \n" + . "---+-----\n" + . " 1 | abc\n" + . " 4 | ABC\n" + . "(2 rows)\n" + . "\n" + ], + [ + "SELECT * FROM collate_test1 WHERE b ILIKE 'abc%';", + " a | b \n" + . "---+-----\n" + . " 1 | abc\n" + . " 4 | ABC\n" + . "(2 rows)\n" + . "\n" + ], + [ + "SELECT * FROM collate_test1 WHERE b ILIKE '%bc%';", + " a | b \n" + . "---+-----\n" + . " 1 | abc\n" + . " 2 | äbc\n" + . " 3 | bbc\n" + . " 4 | ABC\n" + . "(4 rows)\n" + . "\n" + ], + [ + "SELECT 'Türkiye' ILIKE '%KI%' AS \"true\";", + " true \n" + . "------\n" + . " t\n" + . "(1 row)\n" + . "\n" + ], + [ + "SELECT 'bıt' ILIKE 'BIT' AS \"false\";", + " false \n" + . "-------\n" + . " f\n" + . "(1 row)\n" + . "\n" + ], + # regular expressions + [ + "SELECT * FROM collate_test1 WHERE b ~ '^abc\$';", + " a | b \n" + . "---+-----\n" + . " 1 | abc\n" + . "(1 row)\n" + . "\n" + ], + [ + "SELECT * FROM collate_test1 WHERE b ~ '^abc';", + " a | b \n" + . "---+-----\n" + . " 1 | abc\n" + . "(1 row)\n" + . "\n" + ], + [ + "SELECT * FROM collate_test1 WHERE b ~ 'bc';", + " a | b \n" + . "---+-----\n" + . " 1 | abc\n" + . " 2 | äbc\n" + . " 3 | bbc\n" + . "(3 rows)\n" + . "\n" + ], + [ + "SELECT * FROM collate_test1 WHERE b ~* '^abc\$';", + " a | b \n" + . "---+-----\n" + . " 1 | abc\n" + . " 4 | ABC\n" + . "(2 rows)\n" + . "\n" + ], + [ + "SELECT * FROM collate_test1 WHERE b ~* '^abc';", + " a | b \n" + . "---+-----\n" + . " 1 | abc\n" + . " 4 | ABC\n" + . "(2 rows)\n" + . "\n" + ], + [ + "SELECT * FROM collate_test1 WHERE b ~* 'bc';", + " a | b \n" + . "---+-----\n" + . " 1 | abc\n" + . " 2 | äbc\n" + . " 3 | bbc\n" + . " 4 | ABC\n" + . "(4 rows)\n" + . "\n" + ], + ["CREATE TABLE collate_test6 (a int, b text);", "CREATE TABLE\n", ""], + [ + "INSERT INTO collate_test6 VALUES " + . 
"(1, 'abc'), (2, 'ABC'), (3, '123'), (4, 'ab1'), " + . "(5, 'a1!'), (6, 'a c'), (7, '!.;'), (8, ' '), " + . "(9, 'äbç'), (10, 'ÄBÇ');", + "INSERT 0 10\n" + ], + [ + "SELECT b, " + . "b ~ '^[[:alpha:]]+\$' AS is_alpha, " + . "b ~ '^[[:upper:]]+\$' AS is_upper, " + . "b ~ '^[[:lower:]]+\$' AS is_lower, " + . "b ~ '^[[:digit:]]+\$' AS is_digit, " + . "b ~ '^[[:alnum:]]+\$' AS is_alnum, " + . "b ~ '^[[:graph:]]+\$' AS is_graph, " + . "b ~ '^[[:print:]]+\$' AS is_print, " + . "b ~ '^[[:punct:]]+\$' AS is_punct, " + . "b ~ '^[[:space:]]+\$' AS is_space " + . "FROM collate_test6;", + " b | is_alpha | is_upper | is_lower | is_digit | is_alnum | is_graph | is_print | is_punct | is_space \n" + . "-----+----------+----------+----------+----------+----------+----------+----------+----------+----------\n" + . " abc | t | f | t | f | t | t | t | f | f\n" + . " ABC | t | t | f | f | t | t | t | f | f\n" + . " 123 | f | f | f | t | t | t | t | f | f\n" + . " ab1 | f | f | f | f | t | t | t | f | f\n" + . " a1! | f | f | f | f | f | t | t | f | f\n" + . " a c | f | f | f | f | f | f | t | f | f\n" + . " !.; | f | f | f | f | f | t | t | t | f\n" + . " | f | f | f | f | f | f | t | f | t\n" + . " äbç | t | f | t | f | t | t | t | f | f\n" + . " ÄBÇ | t | t | f | f | t | t | t | f | f\n" + . "(10 rows)\n" + . "\n" + ], + [ + "SELECT 'Türkiye' ~* 'KI' AS \"true\";", + " true \n" + . "------\n" + . " t\n" + . "(1 row)\n" + . "\n" + ], + [ + "SELECT 'bıt' ~* 'BIT' AS \"false\";", + " false \n" + . "-------\n" + . " f\n" + . "(1 row)\n" + . "\n" + ], + [ + "SELECT a, lower(coalesce(x, 'foo')) FROM collate_test10;", + " a | lower \n" + . "---+-------\n" + . " 1 | hij\n" + . " 2 | hij\n" + . "(2 rows)\n" + . "\n" + ], + [ + "SELECT a, b, greatest(b, 'CCC') FROM collate_test1 ORDER BY 3;", + " a | b | greatest \n" + . "---+-----+----------\n" + . " 1 | abc | CCC\n" + . " 2 | äbc | CCC\n" + . " 3 | bbc | CCC\n" + . " 4 | ABC | CCC\n" + . "(4 rows)\n" + . 
"\n" + ], + [ + "SELECT a, x, lower(greatest(x, 'foo')) FROM collate_test10;", + " a | x | lower \n" + . "---+-----+-------\n" + . " 1 | hij | hij\n" + . " 2 | HIJ | hij\n" + . "(2 rows)\n" + . "\n" + ], + [ + "SELECT a, nullif(b, 'abc') FROM collate_test1 ORDER BY 2;", + " a | nullif \n" + . "---+--------\n" + . " 4 | ABC\n" + . " 2 | äbc\n" + . " 3 | bbc\n" + . " 1 | \n" + . "(4 rows)\n" + . "\n" + ], + [ + "SELECT a, lower(nullif(x, 'foo')) FROM collate_test10;", + " a | lower \n" + . "---+-------\n" + . " 1 | hij\n" + . " 2 | hij\n" + . "(2 rows)\n" + . "\n" + ], + [ + "SELECT a, CASE b WHEN 'abc' THEN 'abcd' ELSE b END " + . "FROM collate_test1 ORDER BY 2;", + " a | b \n" + . "---+------\n" + . " 4 | ABC\n" + . " 2 | äbc\n" + . " 1 | abcd\n" + . " 3 | bbc\n" + . "(4 rows)\n" + . "\n" + ], + ["CREATE DOMAIN testdomain AS text;", "CREATE DOMAIN\n", ""], + [ + "SELECT a, b::testdomain FROM collate_test1 ORDER BY 2;", + " a | b \n" + . "---+-----\n" + . " 1 | abc\n" + . " 4 | ABC\n" + . " 2 | äbc\n" + . " 3 | bbc\n" + . "(4 rows)\n" + . "\n" + ], + [ + "SELECT a, lower(x::testdomain) FROM collate_test10;", + " a | lower \n" + . "---+-------\n" + . " 1 | hij\n" + . " 2 | hij\n" + . "(2 rows)\n" + . "\n" + ], + [ + "SELECT min(b), max(b) FROM collate_test1;", + " min | max \n" + . "-----+-----\n" + . " abc | bbc\n" + . "(1 row)\n" + . "\n", + "" + ], + [ + "SELECT array_agg(b ORDER BY b) FROM collate_test1;", + " array_agg \n" + . "-------------------\n" + . " {abc,ABC,äbc,bbc}\n" + . "(1 row)\n" + . "\n" + ], + [ + "SELECT a, b FROM collate_test1 " + . "UNION ALL " + . "SELECT a, b FROM collate_test1 ORDER BY 2;", + " a | b \n" + . "---+-----\n" + . " 1 | abc\n" + . " 1 | abc\n" + . " 4 | ABC\n" + . " 4 | ABC\n" + . " 2 | äbc\n" + . " 2 | äbc\n" + . " 3 | bbc\n" + . " 3 | bbc\n" + . "(8 rows)\n" + . "\n" + ], + # casting + [ + "SELECT a, CAST(b AS varchar) FROM collate_test1 ORDER BY 2;", + " a | b \n" + . "---+-----\n" + . " 1 | abc\n" + . " 4 | ABC\n" + . 
" 2 | äbc\n" + . " 3 | bbc\n" + . "(4 rows)\n" + . "\n" + ], + # propagation of collation in SQL functions (inlined and non-inlined + # cases) and plpgsql functions too + [ + "CREATE FUNCTION mylt (text, text) RETURNS boolean LANGUAGE sql " + . "AS \$\$ select \$1 < \$2 \$\$;", + "CREATE FUNCTION\n" + ], + [ + "CREATE FUNCTION mylt_noninline (text, text) " + . "RETURNS boolean LANGUAGE sql " + . "AS \$\$ select \$1 < \$2 limit 1 \$\$;", + "CREATE FUNCTION\n" + ], + [ + "CREATE FUNCTION mylt_plpgsql (text, text) " + . "RETURNS boolean LANGUAGE plpgsql " + . "AS \$\$ begin return \$1 < \$2; end \$\$;", + "CREATE FUNCTION\n" + ], + [ + "SELECT a.b AS a, b.b AS b, a.b < b.b AS lt, " + . "mylt(a.b, b.b), mylt_noninline(a.b, b.b), mylt_plpgsql(a.b, b.b) " + . "FROM collate_test1 a, collate_test1 b " + . "ORDER BY a.b, b.b;", + " a | b | lt | mylt | mylt_noninline | mylt_plpgsql \n" + . "-----+-----+----+------+----------------+--------------\n" + . " abc | abc | f | f | f | f\n" + . " abc | ABC | t | t | t | t\n" + . " abc | äbc | t | t | t | t\n" + . " abc | bbc | t | t | t | t\n" + . " ABC | abc | f | f | f | f\n" + . " ABC | ABC | f | f | f | f\n" + . " ABC | äbc | t | t | t | t\n" + . " ABC | bbc | t | t | t | t\n" + . " äbc | abc | f | f | f | f\n" + . " äbc | ABC | f | f | f | f\n" + . " äbc | äbc | f | f | f | f\n" + . " äbc | bbc | t | t | t | t\n" + . " bbc | abc | f | f | f | f\n" + . " bbc | ABC | f | f | f | f\n" + . " bbc | äbc | f | f | f | f\n" + . " bbc | bbc | f | f | f | f\n" + . "(16 rows)\n" + . "\n" + ], + # polymorphism + [ + "SELECT * FROM unnest(" + . "(SELECT array_agg(b ORDER BY b) FROM collate_test1)" + . ") ORDER BY 1;", + " unnest \n" + . "--------\n" + . " abc\n" + . " ABC\n" + . " äbc\n" + . " bbc\n" + . "(4 rows)\n" + . "\n" + ], + [ + "CREATE FUNCTION dup (anyelement) RETURNS anyelement " + . "AS 'select \$1' LANGUAGE sql;", + "CREATE FUNCTION\n" + ], + [ + "SELECT a, dup(b) FROM collate_test1 ORDER BY 2;", + " a | dup \n" + . 
"---+-----\n" + . " 1 | abc\n" + . " 4 | ABC\n" + . " 2 | äbc\n" + . " 3 | bbc\n" + . "(4 rows)\n" + . "\n" + ], + # indexes + [ + "CREATE INDEX collate_test1_idx1 ON collate_test1 (b);", + "CREATE INDEX\n" + ] + ) +); + +$node->stop; diff --git a/src/test/default_collation/icu/.gitignore b/src/test/default_collation/icu/.gitignore new file mode 100644 index 0000000..871e943 --- /dev/null +++ b/src/test/default_collation/icu/.gitignore @@ -0,0 +1,2 @@ +# Generated by test suite +/tmp_check/ diff --git a/src/test/default_collation/icu/Makefile b/src/test/default_collation/icu/Makefile new file mode 100644 index 0000000..5ee91d8 --- /dev/null +++ b/src/test/default_collation/icu/Makefile @@ -0,0 +1,11 @@ +# src/test/default_collation/icu/Makefile + +subdir = src/test/default_collation/icu +top_builddir = ../../../.. +include $(top_builddir)/src/Makefile.global + +check: + $(prove_check) + +clean distclean maintainer-clean: + rm -rf tmp_check diff --git a/src/test/default_collation/icu/t/001_default_collation.pl b/src/test/default_collation/icu/t/001_default_collation.pl new file mode 100644 index 0000000..8b58be3 --- /dev/null +++ b/src/test/default_collation/icu/t/001_default_collation.pl @@ -0,0 +1,605 @@ +use strict; +use warnings; + +use Config; +use PostgresNode; +use TestLib; +use Test::More; + +my $tempdir = TestLib::tempdir; +my $datadir = "$tempdir/data"; + +# check whether ICU can convert C locale to a language tag + +my ($in_initdb, $out_initdb, $err_initdb); +my @command = (qw(initdb -A trust -N -D), $datadir, "--locale=C\@icu"); +print "# Running: " . join(" ", @command) . "\n"; +my $result = IPC::Run::run \@command, \$in_initdb, \$out_initdb, \$err_initdb; + +my $c_to_icu_language_tag = ( + not $err_initdb =~ /ICU error: could not convert locale name "C" to language tag: U_ILLEGAL_ARGUMENT_ERROR/); + +# get the number of tests + +plan tests => $c_to_icu_language_tag ? 
124 : 110; + +# test initdb + +sub test_initdb +{ + my ($test_name, $options, $expected_collprovider, $error_message) = @_; + my ($in_initdb, $out_initdb, $err_initdb); + + mkdir $datadir; + + my @command = (qw(initdb -A trust -N -D), $datadir, split(" ", $options)); + print "# Running: " . join(" ", @command) . "\n"; + my $result = IPC::Run::run \@command, \$in_initdb, \$out_initdb, + \$err_initdb; + + if ($error_message) + { + like($err_initdb, + qr{$error_message}, + "initdb: $test_name: check error message"); + } + else + { + ok($result, "\"@command\" exit code 0"); + is($err_initdb, "", "\"@command\" no stderr"); + like($out_initdb, + qr{The default collation provider is \"$expected_collprovider\"\.}, + "initdb: $test_name: check output"); + } + + File::Path::rmtree $datadir; +} + +# --locale + +test_initdb( + "empty libc locale", + "--locale=\@libc", + "libc", + ""); + +test_initdb( + "C locale without collation provider", + "--locale=C", + "libc", + ""); + +test_initdb( + "POSIX locale without collation provider", + "--locale=POSIX", + "libc", + ""); + +test_initdb( + "C libc locale", + "--locale=C\@libc", + "libc", + ""); + +test_initdb( + "POSIX libc locale", + "--locale=POSIX\@libc", + "libc", + ""); + +test_initdb( + "C icu locale", + "--locale=C\@icu", + "", + ($c_to_icu_language_tag ? + "selected encoding \\(SQL_ASCII\\) is not supported for ICU locales" : + "ICU error: could not convert locale name \"C\" to language tag: U_ILLEGAL_ARGUMENT_ERROR")); + +test_initdb( + "POSIX icu locale", + "--locale=POSIX\@icu", + "", + ($c_to_icu_language_tag ? 
+ "selected encoding \\(SQL_ASCII\\) is not supported for ICU locales" : + "ICU error: could not convert locale name \"C\" to language tag: U_ILLEGAL_ARGUMENT_ERROR")); + +test_initdb( + "C locale too many modifiers", + "--locale=C\@icu\@libc", + "", + "invalid locale name \"C\@icu\""); + +test_initdb( + "ICU language tag format locale", + "--locale=und-x-icu", + "", + "invalid locale name \"und-x-icu\""); + +# --lc-collate with the same --lc-ctype if needed + +test_initdb( + "empty libc lc_collate", + "--lc-collate=\@libc", + "libc", + ""); + +test_initdb( + "C lc_collate without collation provider", + "--lc-collate=C", + "libc", + ""); + +test_initdb( + "POSIX lc_collate without collation provider", + "--lc-collate=POSIX", + "libc", + ""); + +test_initdb( + "C libc lc_collate", + "--lc-collate=C\@libc", + "libc", + ""); + +test_initdb( + "POSIX libc lc_collate", + "--lc-collate=POSIX\@libc", + "libc", + ""); + +test_initdb( + "C icu lc_collate", + "--lc-collate=C\@icu --lc-ctype=C", + "", + ($c_to_icu_language_tag ? + "selected encoding \\(SQL_ASCII\\) is not supported for ICU locales" : + "ICU error: could not convert locale name \"C\" to language tag: U_ILLEGAL_ARGUMENT_ERROR"));; + +test_initdb( + "POSIX icu lc_collate", + "--lc-collate=POSIX\@icu --lc-ctype=POSIX", + "", + ($c_to_icu_language_tag ? 
+ "selected encoding \\(SQL_ASCII\\) is not supported for ICU locales" : + "ICU error: could not convert locale name \"C\" to language tag: U_ILLEGAL_ARGUMENT_ERROR")); + +test_initdb( + "C lc_collate too many modifiers", + "--lc-collate=C\@icu\@libc", + "", + "invalid locale name \"C\@icu\""); + +test_initdb( + "ICU language tag format lc_collate", + "--lc-collate=und-x-icu", + "", + "invalid locale name \"und-x-icu\""); + +# --locale & --lc-collate + +test_initdb( + "lc_collate implicit provider takes precedence", + "--locale=\@icu --lc-collate=C", + "libc", + ""); + +test_initdb( + "lc_collate explicit provider takes precedence", + "--locale=C\@libc --lc-collate=C\@icu", + "", + ($c_to_icu_language_tag ? + "selected encoding \\(SQL_ASCII\\) is not supported for ICU locales" : + "ICU error: could not convert locale name \"C\" to language tag: U_ILLEGAL_ARGUMENT_ERROR")); + +# test createdb and CREATE DATABASE +# test_createdb: run createdb from template0 with $options; stderr must match the regex $error_message if one is given, otherwise check the provider suffix of datcollate. +sub test_createdb +{ + my ($test_name, $options, $expected_collprovider, $error_message) = @_; + my (@command, $result, $in_command, $out_command, $err_command); + + @command = ("createdb", + split(" ", $options), + "--template=template0", + "mydb"); + + print "# Running: " . join(" ", @command) . "\n"; + $result = IPC::Run::run \@command, \$in_command, \$out_command, + \$err_command; + + if ($error_message) + { + like($err_command, + qr{$error_message}, + "createdb: $test_name: check error message"); + } + else + { + ok($result, "\"@command\" exit code 0"); + is($err_command, "", "\"@command\" no stderr"); + + @command = ( + "psql", "-X", # -X: do not read psqlrc, so the output format is predictable + "-c", + "select datcollate from pg_database where datname = 'mydb';"); + print "# Running: " . join(" ", @command) .
"\n"; + $result = IPC::Run::run \@command, \$in_command, \$out_command, + \$err_command; + + if ($expected_collprovider eq "libc") + { + like($out_command, + qr{\@$expected_collprovider\n}, + "createdb: $test_name: check pg_database.datcollate"); + } + elsif ($expected_collprovider eq "icu") + { # an icu datcollate may carry a trailing ".<version>" after the provider name + like($out_command, + qr{\@$expected_collprovider([\.\d]+)?\n}, + "createdb: $test_name: check pg_database.datcollate"); + } + + @command = ("dropdb mydb"); + print "# Running: " . join(" ", @command) . "\n"; + system(@command); + } +} +# test_create_database: same checks as test_createdb, but issues CREATE DATABASE through psql; extra psql switches come from $psql_options. +sub test_create_database +{ + my ($test_name, + $createdb_options, + $psql_options, + $expected_collprovider, + $error_message) = @_; + my (@command, $result, $in_command, $out_command, $err_command); + + @command = ("psql", "-X", # -X: do not read psqlrc + split(" ", $psql_options), + "-c", + "create database mydb " + . $createdb_options + . " template = template0;"); + print "# Running: " . join(" ", @command) . "\n"; + $result = IPC::Run::run \@command, \$in_command, \$out_command, + \$err_command; + + if ($error_message) + { + like($err_command, + qr{$error_message}, + "CREATE DATABASE: $test_name: check error message"); + } + else + { + ok($result, "\"@command\" exit code 0"); + is($err_command, "", "\"@command\" no stderr"); + like($out_command, qr{CREATE DATABASE}, "\"@command\" check output"); + + @command = ( + "psql", "-X", # -X: do not read psqlrc + "-c", + "select datcollate from pg_database where datname = 'mydb';"); + print "# Running: " . join(" ", @command) .
"\n"; + system(@command); + } +} + +my $node = get_new_node('main'); +$node->init; +$node->start; +local $ENV{PGPORT} = $node->port; + +@command = ("createuser --createdb --no-superuser non_superuser"); +print "# Running: " . join(" ", @command) . "\n"; +system(@command); + +# test createdb + +# --locale + +test_createdb( + "empty libc locale", + "--locale=\@libc", + "libc", + ""); + +test_createdb( + "C locale without collation provider", + "--locale=C", + "libc", + ""); + +test_createdb( + "POSIX locale without collation provider", + "--locale=POSIX", + "libc", + ""); + +test_createdb( + "C libc locale", + "--locale=C\@libc", + "libc", + ""); + +test_createdb( + "POSIX libc locale", + "--locale=POSIX\@libc", + "libc", + ""); + +if ($c_to_icu_language_tag) +{ + test_createdb( + "C icu locale with SQL_ASCII encoding and superuser", + "--locale=C\@icu --encoding=SQL_ASCII", + "icu", + ""); +} +else +{ + test_createdb( + "C icu locale with SQL_ASCII encoding and superuser", + "--locale=C\@icu --encoding=SQL_ASCII", + "", + "ICU error: could not convert locale name \"C\" to language tag: U_ILLEGAL_ARGUMENT_ERROR"); +} + +test_createdb( + "C icu locale with SQL_ASCII encoding and non-superuser", + "--locale=C\@icu --encoding=SQL_ASCII --username=non_superuser", + "", + "encoding \"SQL_ASCII\" is not supported for ICU locales"); + +if ($c_to_icu_language_tag) +{ + test_createdb( + "POSIX icu locale with SQL_ASCII encoding and superuser", + "--locale=POSIX\@icu --encoding=SQL_ASCII", + "icu", + ""); +} +else +{ + test_createdb( + "POSIX icu locale with SQL_ASCII encoding and superuser", + "--locale=POSIX\@icu --encoding=SQL_ASCII", + "", + "ICU error: could not convert locale name \"C\" to language tag: U_ILLEGAL_ARGUMENT_ERROR"); +} + +test_createdb( + "POSIX icu locale with SQL_ASCII encoding and non-superuser", + "--locale=POSIX\@icu --encoding=SQL_ASCII --username=non_superuser", + "", + "encoding \"SQL_ASCII\" is not supported for ICU locales"); + +test_createdb( + 
"C locale too many modifiers", + "--locale=C\@icu\@libc", + "", + "invalid locale name: \"C\@icu\""); + +test_createdb( + "ICU language tag format locale", + "--locale=und-x-icu", + "", + "invalid locale name: \"und-x-icu\""); + +# --lc-collate with the same --lc-ctype if needed + +test_createdb( + "empty libc lc_collate", + "--lc-collate=\@libc", + "libc", + ""); + +test_createdb( + "C lc_collate without collation provider", + "--lc-collate=C --lc-ctype=C", + "libc", + ""); + +test_createdb( + "POSIX lc_collate without collation provider", + "--lc-collate=POSIX --lc-ctype=POSIX", + "libc", + ""); + +test_createdb( + "C libc lc_collate", + "--lc-collate=C\@libc --lc-ctype=C", + "libc", + ""); + +test_createdb( + "POSIX libc lc_collate", + "--lc-collate=POSIX\@libc --lc-ctype=POSIX", + "libc", + ""); + +if ($c_to_icu_language_tag) +{ + test_createdb( + "C icu lc_collate with SQL_ASCII encoding and superuser", + "--lc-collate=C\@icu --lc-ctype=C --encoding=SQL_ASCII", + "icu", + ""); +} +else +{ + test_createdb( + "C icu lc_collate with SQL_ASCII encoding and superuser", + "--lc-collate=C\@icu --lc-ctype=C --encoding=SQL_ASCII", + "", + "ICU error: could not convert locale name \"C\" to language tag: U_ILLEGAL_ARGUMENT_ERROR"); +} + +test_createdb( + "C icu lc_collate with SQL_ASCII encoding and non-superuser", + "--lc-collate=C\@icu --lc-ctype=C --encoding=SQL_ASCII " + . 
"--username=non_superuser", + "", + "encoding \"SQL_ASCII\" is not supported for ICU locales"); + +if ($c_to_icu_language_tag) +{ + test_createdb( + "POSIX icu lc_collate with SQL_ASCII encoding and superuser", + "--lc-collate=POSIX\@icu --lc-ctype=POSIX --encoding=SQL_ASCII", + "icu", + ""); + +} +else +{ + test_createdb( + "POSIX icu lc_collate with SQL_ASCII encoding and superuser", + "--lc-collate=POSIX\@icu --lc-ctype=POSIX --encoding=SQL_ASCII", + "", + "ICU error: could not convert locale name \"C\" to language tag: U_ILLEGAL_ARGUMENT_ERROR"); +} + +test_createdb( + "POSIX icu lc_collate with SQL_ASCII encoding and non-superuser", + "--lc-collate=POSIX\@icu --lc-ctype=POSIX --encoding=SQL_ASCII " + . "--username=non_superuser", + "", + "encoding \"SQL_ASCII\" is not supported for ICU locales"); + +test_createdb( + "C lc_collate too many modifiers", + "--lc-collate=C\@icu\@libc", + "", + "invalid locale name: \"C\@icu\""); + +test_createdb( + "ICU language tag format lc_collate", + "--lc-collate=und-x-icu", + "", + "invalid locale name: \"und-x-icu\""); + +# test CREATE DATABASE + +# LC_COLLATE with the same LC_CTYPE if needed + +test_create_database( + "empty libc lc_collate", + "LC_COLLATE = '\@libc'", + "", + "libc", + ""); + +test_create_database( + "C lc_collate without collation provider", + "LC_COLLATE = 'C' LC_CTYPE = 'C'", + "", + "libc", + ""); + +test_create_database( + "POSIX lc_collate without collation provider", + "LC_COLLATE = 'POSIX' LC_CTYPE = 'POSIX'", + "", + "libc", + ""); + +test_create_database( + "C libc lc_collate", + "LC_COLLATE = 'C\@libc' LC_CTYPE = 'C'", + "", + "libc", + ""); + +test_create_database( + "POSIX libc lc_collate", + "LC_COLLATE = 'POSIX\@libc' LC_CTYPE = 'POSIX'", + "", + "libc", + ""); + +if ($c_to_icu_language_tag) +{ + test_create_database( + "C icu lc_collate with SQL_ASCII encoding and superuser", + "LC_COLLATE = 'C\@icu' LC_CTYPE = 'C' ENCODING = 'SQL_ASCII'", + "", + "icu", + ""); +} +else +{ + 
test_create_database( + "C icu lc_collate with SQL_ASCII encoding and superuser", + "LC_COLLATE = 'C\@icu' LC_CTYPE = 'C' ENCODING = 'SQL_ASCII'", + "", + "", + "ICU error: could not convert locale name \"C\" to language tag: U_ILLEGAL_ARGUMENT_ERROR"); +} + +test_create_database( + "C icu lc_collate with SQL_ASCII encoding and non-superuser", + "LC_COLLATE = 'C\@icu' LC_CTYPE = 'C' ENCODING = 'SQL_ASCII'", + "--username=non_superuser", + "", + "encoding \"SQL_ASCII\" is not supported for ICU locales"); + +if ($c_to_icu_language_tag) +{ + test_create_database( + "POSIX icu lc_collate with SQL_ASCII encoding and superuser", + "LC_COLLATE = 'POSIX\@icu' LC_CTYPE = 'POSIX' ENCODING = 'SQL_ASCII'", + "", + "icu", + ""); +} +else +{ + test_create_database( + "POSIX icu lc_collate with SQL_ASCII encoding and superuser", + "LC_COLLATE = 'POSIX\@icu' LC_CTYPE = 'POSIX' ENCODING = 'SQL_ASCII'", + "", + "", + "ICU error: could not convert locale name \"C\" to language tag: U_ILLEGAL_ARGUMENT_ERROR"); +} + +test_create_database( + "POSIX icu lc_collate with SQL_ASCII encoding and non-superuser", + "LC_COLLATE = 'POSIX\@icu' LC_CTYPE = 'POSIX' ENCODING = 'SQL_ASCII'", + "--username=non_superuser", + "", + "encoding \"SQL_ASCII\" is not supported for ICU locales"); + +test_create_database( + "C lc_collate too many modifiers", + "LC_COLLATE = 'C\@icu\@libc'", + "", + "", + "invalid locale name: \"C\@icu\""); + +test_create_database( + "ICU language tag format lc_collate", + "LC_COLLATE = 'und-x-icu'", + "", + "", + "invalid locale name: \"und-x-icu\""); + +$node->stop; diff --git a/src/test/default_collation/libc.utf8/.gitignore b/src/test/default_collation/libc.utf8/.gitignore new file mode 100644 index 0000000..871e943 --- /dev/null +++ b/src/test/default_collation/libc.utf8/.gitignore @@ -0,0 +1,2 @@ +# Generated by test suite +/tmp_check/ diff --git a/src/test/default_collation/libc.utf8/Makefile b/src/test/default_collation/libc.utf8/Makefile new file mode 100644 index 
0000000..e5b9d20 --- /dev/null +++ b/src/test/default_collation/libc.utf8/Makefile @@ -0,0 +1,11 @@ +# src/test/default_collation/libc.utf8/Makefile + +subdir = src/test/default_collation/libc.utf8 +top_builddir = ../../../.. +include $(top_builddir)/src/Makefile.global + +check: + $(prove_check) + +clean distclean maintainer-clean: + rm -rf tmp_check diff --git a/src/test/default_collation/libc.utf8/t/001_default_collation.pl b/src/test/default_collation/libc.utf8/t/001_default_collation.pl new file mode 100644 index 0000000..e4b3552 --- /dev/null +++ b/src/test/default_collation/libc.utf8/t/001_default_collation.pl @@ -0,0 +1,703 @@ +use strict; +use warnings; + +use Config; +use PostgresNode; +use TestLib; +use Test::More tests => 168; + +my $tempdir = TestLib::tempdir; +my $datadir = "$tempdir/data"; + +# test initdb + +sub test_initdb +{ + my ($test_name, $options, $error_message) = @_; + my ($in_initdb, $out_initdb, $err_initdb); + + mkdir $datadir; + + my @command = (qw(initdb -A trust -N -D), $datadir, split(" ", $options)); + print "# Running: " . join(" ", @command) . "\n"; + my $result = IPC::Run::run \@command, \$in_initdb, \$out_initdb, + \$err_initdb; + + if ($error_message) + { + like($err_initdb, + qr{$error_message}, + "initdb: $test_name: check error message"); + } + else + { + ok($result, "\"@command\" exit code 0"); + is($err_initdb, "", "\"@command\" no stderr"); + like($out_initdb, + qr{The default collation provider is \"libc\"\.}, + "initdb: $test_name: check output"); + } + + File::Path::rmtree $datadir; +} + +sub psql +{ + my ($command, $db) = @_; + my ($result, $in, $out, $err); + my @psql = ('psql', '-X', '-c', $command); + if (defined($db)) + { + push(@psql, $db); + } + print "# Running: " . join(" ", @psql) . 
"\n"; + $result = IPC::Run::run \@psql, \$in, \$out, \$err; + ($result, $out, $err); +} + +# --locale + +test_initdb( + "be_BY\@latin libc locale", + "--locale=be_BY\@latin\@libc", + ""); + +test_initdb( + "be_BY\@latin libc locale invalid modifier order", + "--locale=be_BY\@libc\@latin", + "invalid locale name \"be_BY\@libc\@latin\""); + +# --lc-collate + +test_initdb( + "be_BY\@latin libc lc_collate", + "--lc-collate=be_BY\@latin\@libc", + ""); + +test_initdb( + "be_BY\@latin libc lc_collate invalid modifier order", + "--lc-collate=be_BY\@libc\@latin", + "invalid locale name \"be_BY\@libc\@latin\" \\(provider \"libc\"\\)"); + +# test createdb, CREATE DATABASE and default collation behaviour + +sub test_createdb +{ + my ($test_name, $options, $from_template0, $error_message) = @_; + my (@command, $result, $in_command, $out_command, $err_command); + + if ($from_template0) + { + $options = $options . " --template=template0"; + } + + @command = ("createdb", split(" ", $options), "mydb"); + print "# Running: " . join(" ", @command) . "\n"; + $result = IPC::Run::run \@command, \$in_command, \$out_command, + \$err_command; + + if ($error_message) + { + like($err_command, + qr{$error_message}, + "createdb: $test_name: check error message"); + } + else + { + ok($result, "\"@command\" exit code 0"); + is($err_command, "", "\"@command\" no stderr"); + + ($result, $out_command, $err_command) = psql( + "select datcollate from pg_database where datname = 'mydb';"); + + like($out_command, + qr{\@libc\n}, + "createdb: $test_name: check pg_database.datcollate"); + + @command = ("dropdb mydb"); + print "# Running: " . join(" ", @command) . "\n"; + system(@command); + } +} + +sub test_create_database +{ + my ($test_name, $options, $from_template0, $error_message) = @_; + my (@command, $result, $in_command, $out_command, $err_command); + + ($result, $out_command, $err_command) = psql( + "create database mydb " + . $options + . ($from_template0 ? 
" TEMPLATE = template0;" : ";")); + + if ($error_message) + { + like($err_command, + qr{$error_message}, + "CREATE DATABASE: $test_name: check error message"); + } + else + { # @command is still empty here (the SQL went through psql()), so name the tests after $test_name + ok($result, "CREATE DATABASE: $test_name: exit code 0"); + is($err_command, "", "CREATE DATABASE: $test_name: no stderr"); + like($out_command, qr{CREATE DATABASE}, "CREATE DATABASE: $test_name: check output"); + + ($result, $out_command, $err_command) = psql( + "select datcollate from pg_database where datname = 'mydb';"); + + like($out_command, + qr{\@libc\n}, + "CREATE DATABASE: $test_name: check pg_database.datcollate"); + + @command = ("dropdb mydb"); + print "# Running: " . join(" ", @command) . "\n"; + system(@command); + } +} +# test_default_collation: create "mydb" with $createdb_options, run each [SQL, expected psql output] pair in it, then drop it. +sub test_default_collation +{ + my ($createdb_options, $collation, @commands) = @_; + my (@command, $result, $in_command, $out_command, $err_command); + + @command = ("createdb", split(" ", $createdb_options), "mydb"); + print "# Running: " . join(" ", @command) . "\n"; + $result = IPC::Run::run \@command, \$in_command, \$out_command, + \$err_command; + + ok($result, "\"@command\" exit code 0"); + is($err_command, "", "\"@command\" no stderr"); + + ($result, $out_command, $err_command) = psql( + "select datcollate from pg_database where datname = 'mydb';"); + + like($out_command, qr{\@libc\n}, "\"@command\" check output"); + + for my $pair (@commands) + { + my ($command_text, $expected) = @{$pair}; + ($result, $out_command, $err_command) = psql($command_text, "mydb"); + # label these by the SQL just run; @command still holds the createdb invocation + ok($result, "default collation $collation: \"$command_text\" exit code 0"); + is($err_command, "", "default collation $collation: \"$command_text\" no stderr"); + if ($out_command) # output is compared only when psql printed something + { + is( + $out_command, + $expected, + "default collation " + . $collation + . ": \"" + . $command_text + . "\" check output"); + } + } + + @command = ("dropdb mydb"); + print "# Running: " . join(" ", @command) .
"\n"; + system(@command); +} + +my $node = get_new_node('main'); +$node->init; +$node->start; +local $ENV{PGPORT} = $node->port; + +# test createdb + +# --locale + +test_createdb( + "be_BY\@latin libc locale", + "--locale=be_BY\@latin\@libc", + 1, + ""); + +test_createdb( + "be_BY\@latin libc locale invalid modifier order", + "--locale=be_BY\@libc\@latin", + 1, + "invalid locale name: \"be_BY\@libc\@latin\""); + +# --lc-collate + +test_createdb( + "be_BY\@latin libc lc_collate", + "--lc-collate=be_BY\@latin\@libc", + 1, + ""); + +test_createdb( + "be_BY\@latin libc lc_collate invalid modifier order", + "--lc-collate=be_BY\@libc\@latin", + 1, + "invalid locale name: \"be_BY\@libc\@latin\" \\(provider \"libc\"\\)"); + +# test CREATE DATABASE + +# LC_COLLATE + +test_create_database( + "be_BY\@latin libc lc_collate", + "LC_COLLATE = 'be_BY\@latin\@libc'", + 1, + ""); + +test_create_database( + "be_BY\@latin libc lc_collate invalid modifier order", + "LC_COLLATE = 'be_BY\@libc\@latin'", + 1, + "invalid locale name: \"be_BY\@libc\@latin\" \\(provider \"libc\"\\)"); + +# test default collation behaviour +# use commands and outputs from the regression test collate.linux.utf8 + +test_default_collation( + "--lc-collate=en_US.utf8\@libc --template=template0", + "en_US.utf8\@libc", + ( + [ + "CREATE TABLE collate_test1 (a int, b text NOT NULL);", + "CREATE TABLE\n" + ], + [ + "INSERT INTO collate_test1 VALUES " + . "(1, 'abc'), (2, 'äbc'), (3, 'bbc'), (4, 'ABC');", + "INSERT 0 4\n"], + [ + "SELECT * FROM collate_test1 WHERE b >= 'bbc';", + " a | b \n" + . "---+-----\n" + . " 3 | bbc\n" + . "(1 row)\n" + . "\n" + ], + [ + "SELECT a, b FROM collate_test1 ORDER BY b;", + " a | b \n" + . "---+-----\n" + . " 1 | abc\n" + . " 4 | ABC\n" + . " 2 | äbc\n" + . " 3 | bbc\n" + . "(4 rows)\n" + . "\n" + ], + # star expansion + [ + "SELECT * FROM collate_test1 ORDER BY b;", + " a | b \n" + . "---+-----\n" + . " 1 | abc\n" + . " 4 | ABC\n" + . " 2 | äbc\n" + . " 3 | bbc\n" + . 
"(4 rows)\n" + . "\n" + ], + # upper/lower + ["CREATE TABLE collate_test10 (a int, x text);", "CREATE TABLE\n", ""], + [ + "INSERT INTO collate_test10 VALUES (1, 'hij'), (2, 'HIJ');", + "INSERT 0 2\n" + ], + [ + "SELECT a, lower(x), upper(x), initcap(x) FROM collate_test10;", + " a | lower | upper | initcap \n" + . "---+-------+-------+---------\n" + . " 1 | hij | HIJ | Hij\n" + . " 2 | hij | HIJ | Hij\n" + . "(2 rows)\n" + . "\n" + ], + # LIKE/ILIKE + [ + "SELECT * FROM collate_test1 WHERE b LIKE 'abc';", + " a | b \n" + . "---+-----\n" + . " 1 | abc\n" + . "(1 row)\n" + . "\n" + ], + [ + "SELECT * FROM collate_test1 WHERE b LIKE 'abc%';", + " a | b \n" + . "---+-----\n" + . " 1 | abc\n" + . "(1 row)\n" + . "\n" + ], + [ + "SELECT * FROM collate_test1 WHERE b LIKE '%bc%';", + " a | b \n" + . "---+-----\n" + . " 1 | abc\n" + . " 2 | äbc\n" + . " 3 | bbc\n" + . "(3 rows)\n" + . "\n" + ], + [ + "SELECT * FROM collate_test1 WHERE b ILIKE 'abc';", + " a | b \n" + . "---+-----\n" + . " 1 | abc\n" + . " 4 | ABC\n" + . "(2 rows)\n" + . "\n" + ], + [ + "SELECT * FROM collate_test1 WHERE b ILIKE 'abc%';", + " a | b \n" + . "---+-----\n" + . " 1 | abc\n" + . " 4 | ABC\n" + . "(2 rows)\n" + . "\n" + ], + [ + "SELECT * FROM collate_test1 WHERE b ILIKE '%bc%';", + " a | b \n" + . "---+-----\n" + . " 1 | abc\n" + . " 2 | äbc\n" + . " 3 | bbc\n" + . " 4 | ABC\n" + . "(4 rows)\n" + . "\n" + ], + [ + "SELECT 'Türkiye' ILIKE '%KI%' AS \"true\";", + " true \n" + . "------\n" + . " t\n" + . "(1 row)\n" + . "\n" + ], + [ + "SELECT 'bıt' ILIKE 'BIT' AS \"false\";", + " false \n" + . "-------\n" + . " f\n" + . "(1 row)\n" + . "\n" + ], + # regular expressions + [ + "SELECT * FROM collate_test1 WHERE b ~ '^abc\$';", + " a | b \n" + . "---+-----\n" + . " 1 | abc\n" + . "(1 row)\n" + . "\n" + ], + [ + "SELECT * FROM collate_test1 WHERE b ~ '^abc';", + " a | b \n" + . "---+-----\n" + . " 1 | abc\n" + . "(1 row)\n" + . 
"\n" + ], + [ + "SELECT * FROM collate_test1 WHERE b ~ 'bc';", + " a | b \n" + . "---+-----\n" + . " 1 | abc\n" + . " 2 | äbc\n" + . " 3 | bbc\n" + . "(3 rows)\n" + . "\n" + ], + [ + "SELECT * FROM collate_test1 WHERE b ~* '^abc\$';", + " a | b \n" + . "---+-----\n" + . " 1 | abc\n" + . " 4 | ABC\n" + . "(2 rows)\n" + . "\n" + ], + [ + "SELECT * FROM collate_test1 WHERE b ~* '^abc';", + " a | b \n" + . "---+-----\n" + . " 1 | abc\n" + . " 4 | ABC\n" + . "(2 rows)\n" + . "\n" + ], + [ + "SELECT * FROM collate_test1 WHERE b ~* 'bc';", + " a | b \n" + . "---+-----\n" + . " 1 | abc\n" + . " 2 | äbc\n" + . " 3 | bbc\n" + . " 4 | ABC\n" + . "(4 rows)\n" + . "\n" + ], + ["CREATE TABLE collate_test6 (a int, b text);", "CREATE TABLE\n", ""], + [ + "INSERT INTO collate_test6 VALUES " + . "(1, 'abc'), (2, 'ABC'), (3, '123'), (4, 'ab1'), " + . "(5, 'a1!'), (6, 'a c'), (7, '!.;'), (8, ' '), " + . "(9, 'äbç'), (10, 'ÄBÇ');", + "INSERT 0 10\n" + ], + [ + "SELECT b, " + . "b ~ '^[[:alpha:]]+\$' AS is_alpha, " + . "b ~ '^[[:upper:]]+\$' AS is_upper, " + . "b ~ '^[[:lower:]]+\$' AS is_lower, " + . "b ~ '^[[:digit:]]+\$' AS is_digit, " + . "b ~ '^[[:alnum:]]+\$' AS is_alnum, " + . "b ~ '^[[:graph:]]+\$' AS is_graph, " + . "b ~ '^[[:print:]]+\$' AS is_print, " + . "b ~ '^[[:punct:]]+\$' AS is_punct, " + . "b ~ '^[[:space:]]+\$' AS is_space " + . "FROM collate_test6;", + " b | is_alpha | is_upper | is_lower | is_digit | is_alnum | is_graph | is_print | is_punct | is_space \n" + . "-----+----------+----------+----------+----------+----------+----------+----------+----------+----------\n" + . " abc | t | f | t | f | t | t | t | f | f\n" + . " ABC | t | t | f | f | t | t | t | f | f\n" + . " 123 | f | f | f | t | t | t | t | f | f\n" + . " ab1 | f | f | f | f | t | t | t | f | f\n" + . " a1! | f | f | f | f | f | t | t | f | f\n" + . " a c | f | f | f | f | f | f | t | f | f\n" + . " !.; | f | f | f | f | f | t | t | t | f\n" + . " | f | f | f | f | f | f | t | f | t\n" + . 
" äbç | t | f | t | f | t | t | t | f | f\n" + . " ÄBÇ | t | t | f | f | t | t | t | f | f\n" + . "(10 rows)\n" + . "\n" + ], + [ + "SELECT 'Türkiye' ~* 'KI' AS \"true\";", + " true \n" + . "------\n" + . " t\n" + . "(1 row)\n" + . "\n" + ], + [ + "SELECT 'bıt' ~* 'BIT' AS \"false\";", + " false \n" + . "-------\n" + . " f\n" + . "(1 row)\n" + . "\n" + ], + [ + "SELECT a, lower(coalesce(x, 'foo')) FROM collate_test10;", + " a | lower \n" + . "---+-------\n" + . " 1 | hij\n" + . " 2 | hij\n" + . "(2 rows)\n" + . "\n" + ], + [ + "SELECT a, b, greatest(b, 'CCC') FROM collate_test1 ORDER BY 3;", + " a | b | greatest \n" + . "---+-----+----------\n" + . " 1 | abc | CCC\n" + . " 2 | äbc | CCC\n" + . " 3 | bbc | CCC\n" + . " 4 | ABC | CCC\n" + . "(4 rows)\n" + . "\n" + ], + [ + "SELECT a, x, lower(greatest(x, 'foo')) FROM collate_test10;", + " a | x | lower \n" + . "---+-----+-------\n" + . " 1 | hij | hij\n" + . " 2 | HIJ | hij\n" + . "(2 rows)\n" + . "\n" + ], + [ + "SELECT a, nullif(b, 'abc') FROM collate_test1 ORDER BY 2;", + " a | nullif \n" + . "---+--------\n" + . " 4 | ABC\n" + . " 2 | äbc\n" + . " 3 | bbc\n" + . " 1 | \n" + . "(4 rows)\n" + . "\n" + ], + [ + "SELECT a, lower(nullif(x, 'foo')) FROM collate_test10;", + " a | lower \n" + . "---+-------\n" + . " 1 | hij\n" + . " 2 | hij\n" + . "(2 rows)\n" + . "\n" + ], + [ + "SELECT a, CASE b WHEN 'abc' THEN 'abcd' ELSE b END " + . "FROM collate_test1 ORDER BY 2;", + " a | b \n" + . "---+------\n" + . " 4 | ABC\n" + . " 2 | äbc\n" + . " 1 | abcd\n" + . " 3 | bbc\n" + . "(4 rows)\n" + . "\n" + ], + ["CREATE DOMAIN testdomain AS text;", "CREATE DOMAIN\n", ""], + [ + "SELECT a, b::testdomain FROM collate_test1 ORDER BY 2;", + " a | b \n" + . "---+-----\n" + . " 1 | abc\n" + . " 4 | ABC\n" + . " 2 | äbc\n" + . " 3 | bbc\n" + . "(4 rows)\n" + . "\n" + ], + [ + "SELECT a, lower(x::testdomain) FROM collate_test10;", + " a | lower \n" + . "---+-------\n" + . " 1 | hij\n" + . " 2 | hij\n" + . "(2 rows)\n" + . 
"\n" + ], + [ + "SELECT min(b), max(b) FROM collate_test1;", + " min | max \n" + . "-----+-----\n" + . " abc | bbc\n" + . "(1 row)\n" + . "\n", + "" + ], + [ + "SELECT array_agg(b ORDER BY b) FROM collate_test1;", + " array_agg \n" + . "-------------------\n" + . " {abc,ABC,äbc,bbc}\n" + . "(1 row)\n" + . "\n" + ], + [ + "SELECT a, b FROM collate_test1 " + . "UNION ALL " + . "SELECT a, b FROM collate_test1 ORDER BY 2;", + " a | b \n" + . "---+-----\n" + . " 1 | abc\n" + . " 1 | abc\n" + . " 4 | ABC\n" + . " 4 | ABC\n" + . " 2 | äbc\n" + . " 2 | äbc\n" + . " 3 | bbc\n" + . " 3 | bbc\n" + . "(8 rows)\n" + . "\n" + ], + # casting + [ + "SELECT a, CAST(b AS varchar) FROM collate_test1 ORDER BY 2;", + " a | b \n" + . "---+-----\n" + . " 1 | abc\n" + . " 4 | ABC\n" + . " 2 | äbc\n" + . " 3 | bbc\n" + . "(4 rows)\n" + . "\n" + ], + # propagation of collation in SQL functions (inlined and non-inlined + # cases) and plpgsql functions too + [ + "CREATE FUNCTION mylt (text, text) RETURNS boolean LANGUAGE sql " + . "AS \$\$ select \$1 < \$2 \$\$;", + "CREATE FUNCTION\n" + ], + [ + "CREATE FUNCTION mylt_noninline (text, text) " + . "RETURNS boolean LANGUAGE sql " + . "AS \$\$ select \$1 < \$2 limit 1 \$\$;", + "CREATE FUNCTION\n" + ], + [ + "CREATE FUNCTION mylt_plpgsql (text, text) " + . "RETURNS boolean LANGUAGE plpgsql " + . "AS \$\$ begin return \$1 < \$2; end \$\$;", + "CREATE FUNCTION\n" + ], + [ + "SELECT a.b AS a, b.b AS b, a.b < b.b AS lt, " + . "mylt(a.b, b.b), mylt_noninline(a.b, b.b), mylt_plpgsql(a.b, b.b) " + . "FROM collate_test1 a, collate_test1 b " + . "ORDER BY a.b, b.b;", + " a | b | lt | mylt | mylt_noninline | mylt_plpgsql \n" + . "-----+-----+----+------+----------------+--------------\n" + . " abc | abc | f | f | f | f\n" + . " abc | ABC | t | t | t | t\n" + . " abc | äbc | t | t | t | t\n" + . " abc | bbc | t | t | t | t\n" + . " ABC | abc | f | f | f | f\n" + . " ABC | ABC | f | f | f | f\n" + . " ABC | äbc | t | t | t | t\n" + . 
" ABC | bbc | t | t | t | t\n" + . " äbc | abc | f | f | f | f\n" + . " äbc | ABC | f | f | f | f\n" + . " äbc | äbc | f | f | f | f\n" + . " äbc | bbc | t | t | t | t\n" + . " bbc | abc | f | f | f | f\n" + . " bbc | ABC | f | f | f | f\n" + . " bbc | äbc | f | f | f | f\n" + . " bbc | bbc | f | f | f | f\n" + . "(16 rows)\n" + . "\n" + ], + # polymorphism + [ + "SELECT * FROM unnest(" + . "(SELECT array_agg(b ORDER BY b) FROM collate_test1)" + . ") ORDER BY 1;", + " unnest \n" + . "--------\n" + . " abc\n" + . " ABC\n" + . " äbc\n" + . " bbc\n" + . "(4 rows)\n" + . "\n" + ], + [ + "CREATE FUNCTION dup (anyelement) RETURNS anyelement " + . "AS 'select \$1' LANGUAGE sql;", + "CREATE FUNCTION\n" + ], + [ + "SELECT a, dup(b) FROM collate_test1 ORDER BY 2;", + " a | dup \n" + . "---+-----\n" + . " 1 | abc\n" + . " 4 | ABC\n" + . " 2 | äbc\n" + . " 3 | bbc\n" + . "(4 rows)\n" + . "\n" + ], + # indexes + [ + "CREATE INDEX collate_test1_idx1 ON collate_test1 (b);", + "CREATE INDEX\n" + ] + ) +); + +$node->stop; diff --git a/src/test/default_collation/libc/.gitignore b/src/test/default_collation/libc/.gitignore new file mode 100644 index 0000000..871e943 --- /dev/null +++ b/src/test/default_collation/libc/.gitignore @@ -0,0 +1,2 @@ +# Generated by test suite +/tmp_check/ diff --git a/src/test/default_collation/libc/Makefile b/src/test/default_collation/libc/Makefile new file mode 100644 index 0000000..98ab736 --- /dev/null +++ b/src/test/default_collation/libc/Makefile @@ -0,0 +1,11 @@ +# src/test/default_collation/libc/Makefile + +subdir = src/test/default_collation/libc +top_builddir = ../../../.. 
+include $(top_builddir)/src/Makefile.global
+
+check:
+	$(prove_check)
+
+clean distclean maintainer-clean:
+	rm -rf tmp_check
diff --git a/src/test/default_collation/libc/t/001_default_collation.pl b/src/test/default_collation/libc/t/001_default_collation.pl
new file mode 100644
index 0000000..bc8a6ad
--- /dev/null
+++ b/src/test/default_collation/libc/t/001_default_collation.pl
@@ -0,0 +1,390 @@
+use strict;
+use warnings;
+
+# Tests for the default collation provider in a build without ICU support:
+# initdb, createdb and CREATE DATABASE must accept locale names with no
+# provider modifier or with "@libc", end up with the libc provider, and
+# reject "@icu" as well as names with more than one provider modifier.
+
+use Config;
+use PostgresNode;
+use TestLib;
+use Test::More tests => 90;
+
+my $tempdir = TestLib::tempdir;
+my $datadir = "$tempdir/data";
+
+# test initdb
+
+# Run initdb with the space-separated $options on a scratch data directory.
+# A non-empty $error_message is used as a regex that stderr must match
+# (1 test).  Otherwise initdb must exit with code 0, print nothing on stderr
+# and report libc as the default collation provider (3 tests).
+sub test_initdb
+{
+	my ($test_name, $options, $error_message) = @_;
+	my ($in_initdb, $out_initdb, $err_initdb);
+
+	mkdir $datadir;
+
+	my @command = (qw(initdb -A trust -N -D), $datadir, split(" ", $options));
+	print "# Running: " . join(" ", @command) . "\n";
+	my $result = IPC::Run::run \@command, \$in_initdb, \$out_initdb,
+	  \$err_initdb;
+
+	if ($error_message)
+	{
+		like($err_initdb,
+			qr{$error_message},
+			"initdb: $test_name: check error message");
+	}
+	else
+	{
+		ok($result, "\"@command\" exit code 0");
+		is($err_initdb, "", "\"@command\" no stderr");
+		like($out_initdb,
+			qr{The default collation provider is \"libc\"\.},
+			"initdb: $test_name: check output");
+	}
+
+	File::Path::rmtree $datadir;
+}
+
+# empty locales
+
+test_initdb(
+	"empty locales",
+	"",
+	"");
+
+# --locale
+
+test_initdb(
+	"empty libc locale",
+	"--locale=\@libc",
+	"");
+
+test_initdb(
+	"C locale without collation provider",
+	"--locale=C",
+	"");
+
+test_initdb(
+	"POSIX locale without collation provider",
+	"--locale=POSIX",
+	"");
+
+test_initdb(
+	"C libc locale",
+	"--locale=C\@libc",
+	"");
+
+test_initdb(
+	"C icu locale",
+	"--locale=C\@icu",
+	"ICU is not supported in this build");
+
+test_initdb(
+	"C locale too many modifiers",
+	"--locale=C\@icu\@libc",
+	"invalid locale name \"C\@icu\"");
+
+# --lc-collate
+
+test_initdb(
+	"empty libc lc_collate",
+	"--lc-collate=\@libc",
+	"");
+
+test_initdb(
+	"C lc_collate without collation provider",
+	"--lc-collate=C",
+	"");
+
+test_initdb(
+	"POSIX lc_collate without collation provider",
+	"--lc-collate=POSIX",
+	"");
+
+test_initdb(
+	"C libc lc_collate",
+	"--lc-collate=C\@libc",
+	"");
+
+test_initdb(
+	"C icu lc_collate",
+	"--lc-collate=C\@icu",
+	"ICU is not supported in this build");
+
+test_initdb(
+	"C lc_collate too many modifiers",
+	"--lc-collate=C\@icu\@libc",
+	"invalid locale name \"C\@icu\" \\(provider \"libc\"\\)");
+
+# --locale & --lc-collate
+
+test_initdb(
+	"lc_collate implicit provider takes precedence",
+	"--locale=\@icu --lc-collate=C",
+	"");
+
+test_initdb(
+	"lc_collate explicit provider takes precedence",
+	"--locale=\@icu --lc-collate=\@libc",
+	"");
+
+# test createdb and CREATE DATABASE
+
+# Drop the scratch database "mydb".  dropdb is run in list form, so no shell
+# is involved, and a failure is reported because a leftover "mydb" makes every
+# later creation attempt fail.
+sub drop_mydb
+{
+	my @command = ("dropdb", "mydb");
+	print "# Running: " . join(" ", @command) . "\n";
+	system(@command) == 0
+	  or diag("\"@command\" failed: $?");
+}
+
+# Run createdb for database "mydb" with the space-separated $options, adding
+# --template=template0 if $from_template0 is true.  A non-empty $error_message
+# is used as a regex that stderr must match (1 test).  Otherwise createdb must
+# succeed silently and pg_database.datcollate of the new database must carry
+# the "@libc" suffix (3 tests); the database is dropped afterwards.
+sub test_createdb
+{
+	my ($test_name, $options, $from_template0, $error_message) = @_;
+	my (@command, $result, $in_command, $out_command, $err_command);
+
+	if ($from_template0)
+	{
+		$options = $options . " --template=template0";
+	}
+
+	@command = ("createdb", split(" ", $options), "mydb");
+	print "# Running: " . join(" ", @command) . "\n";
+	$result = IPC::Run::run \@command, \$in_command, \$out_command,
+	  \$err_command;
+
+	if ($error_message)
+	{
+		like($err_command,
+			qr{$error_message},
+			"createdb: $test_name: check error message");
+
+		# If the command unexpectedly succeeded, do not leave "mydb" behind:
+		# it would make all following tests fail with an unrelated error.
+		drop_mydb() if $result;
+	}
+	else
+	{
+		ok($result, "\"@command\" exit code 0");
+		is($err_command, "", "\"@command\" no stderr");
+
+		@command = (
+			"psql",
+			"-c",
+			"select datcollate from pg_database where datname = 'mydb';");
+		print "# Running: " . join(" ", @command) . "\n";
+		$result = IPC::Run::run \@command, \$in_command, \$out_command,
+		  \$err_command;
+
+		like($out_command,
+			qr{\@libc\n},
+			"createdb: $test_name: check pg_database.datcollate");
+
+		drop_mydb();
+	}
+}
+
+# Run CREATE DATABASE mydb through psql with the given $options clause, adding
+# "template = template0" if $from_template0 is true.  A non-empty
+# $error_message is used as a regex that stderr must match (1 test).
+# Otherwise the command must succeed silently and pg_database.datcollate of
+# the new database must carry the "@libc" suffix (4 tests); the database is
+# dropped afterwards.
+sub test_create_database
+{
+	my ($test_name, $options, $from_template0, $error_message) = @_;
+	my (@command, $result, $in_command, $out_command, $err_command);
+
+	@command = ("psql",
+		"-c",
+		"create database mydb "
+		  . $options
+		  . ($from_template0 ? " template = template0" : "")
+		  . ";");
+	print "# Running: " . join(" ", @command) . "\n";
+	$result = IPC::Run::run \@command, \$in_command, \$out_command,
+	  \$err_command;
+
+	if ($error_message)
+	{
+		like($err_command,
+			qr{$error_message},
+			"CREATE DATABASE: $test_name: check error message");
+
+		# If the command unexpectedly succeeded, do not leave "mydb" behind:
+		# it would make all following tests fail with an unrelated error.
+		drop_mydb() if $result;
+	}
+	else
+	{
+		ok($result, "\"@command\" exit code 0");
+		is($err_command, "", "\"@command\" no stderr");
+		like($out_command, qr{CREATE DATABASE}, "\"@command\" check output");
+
+		@command = (
+			"psql",
+			"-c",
+			"select datcollate from pg_database where datname = 'mydb';");
+		print "# Running: " . join(" ", @command) . "\n";
+		$result = IPC::Run::run \@command, \$in_command, \$out_command,
+		  \$err_command;
+
+		like($out_command,
+			qr{\@libc\n},
+			"CREATE DATABASE: $test_name: check pg_database.datcollate");
+
+		drop_mydb();
+	}
+}
+
+my $node = get_new_node('main');
+$node->init;
+$node->start;
+local $ENV{PGPORT} = $node->port;
+
+# test createdb
+
+# empty locales
+
+test_createdb(
+	"empty locales",
+	"",
+	0,
+	"");
+
+# --locale
+
+test_createdb(
+	"empty libc locale",
+	"--locale=\@libc",
+	0,
+	"");
+
+test_createdb(
+	"C locale without collation provider",
+	"--locale=C",
+	1,
+	"");
+
+test_createdb(
+	"POSIX locale without collation provider",
+	"--locale=POSIX",
+	1,
+	"");
+
+test_createdb(
+	"C libc locale",
+	"--locale=C\@libc",
+	1,
+	"");
+
+test_createdb(
+	"C icu locale",
+	"--locale=C\@icu",
+	1,
+	"ICU is not supported in this build");
+
+test_createdb(
+	"C locale too many modifiers",
+	"--locale=C\@icu\@libc",
+	1,
+	"invalid locale name: \"C\@icu\"");
+
+# --lc-collate
+
+test_createdb(
+	"empty libc lc_collate",
+	"--lc-collate=\@libc",
+	0,
+	"");
+
+test_createdb(
+	"C lc_collate without collation provider",
+	"--lc-collate=C",
+	1,
+	"");
+test_createdb(
+	"POSIX lc_collate without collation provider",
+	"--lc-collate=POSIX",
+	1,
+	"");
+
+test_createdb(
+	"C libc lc_collate",
+	"--lc-collate=C\@libc",
+	1,
+	"");
+
+test_createdb(
+	"C icu lc_collate",
+	"--lc-collate=C\@icu",
+	1,
+	"ICU is not supported in this build");
+
+test_createdb(
+	"C lc_collate too many modifiers",
+	"--lc-collate=C\@icu\@libc",
+	1,
+	"invalid locale name: \"C\@icu\" \\(provider \"libc\"\\)");
+
+# test CREATE DATABASE
+
+# empty locales
+
+test_create_database(
+	"empty locales",
+	"",
+	0,
+	"");
+
+# LC_COLLATE
+
+test_create_database(
+	"empty libc lc_collate",
+	"LC_COLLATE = '\@libc'",
+	0,
+	"");
+
+test_create_database(
+	"C lc_collate without collation provider",
+	"LC_COLLATE = 'C'",
+	1,
+	"");
+test_create_database(
+	"POSIX lc_collate without collation provider",
+	"LC_COLLATE = 'POSIX'",
+	1,
+	"");
+
+test_create_database(
+	"C libc lc_collate",
+	"LC_COLLATE = 'C\@libc'",
+	1,
+	"");
+
+test_create_database(
+	"C icu lc_collate",
+	"LC_COLLATE = 'C\@icu'",
+	1,
+	"ICU is not supported in this build");
+
+test_create_database(
+	"C lc_collate too many modifiers",
+	"LC_COLLATE = 'C\@icu\@libc'",
+	1,
+	"invalid locale name: \"C\@icu\" \\(provider \"libc\"\\)");
+
+$node->stop;
diff --git a/src/test/regress/expected/collate.icu.utf8.out b/src/test/regress/expected/collate.icu.utf8.out index e1fc998..53f53d4 100644 --- a/src/test/regress/expected/collate.icu.utf8.out +++ b/src/test/regress/expected/collate.icu.utf8.out @@ -979,11 +979,14 @@ SELECT relname, pg_get_indexdef(oid) FROM pg_class WHERE relname LIKE 'collate_t -- schema manipulation commands CREATE ROLE regress_test_role; CREATE SCHEMA test_schema; +-- remove provider modifier and collation version +CREATE FUNCTION get_lc_collate (text) RETURNS text LANGUAGE sql + AS $$ select substring($1 from '(.*)@[^@]+$') $$; -- We need to do this this way to cope with varying names for encodings: do $$ BEGIN EXECUTE 'CREATE COLLATION test0 (provider = icu, locale = ' || - quote_literal(current_setting('lc_collate')) || ');'; + quote_literal(get_lc_collate(current_setting('lc_collate'))) || ');'; END $$; CREATE COLLATION test0 FROM "C"; -- fail, duplicate name @@ -991,7 +994,7 @@ ERROR: collation "test0" already exists do $$ BEGIN EXECUTE 'CREATE COLLATION test1 (provider = icu, lc_collate = ' || - quote_literal(current_setting('lc_collate')) || + quote_literal(get_lc_collate(current_setting('lc_collate'))) || ', lc_ctype = ' || quote_literal(current_setting('lc_ctype')) || ');'; END @@ -1102,7 +1105,7 @@ drop type textrange_c; drop type textrange_en_us; -- cleanup DROP SCHEMA collate_tests CASCADE; -NOTICE: drop cascades to 18 other objects +NOTICE: drop cascades to 19 other objects DETAIL: drop cascades to table collate_test1 drop cascades to table collate_test_like drop cascades to table collate_test2 @@ -1121,6 +1124,7 @@ drop cascades to function mylt_noninline(text,text) drop cascades to function mylt_plpgsql(text,text) drop cascades to
function mylt2(text,text) drop cascades to function dup(anyelement) +drop cascades to function get_lc_collate(text) RESET search_path; -- leave a collation for pg_upgrade test CREATE COLLATION coll_icu_upgrade FROM "und-x-icu"; diff --git a/src/test/regress/expected/collate.linux.utf8.out b/src/test/regress/expected/collate.linux.utf8.out index 6b73186..dec1420 100644 --- a/src/test/regress/expected/collate.linux.utf8.out +++ b/src/test/regress/expected/collate.linux.utf8.out @@ -988,11 +988,14 @@ SELECT relname, pg_get_indexdef(oid) FROM pg_class WHERE relname LIKE 'collate_t -- schema manipulation commands CREATE ROLE regress_test_role; CREATE SCHEMA test_schema; +-- remove provider modifier and collation version +CREATE FUNCTION get_lc_collate (text) RETURNS text LANGUAGE sql + AS $$ select substring($1 from '(.*)@[^@]+$') $$; -- We need to do this this way to cope with varying names for encodings: do $$ BEGIN EXECUTE 'CREATE COLLATION test0 (locale = ' || - quote_literal(current_setting('lc_collate')) || ');'; + quote_literal(get_lc_collate(current_setting('lc_collate'))) || ');'; END $$; CREATE COLLATION test0 FROM "C"; -- fail, duplicate name @@ -1004,7 +1007,7 @@ NOTICE: collation "test0" for encoding "UTF8" already exists, skipping do $$ BEGIN EXECUTE 'CREATE COLLATION test1 (lc_collate = ' || - quote_literal(current_setting('lc_collate')) || + quote_literal(get_lc_collate(current_setting('lc_collate'))) || ', lc_ctype = ' || quote_literal(current_setting('lc_ctype')) || ');'; END @@ -1119,7 +1122,7 @@ drop type textrange_c; drop type textrange_en_us; -- cleanup DROP SCHEMA collate_tests CASCADE; -NOTICE: drop cascades to 18 other objects +NOTICE: drop cascades to 19 other objects DETAIL: drop cascades to table collate_test1 drop cascades to table collate_test_like drop cascades to table collate_test2 @@ -1138,3 +1141,4 @@ drop cascades to function mylt_noninline(text,text) drop cascades to function mylt_plpgsql(text,text) drop cascades to function 
mylt2(text,text) drop cascades to function dup(anyelement) +drop cascades to function get_lc_collate(text) diff --git a/src/test/regress/sql/collate.icu.utf8.sql b/src/test/regress/sql/collate.icu.utf8.sql index ef39445..936d684 100644 --- a/src/test/regress/sql/collate.icu.utf8.sql +++ b/src/test/regress/sql/collate.icu.utf8.sql @@ -339,18 +339,22 @@ SELECT relname, pg_get_indexdef(oid) FROM pg_class WHERE relname LIKE 'collate_t CREATE ROLE regress_test_role; CREATE SCHEMA test_schema; +-- remove provider modifier and collation version +CREATE FUNCTION get_lc_collate (text) RETURNS text LANGUAGE sql + AS $$ select substring($1 from '(.*)@[^@]+$') $$; + -- We need to do this this way to cope with varying names for encodings: do $$ BEGIN EXECUTE 'CREATE COLLATION test0 (provider = icu, locale = ' || - quote_literal(current_setting('lc_collate')) || ');'; + quote_literal(get_lc_collate(current_setting('lc_collate'))) || ');'; END $$; CREATE COLLATION test0 FROM "C"; -- fail, duplicate name do $$ BEGIN EXECUTE 'CREATE COLLATION test1 (provider = icu, lc_collate = ' || - quote_literal(current_setting('lc_collate')) || + quote_literal(get_lc_collate(current_setting('lc_collate'))) || ', lc_ctype = ' || quote_literal(current_setting('lc_ctype')) || ');'; END diff --git a/src/test/regress/sql/collate.linux.utf8.sql b/src/test/regress/sql/collate.linux.utf8.sql index b51162e..e03ea1b 100644 --- a/src/test/regress/sql/collate.linux.utf8.sql +++ b/src/test/regress/sql/collate.linux.utf8.sql @@ -339,11 +339,15 @@ SELECT relname, pg_get_indexdef(oid) FROM pg_class WHERE relname LIKE 'collate_t CREATE ROLE regress_test_role; CREATE SCHEMA test_schema; +-- remove provider modifier and collation version +CREATE FUNCTION get_lc_collate (text) RETURNS text LANGUAGE sql + AS $$ select substring($1 from '(.*)@[^@]+$') $$; + -- We need to do this this way to cope with varying names for encodings: do $$ BEGIN EXECUTE 'CREATE COLLATION test0 (locale = ' || - 
quote_literal(current_setting('lc_collate')) || ');'; + quote_literal(get_lc_collate(current_setting('lc_collate'))) || ');'; END $$; CREATE COLLATION test0 FROM "C"; -- fail, duplicate name @@ -352,7 +356,7 @@ CREATE COLLATION IF NOT EXISTS test0 (locale = 'foo'); -- ok, skipped do $$ BEGIN EXECUTE 'CREATE COLLATION test1 (lc_collate = ' || - quote_literal(current_setting('lc_collate')) || + quote_literal(get_lc_collate(current_setting('lc_collate'))) || ', lc_ctype = ' || quote_literal(current_setting('lc_ctype')) || ');'; END diff --git a/src/tools/msvc/Mkvcbuild.pm b/src/tools/msvc/Mkvcbuild.pm index d8c279a..27fc2b8 100644 --- a/src/tools/msvc/Mkvcbuild.pm +++ b/src/tools/msvc/Mkvcbuild.pm @@ -49,7 +49,13 @@ my @contrib_excludes = ( 'snapshot_too_old'); # Set of variables for frontend modules -my $frontend_defines = { 'initdb' => 'FRONTEND' }; +my $frontend_defines = { + 'initdb' => 'FRONTEND', + 'psql' => 'FRONTEND', + 'pg_dump' => 'FRONTEND', + 'pg_dumpall' => 'FRONTEND', + 'pg_restore' => 'FRONTEND', + }; my @frontend_uselibpq = ('pg_ctl', 'pg_upgrade', 'pgbench', 'psql', 'initdb'); my @frontend_uselibpgport = ( 'pg_archivecleanup', 'pg_test_fsync', @@ -59,11 +65,14 @@ my @frontend_uselibpgcommon = ( 'pg_archivecleanup', 'pg_test_fsync', 'pg_test_timing', 'pg_upgrade', 'pg_waldump', 'pgbench'); +my @iculibs = ('icuin.lib', 'icuuc.lib'); my $frontend_extralibs = { 'initdb' => ['ws2_32.lib'], 'pg_restore' => ['ws2_32.lib'], 'pgbench' => ['ws2_32.lib'], + 'mchar' => [@iculibs], 'psql' => ['ws2_32.lib'] }; +my @frontend_iculibs = ('initdb', 'pg_upgrade'); my $frontend_extraincludes = { 'initdb' => ['src/timezone'], 'psql' => ['src/backend'] }; @@ -111,9 +120,9 @@ sub mkvcbuild our @pgcommonallfiles = qw( base64.c config_info.c controldata_utils.c exec.c ip.c keywords.c - md5.c pg_lzcompress.c pgfnames.c psprintf.c relpath.c rmtree.c - saslprep.c scram-common.c string.c unicode_norm.c username.c - wait_error.c); + md5.c pg_collation_fn_common.c pg_lzcompress.c 
pgfnames.c psprintf.c + relpath.c rmtree.c saslprep.c scram-common.c string.c unicode_norm.c + username.c wait_error.c); if ($solution->{options}->{openssl}) { @@ -145,6 +154,7 @@ sub mkvcbuild $libpgfeutils->AddDefine('FRONTEND'); $libpgfeutils->AddIncludeDir('src/interfaces/libpq'); $libpgfeutils->AddFiles('src/fe_utils', @pgfeutilsfiles); + $libpgfeutils->AddFile('src/common/pg_collation_fn_common.c'); $postgres = $solution->AddProject('postgres', 'exe', '', 'src/backend'); $postgres->AddIncludeDir('src/backend'); @@ -228,6 +238,7 @@ sub mkvcbuild 'src/interfaces/libpq'); $libpq->AddDefine('FRONTEND'); $libpq->AddDefine('UNSAFE_STAT_OK'); + $libpq->AddDefine('LIBPQ_MAKE'); $libpq->AddIncludeDir('src/port'); $libpq->AddLibrary('secur32.lib'); $libpq->AddLibrary('ws2_32.lib'); @@ -236,6 +247,7 @@ sub mkvcbuild $libpq->ReplaceFile('src/interfaces/libpq/libpqrc.c', 'src/interfaces/libpq/libpq.rc'); $libpq->AddReference($libpgport); + $libpq->AddFile('src/common/pg_collation_fn_common.c'); # The OBJS scraper doesn't know about ifdefs, so remove fe-secure-openssl.c # and sha2_openssl.c if building without OpenSSL, and remove sha2.c if @@ -420,6 +432,15 @@ sub mkvcbuild { push @contrib_excludes, 'uuid-ossp'; } + + # Link the ICU libraries into the frontends that need them only when + # ICU support is enabled, independently of the uuid-ossp check above. + if ($solution->{options}->{icu}) + { + foreach my $fe (@frontend_iculibs) { + push @{$frontend_extralibs->{$fe}}, @iculibs; + } + } # AddProject() does not recognize the constructs used to populate OBJS in # the pgcrypto Makefile, so it will discover no files. -- 2.7.4