From f3884b483884a4e39b577dc01b72bab5176964bb Mon Sep 17 00:00:00 2001 From: Julien Rouhaud Date: Fri, 18 Mar 2022 16:20:01 +0800 Subject: [PATCH v2] Fix global icu collations for ICU < 54 createdb() didn't check for collation attributes validity, which has to be done explicitly on ICU < 54. It also forgot to close the ICU collator opened during the check which leaks some memory. To fix both, add a new check_icu_locale() that does all the appropriate verification and close the ICU collator. initdb also had some partial check for ICU < 54. To have consistent error reporting across major ICU versions, and get rid of the need to include ucol.h, remove the partial check there. The backend will report an error if needed during the post-boostrap iniitialization phase. --- src/backend/commands/dbcommands.c | 9 +-------- src/backend/utils/adt/pg_locale.c | 21 +++++++++++++++++++++ src/bin/initdb/initdb.c | 22 +++------------------- src/bin/initdb/t/001_initdb.pl | 2 +- src/include/utils/pg_locale.h | 1 + src/test/icu/t/010_database.pl | 4 ++-- 6 files changed, 29 insertions(+), 30 deletions(-) diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c index 962e11dd8f..15b6eb8e0e 100644 --- a/src/backend/commands/dbcommands.c +++ b/src/backend/commands/dbcommands.c @@ -460,14 +460,7 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt) if (dblocprovider == COLLPROVIDER_ICU) { #ifdef USE_ICU - UErrorCode status; - - status = U_ZERO_ERROR; - ucol_open(dbiculocale, &status); - if (U_FAILURE(status)) - ereport(ERROR, - (errmsg("could not open collator for locale \"%s\": %s", - dbiculocale, u_errorName(status)))); + check_icu_locale(dbiculocale); #else ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c index 4019255f8e..af69f2729e 100644 --- a/src/backend/utils/adt/pg_locale.c +++ b/src/backend/utils/adt/pg_locale.c @@ -1884,6 +1884,27 @@ icu_from_uchar(char **result, const UChar *buff_uchar, int32_t len_uchar) return len_result; } +/* + * Check if the given locale ID is valid, and ereport(ERROR) if it isn't. + */ +void +check_icu_locale(const char *icu_locale) +{ + UCollator *collator; + UErrorCode status; + + status = U_ZERO_ERROR; + collator = ucol_open(icu_locale, &status); + if (U_FAILURE(status)) + ereport(ERROR, + (errmsg("could not open collator for locale \"%s\": %s", + icu_locale, u_errorName(status)))); + + if (U_ICU_VERSION_MAJOR_NUM < 54) + icu_set_collation_attributes(collator, icu_locale); + ucol_close(collator); +} + /* * Parse collation attributes and apply them to the open collator. This takes * a string like "und@colStrength=primary;colCaseLevel=yes" and parses and diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c index cbcd55288f..5e36943ef3 100644 --- a/src/bin/initdb/initdb.c +++ b/src/bin/initdb/initdb.c @@ -55,10 +55,6 @@ #include #include -#ifdef USE_ICU -#include -#endif - #ifdef HAVE_SHM_OPEN #include "sys/mman.h" #endif @@ -2205,22 +2201,10 @@ setlocales(void) } /* - * Check ICU locale ID + * In supported builds, the ICU locale ID will be checked by the + * backend when performing the post-boostrap initialization. */ -#ifdef USE_ICU - { - UErrorCode status; - - status = U_ZERO_ERROR; - ucol_open(icu_locale, &status); - if (U_FAILURE(status)) - { - pg_log_error("could not open collator for locale \"%s\": %s", - icu_locale, u_errorName(status)); - exit(1); - } - } -#else +#ifndef USE_ICU pg_log_error("ICU is not supported in this build"); fprintf(stderr, _("You need to rebuild PostgreSQL using %s.\n"), "--with-icu"); exit(1); diff --git a/src/bin/initdb/t/001_initdb.pl b/src/bin/initdb/t/001_initdb.pl index c636bf3ab2..a3397777cf 100644 --- a/src/bin/initdb/t/001_initdb.pl +++ b/src/bin/initdb/t/001_initdb.pl @@ -105,7 +105,7 @@ if ($ENV{with_icu} eq 'yes') 'option --icu-locale'); command_fails_like(['initdb', '--no-sync', '--locale-provider=icu', '--icu-locale=@colNumeric=lower', "$tempdir/dataX"], - qr/initdb: error: could not open collator for locale/, + qr/FATAL: could not open collator for locale/, 'fails for invalid ICU locale'); } else diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h index 9b158f24a0..297d14d918 100644 --- a/src/include/utils/pg_locale.h +++ b/src/include/utils/pg_locale.h @@ -115,6 +115,7 @@ extern char *get_collation_actual_version(char collprovider, const char *collcol #ifdef USE_ICU extern int32_t icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes); extern int32_t icu_from_uchar(char **result, const UChar *buff_uchar, int32_t len_uchar); +extern void check_icu_locale(const char *icu_locale); #endif /* These functions convert from/to libc's wchar_t, *not* pg_wchar_t */ diff --git a/src/test/icu/t/010_database.pl b/src/test/icu/t/010_database.pl index d50941b53d..07a1084b09 100644 --- a/src/test/icu/t/010_database.pl +++ b/src/test/icu/t/010_database.pl @@ -16,11 +16,11 @@ $node1->init; $node1->start; $node1->safe_psql('postgres', - q{CREATE DATABASE dbicu LOCALE_PROVIDER icu LOCALE 'C' ICU_LOCALE 'en-u-kf-upper' ENCODING 'UTF8' TEMPLATE template0}); + q{CREATE DATABASE dbicu LOCALE_PROVIDER icu LOCALE 'C' ICU_LOCALE 'en@colCaseFirst=upper' ENCODING 'UTF8' TEMPLATE template0}); $node1->safe_psql('dbicu', q{ -CREATE COLLATION upperfirst (provider = icu, locale = 'en-u-kf-upper'); +CREATE COLLATION upperfirst (provider = icu, locale = 'en@colCaseFirst=upper'); CREATE TABLE icu (def text, en text COLLATE "en-x-icu", upfirst text COLLATE upperfirst); INSERT INTO icu VALUES ('a', 'a', 'a'), ('b', 'b', 'b'), ('A', 'A', 'A'), ('B', 'B', 'B'); }); -- 2.33.1