From 711ec5f45e83abab93d06cc1d65647d04c6e97d0 Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Mon, 11 Mar 2019 21:20:03 +0100 Subject: [PATCH 2/2] Add support for collation attributes on older ICU versions Starting in ICU 54, collation customization attributes can be specified in the locale string, for example "@colStrength=primary;colCaseLevel=yes". Add support for this for older ICU versions as well, by adding some minimal parsing of the attributes in the locale string and calling ucol_setAttribute() on them. This is essentially what never ICU versions do internally in ucol_open(). This was we can offer this functionality in a consistent way in all ICU versions supported by PostgreSQL. --- src/backend/utils/adt/pg_locale.c | 104 ++++++++++++++++++++++++++++++ 1 file changed, 104 insertions(+) diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c index 50b8b31645..c6fedab8f4 100644 --- a/src/backend/utils/adt/pg_locale.c +++ b/src/backend/utils/adt/pg_locale.c @@ -58,6 +58,7 @@ #include "catalog/pg_control.h" #include "mb/pg_wchar.h" #include "utils/builtins.h" +#include "utils/formatting.h" #include "utils/hsearch.h" #include "utils/lsyscache.h" #include "utils/memutils.h" @@ -132,6 +133,9 @@ static HTAB *collation_cache = NULL; static char *IsoLocaleName(const char *); /* MSVC specific */ #endif +#ifdef USE_ICU +static void icu_set_collation_attributes(UCollator *collator, const char *loc); +#endif /* * pg_perm_setlocale @@ -1380,6 +1384,9 @@ pg_newlocale_from_collation(Oid collid) (errmsg("could not open collator for locale \"%s\": %s", collcollate, u_errorName(status)))); + if (U_ICU_VERSION_MAJOR_NUM < 54) + icu_set_collation_attributes(collator, collcollate); + /* We will leak this string if we get an error below :-( */ result.info.icu.locale = MemoryContextStrdup(TopMemoryContext, collcollate); @@ -1588,6 +1595,103 @@ icu_from_uchar(char **result, const UChar *buff_uchar, int32_t len_uchar) return len_result; } +/* + * Parse collation attributes and apply them to the open collator. This takes + * a string like "und@colStrength=primary;colCaseLevel=yes" and parses and + * applyes the key-value arguments. + * + * Starting with ICU version 54, the attributes are processed automatically by + * ucol_open(), so this is only necessary for emulating this behavior on older + * versions. + */ +pg_attribute_unused() +static void +icu_set_collation_attributes(UCollator *collator, const char *loc) +{ + char *str = asc_tolower(loc, strlen(loc)); + + str = strchr(str, '@'); + if (!str) + return; + str++; + + for (char *token = strtok(str, ";"); token; token = strtok(NULL, ";")) + { + char *e = strchr(token, '='); + + if (e) + { + char *name; + char *value; + UColAttribute uattr = -1; + UColAttributeValue uvalue = -1; + UErrorCode status; + + status = U_ZERO_ERROR; + + *e = '\0'; + name = token; + value = e + 1; + + /* + * See attribute name and value lists in ICU i18n/coll.cpp + */ + if (strcmp(name, "colstrength") == 0) + uattr = UCOL_STRENGTH; + else if (strcmp(name, "colbackwards") == 0) + uattr = UCOL_FRENCH_COLLATION; + else if (strcmp(name, "colcaselevel") == 0) + uattr = UCOL_CASE_LEVEL; + else if (strcmp(name, "colcasefirst") == 0) + uattr = UCOL_CASE_FIRST; + else if (strcmp(name, "colalternate") == 0) + uattr = UCOL_ALTERNATE_HANDLING; + else if (strcmp(name, "colnormalization") == 0) + uattr = UCOL_NORMALIZATION_MODE; + else if (strcmp(name, "colnumeric") == 0) + uattr = UCOL_NUMERIC_COLLATION; + /* ignore if unknown */ + + if (strcmp(value, "primary") == 0) + uvalue = UCOL_PRIMARY; + else if (strcmp(value, "secondary") == 0) + uvalue = UCOL_SECONDARY; + else if (strcmp(value, "tertiary") == 0) + uvalue = UCOL_TERTIARY; + else if (strcmp(value, "quaternary") == 0) + uvalue = UCOL_QUATERNARY; + else if (strcmp(value, "identical") == 0) + uvalue = UCOL_IDENTICAL; + else if (strcmp(value, "no") == 0) + uvalue = UCOL_OFF; + else if (strcmp(value, "yes") == 0) + uvalue = UCOL_ON; + else if (strcmp(value, "shifted") == 0) + uvalue = UCOL_SHIFTED; + else if (strcmp(value, "non-ignorable") == 0) + uvalue = UCOL_NON_IGNORABLE; + else if (strcmp(value, "lower") == 0) + uvalue = UCOL_LOWER_FIRST; + else if (strcmp(value, "upper") == 0) + uvalue = UCOL_UPPER_FIRST; + else + status = U_ILLEGAL_ARGUMENT_ERROR; + + if (uattr != -1 && uvalue != -1) + ucol_setAttribute(collator, uattr, uvalue, &status); + + /* + * Pretend the error came from ucol_open(), for consistent error + * message across ICU versions. + */ + if (U_FAILURE(status)) + ereport(ERROR, + (errmsg("could not open collator for locale \"%s\": %s", + loc, u_errorName(status)))); + } + } +} + #endif /* USE_ICU */ /* -- 2.21.0