From 2ee4b77221020e81ec83cf37d36e3955bf619d80 Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Thu, 13 Jan 2022 09:14:41 +0100 Subject: [PATCH] Call pg_newlocale_from_collation() also with default collation Previously, callers of pg_newlocale_from_collation() did not call it if the collation was DEFAULT_COLLATION_OID and instead proceeded with a pg_locale_t of 0. Instead, now we call it anyway and have it return 0 if the default collation was passed. It already did this, so we just have to adjust the callers. This simplifies all the call sites and also makes future enhancements easier. After discussion and testing, the previous comment in pg_locale.c about avoiding this for performance reasons may have been mistaken since it was testing a very different patch version way back when. Discussion: https://www.postgresql.org/message-id/ed3baa81-7fac-7788-cc12-41e3f7917e34@enterprisedb.com --- src/backend/access/hash/hashfunc.c | 4 +- src/backend/regex/regc_pg_locale.c | 40 ++++++------ src/backend/utils/adt/formatting.c | 96 +++++++++++++--------------- src/backend/utils/adt/like.c | 37 ++++++----- src/backend/utils/adt/like_support.c | 27 ++++---- src/backend/utils/adt/pg_locale.c | 3 - src/backend/utils/adt/varchar.c | 26 +++++--- src/backend/utils/adt/varlena.c | 34 ++++++---- 8 files changed, 135 insertions(+), 132 deletions(-) diff --git a/src/backend/access/hash/hashfunc.c b/src/backend/access/hash/hashfunc.c index 0521c69dd5..b57ed946c4 100644 --- a/src/backend/access/hash/hashfunc.c +++ b/src/backend/access/hash/hashfunc.c @@ -278,7 +278,7 @@ hashtext(PG_FUNCTION_ARGS) errmsg("could not determine which collation to use for string hashing"), errhint("Use the COLLATE clause to set the collation explicitly."))); - if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID) + if (!lc_collate_is_c(collid)) mylocale = pg_newlocale_from_collation(collid); if (!mylocale || mylocale->deterministic) @@ -334,7 +334,7 @@ hashtextextended(PG_FUNCTION_ARGS) errmsg("could not determine which collation to use for string hashing"), errhint("Use the COLLATE clause to set the collation explicitly."))); - if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID) + if (!lc_collate_is_c(collid)) mylocale = pg_newlocale_from_collation(collid); if (!mylocale || mylocale->deterministic) diff --git a/src/backend/regex/regc_pg_locale.c b/src/backend/regex/regc_pg_locale.c index e0d93eab32..6e84f42cb2 100644 --- a/src/backend/regex/regc_pg_locale.c +++ b/src/backend/regex/regc_pg_locale.c @@ -231,6 +231,18 @@ static const unsigned char pg_char_properties[128] = { void pg_set_regex_collation(Oid collation) { + if (!OidIsValid(collation)) + { + /* + * This typically means that the parser could not resolve a + * conflict of implicit collations, so report it that way. + */ + ereport(ERROR, + (errcode(ERRCODE_INDETERMINATE_COLLATION), + errmsg("could not determine which collation to use for regular expression"), + errhint("Use the COLLATE clause to set the collation explicitly."))); + } + if (lc_ctype_is_c(collation)) { /* C/POSIX collations use this path regardless of database encoding */ @@ -240,28 +252,12 @@ pg_set_regex_collation(Oid collation) } else { - if (collation == DEFAULT_COLLATION_OID) - pg_regex_locale = 0; - else if (OidIsValid(collation)) - { - /* - * NB: pg_newlocale_from_collation will fail if not HAVE_LOCALE_T; - * the case of pg_regex_locale != 0 but not HAVE_LOCALE_T does not - * have to be considered below. - */ - pg_regex_locale = pg_newlocale_from_collation(collation); - } - else - { - /* - * This typically means that the parser could not resolve a - * conflict of implicit collations, so report it that way. - */ - ereport(ERROR, - (errcode(ERRCODE_INDETERMINATE_COLLATION), - errmsg("could not determine which collation to use for regular expression"), - errhint("Use the COLLATE clause to set the collation explicitly."))); - } + /* + * NB: pg_newlocale_from_collation will fail if not HAVE_LOCALE_T; + * the case of pg_regex_locale != 0 but not HAVE_LOCALE_T does not + * have to be considered below. + */ + pg_regex_locale = pg_newlocale_from_collation(collation); if (pg_regex_locale && !pg_regex_locale->deterministic) ereport(ERROR, diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c index e8f996ac83..d4c2e7b069 100644 --- a/src/backend/utils/adt/formatting.c +++ b/src/backend/utils/adt/formatting.c @@ -1641,6 +1641,19 @@ str_tolower(const char *buff, size_t nbytes, Oid collid) if (!buff) return NULL; + if (!OidIsValid(collid)) + { + /* + * This typically means that the parser could not resolve a + * conflict of implicit collations, so report it that way. + */ + ereport(ERROR, + (errcode(ERRCODE_INDETERMINATE_COLLATION), + errmsg("could not determine which collation to use for %s function", + "lower()"), + errhint("Use the COLLATE clause to set the collation explicitly."))); + } + /* C/POSIX collations use this path regardless of database encoding */ if (lc_ctype_is_c(collid)) { @@ -1648,24 +1661,9 @@ str_tolower(const char *buff, size_t nbytes, Oid collid) } else { - pg_locale_t mylocale = 0; + pg_locale_t mylocale; - if (collid != DEFAULT_COLLATION_OID) - { - if (!OidIsValid(collid)) - { - /* - * This typically means that the parser could not resolve a - * conflict of implicit collations, so report it that way. - */ - ereport(ERROR, - (errcode(ERRCODE_INDETERMINATE_COLLATION), - errmsg("could not determine which collation to use for %s function", - "lower()"), - errhint("Use the COLLATE clause to set the collation explicitly."))); - } - mylocale = pg_newlocale_from_collation(collid); - } + mylocale = pg_newlocale_from_collation(collid); #ifdef USE_ICU if (mylocale && mylocale->provider == COLLPROVIDER_ICU) @@ -1765,6 +1763,19 @@ str_toupper(const char *buff, size_t nbytes, Oid collid) if (!buff) return NULL; + if (!OidIsValid(collid)) + { + /* + * This typically means that the parser could not resolve a + * conflict of implicit collations, so report it that way. + */ + ereport(ERROR, + (errcode(ERRCODE_INDETERMINATE_COLLATION), + errmsg("could not determine which collation to use for %s function", + "upper()"), + errhint("Use the COLLATE clause to set the collation explicitly."))); + } + /* C/POSIX collations use this path regardless of database encoding */ if (lc_ctype_is_c(collid)) { @@ -1772,24 +1783,9 @@ str_toupper(const char *buff, size_t nbytes, Oid collid) } else { - pg_locale_t mylocale = 0; + pg_locale_t mylocale; - if (collid != DEFAULT_COLLATION_OID) - { - if (!OidIsValid(collid)) - { - /* - * This typically means that the parser could not resolve a - * conflict of implicit collations, so report it that way. - */ - ereport(ERROR, - (errcode(ERRCODE_INDETERMINATE_COLLATION), - errmsg("could not determine which collation to use for %s function", - "upper()"), - errhint("Use the COLLATE clause to set the collation explicitly."))); - } - mylocale = pg_newlocale_from_collation(collid); - } + mylocale = pg_newlocale_from_collation(collid); #ifdef USE_ICU if (mylocale && mylocale->provider == COLLPROVIDER_ICU) @@ -1890,6 +1886,19 @@ str_initcap(const char *buff, size_t nbytes, Oid collid) if (!buff) return NULL; + if (!OidIsValid(collid)) + { + /* + * This typically means that the parser could not resolve a + * conflict of implicit collations, so report it that way. + */ + ereport(ERROR, + (errcode(ERRCODE_INDETERMINATE_COLLATION), + errmsg("could not determine which collation to use for %s function", + "initcap()"), + errhint("Use the COLLATE clause to set the collation explicitly."))); + } + /* C/POSIX collations use this path regardless of database encoding */ if (lc_ctype_is_c(collid)) { @@ -1897,24 +1906,9 @@ str_initcap(const char *buff, size_t nbytes, Oid collid) } else { - pg_locale_t mylocale = 0; + pg_locale_t mylocale; - if (collid != DEFAULT_COLLATION_OID) - { - if (!OidIsValid(collid)) - { - /* - * This typically means that the parser could not resolve a - * conflict of implicit collations, so report it that way. - */ - ereport(ERROR, - (errcode(ERRCODE_INDETERMINATE_COLLATION), - errmsg("could not determine which collation to use for %s function", - "initcap()"), - errhint("Use the COLLATE clause to set the collation explicitly."))); - } - mylocale = pg_newlocale_from_collation(collid); - } + mylocale = pg_newlocale_from_collation(collid); #ifdef USE_ICU if (mylocale && mylocale->provider == COLLPROVIDER_ICU) diff --git a/src/backend/utils/adt/like.c b/src/backend/utils/adt/like.c index 9f241dc7c6..833ee8f814 100644 --- a/src/backend/utils/adt/like.c +++ b/src/backend/utils/adt/like.c @@ -150,7 +150,7 @@ SB_lower_char(unsigned char c, pg_locale_t locale, bool locale_is_c) static inline int GenericMatchText(const char *s, int slen, const char *p, int plen, Oid collation) { - if (collation && !lc_ctype_is_c(collation) && collation != DEFAULT_COLLATION_OID) + if (collation && !lc_ctype_is_c(collation)) { pg_locale_t locale = pg_newlocale_from_collation(collation); @@ -178,28 +178,27 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation) pg_locale_t locale = 0; bool locale_is_c = false; + if (!OidIsValid(collation)) + { + /* + * This typically means that the parser could not resolve a + * conflict of implicit collations, so report it that way. + */ + ereport(ERROR, + (errcode(ERRCODE_INDETERMINATE_COLLATION), + errmsg("could not determine which collation to use for ILIKE"), + errhint("Use the COLLATE clause to set the collation explicitly."))); + } + if (lc_ctype_is_c(collation)) locale_is_c = true; - else if (collation != DEFAULT_COLLATION_OID) - { - if (!OidIsValid(collation)) - { - /* - * This typically means that the parser could not resolve a - * conflict of implicit collations, so report it that way. - */ - ereport(ERROR, - (errcode(ERRCODE_INDETERMINATE_COLLATION), - errmsg("could not determine which collation to use for ILIKE"), - errhint("Use the COLLATE clause to set the collation explicitly."))); - } + else locale = pg_newlocale_from_collation(collation); - if (locale && !locale->deterministic) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("nondeterministic collations are not supported for ILIKE"))); - } + if (locale && !locale->deterministic) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("nondeterministic collations are not supported for ILIKE"))); /* * For efficiency reasons, in the single byte case we don't call lower() diff --git a/src/backend/utils/adt/like_support.c b/src/backend/utils/adt/like_support.c index 7ca2a01e49..65a57fc3c4 100644 --- a/src/backend/utils/adt/like_support.c +++ b/src/backend/utils/adt/like_support.c @@ -1012,24 +1012,23 @@ like_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("case insensitive matching not supported on type bytea"))); + if (!OidIsValid(collation)) + { + /* + * This typically means that the parser could not resolve a + * conflict of implicit collations, so report it that way. + */ + ereport(ERROR, + (errcode(ERRCODE_INDETERMINATE_COLLATION), + errmsg("could not determine which collation to use for ILIKE"), + errhint("Use the COLLATE clause to set the collation explicitly."))); + } + /* If case-insensitive, we need locale info */ if (lc_ctype_is_c(collation)) locale_is_c = true; - else if (collation != DEFAULT_COLLATION_OID) - { - if (!OidIsValid(collation)) - { - /* - * This typically means that the parser could not resolve a - * conflict of implicit collations, so report it that way. - */ - ereport(ERROR, - (errcode(ERRCODE_INDETERMINATE_COLLATION), - errmsg("could not determine which collation to use for ILIKE"), - errhint("Use the COLLATE clause to set the collation explicitly."))); - } + else locale = pg_newlocale_from_collation(collation); - } } if (typeid != BYTEAOID) diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c index 18f3afdc62..33cccc5c6c 100644 --- a/src/backend/utils/adt/pg_locale.c +++ b/src/backend/utils/adt/pg_locale.c @@ -1454,8 +1454,6 @@ report_newlocale_failure(const char *localename) * * As a special optimization, the default/database collation returns 0. * Callers should then revert to the non-locale_t-enabled code path. - * In fact, they shouldn't call this function at all when they are dealing - * with the default locale. That can save quite a bit in hotspots. * Also, callers should avoid calling this before going down a C/POSIX * fastpath, because such a fastpath should work even on platforms without * locale_t support in the C library. @@ -1472,7 +1470,6 @@ pg_newlocale_from_collation(Oid collid) /* Callers must pass a valid OID */ Assert(OidIsValid(collid)); - /* Return 0 for "default" collation, just in case caller forgets */ if (collid == DEFAULT_COLLATION_OID) return (pg_locale_t) 0; diff --git a/src/backend/utils/adt/varchar.c b/src/backend/utils/adt/varchar.c index 95f768c884..8b5b30ed71 100644 --- a/src/backend/utils/adt/varchar.c +++ b/src/backend/utils/adt/varchar.c @@ -743,15 +743,20 @@ bpchareq(PG_FUNCTION_ARGS) len2; bool result; Oid collid = PG_GET_COLLATION(); + bool locale_is_c = false; + pg_locale_t mylocale = 0; check_collation_set(collid); len1 = bcTruelen(arg1); len2 = bcTruelen(arg2); - if (lc_collate_is_c(collid) || - collid == DEFAULT_COLLATION_OID || - pg_newlocale_from_collation(collid)->deterministic) + if (lc_collate_is_c(collid)) + locale_is_c = true; + else + mylocale = pg_newlocale_from_collation(collid); + + if (locale_is_c || !mylocale || mylocale->deterministic) { /* * Since we only care about equality or not-equality, we can avoid all @@ -783,15 +788,20 @@ bpcharne(PG_FUNCTION_ARGS) len2; bool result; Oid collid = PG_GET_COLLATION(); + bool locale_is_c = false; + pg_locale_t mylocale = 0; check_collation_set(collid); len1 = bcTruelen(arg1); len2 = bcTruelen(arg2); - if (lc_collate_is_c(collid) || - collid == DEFAULT_COLLATION_OID || - pg_newlocale_from_collation(collid)->deterministic) + if (lc_collate_is_c(collid)) + locale_is_c = true; + else + mylocale = pg_newlocale_from_collation(collid); + + if (locale_is_c || !mylocale || mylocale->deterministic) { /* * Since we only care about equality or not-equality, we can avoid all @@ -996,7 +1006,7 @@ hashbpchar(PG_FUNCTION_ARGS) keydata = VARDATA_ANY(key); keylen = bcTruelen(key); - if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID) + if (!lc_collate_is_c(collid)) mylocale = pg_newlocale_from_collation(collid); if (!mylocale || mylocale->deterministic) @@ -1056,7 +1066,7 @@ hashbpcharextended(PG_FUNCTION_ARGS) keydata = VARDATA_ANY(key); keylen = bcTruelen(key); - if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID) + if (!lc_collate_is_c(collid)) mylocale = pg_newlocale_from_collation(collid); if (!mylocale || mylocale->deterministic) diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c index b3eb39761d..a8db8080e2 100644 --- a/src/backend/utils/adt/varlena.c +++ b/src/backend/utils/adt/varlena.c @@ -1200,7 +1200,7 @@ text_position_setup(text *t1, text *t2, Oid collid, TextPositionState *state) check_collation_set(collid); - if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID) + if (!lc_collate_is_c(collid)) mylocale = pg_newlocale_from_collation(collid); if (mylocale && !mylocale->deterministic) @@ -1556,10 +1556,9 @@ varstr_cmp(const char *arg1, int len1, const char *arg2, int len2, Oid collid) char a2buf[TEXTBUFLEN]; char *a1p, *a2p; - pg_locale_t mylocale = 0; + pg_locale_t mylocale; - if (collid != DEFAULT_COLLATION_OID) - mylocale = pg_newlocale_from_collation(collid); + mylocale = pg_newlocale_from_collation(collid); /* * memcmp() can't tell us which of two unequal strings sorts first, @@ -1776,13 +1775,18 @@ Datum texteq(PG_FUNCTION_ARGS) { Oid collid = PG_GET_COLLATION(); + bool locale_is_c = false; + pg_locale_t mylocale = 0; bool result; check_collation_set(collid); - if (lc_collate_is_c(collid) || - collid == DEFAULT_COLLATION_OID || - pg_newlocale_from_collation(collid)->deterministic) + if (lc_collate_is_c(collid)) + locale_is_c = true; + else + mylocale = pg_newlocale_from_collation(collid); + + if (locale_is_c || !mylocale || mylocale->deterministic) { Datum arg1 = PG_GETARG_DATUM(0); Datum arg2 = PG_GETARG_DATUM(1); @@ -1830,13 +1834,18 @@ Datum textne(PG_FUNCTION_ARGS) { Oid collid = PG_GET_COLLATION(); + bool locale_is_c = false; + pg_locale_t mylocale = 0; bool result; check_collation_set(collid); - if (lc_collate_is_c(collid) || - collid == DEFAULT_COLLATION_OID || - pg_newlocale_from_collation(collid)->deterministic) + if (lc_collate_is_c(collid)) + locale_is_c = true; + else + mylocale = pg_newlocale_from_collation(collid); + + if (locale_is_c || !mylocale || mylocale->deterministic) { Datum arg1 = PG_GETARG_DATUM(0); Datum arg2 = PG_GETARG_DATUM(1); @@ -1947,7 +1956,7 @@ text_starts_with(PG_FUNCTION_ARGS) check_collation_set(collid); - if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID) + if (!lc_collate_is_c(collid)) mylocale = pg_newlocale_from_collation(collid); if (mylocale && !mylocale->deterministic) @@ -2061,8 +2070,7 @@ varstr_sortsupport(SortSupport ssup, Oid typid, Oid collid) * we'll figure out the collation based on the locale id and cache the * result. */ - if (collid != DEFAULT_COLLATION_OID) - locale = pg_newlocale_from_collation(collid); + locale = pg_newlocale_from_collation(collid); /* * There is a further exception on Windows. When the database -- 2.34.1