From cc0a6ebd31a8d5591ae0edc0d9d7c0e3206668f8 Mon Sep 17 00:00:00 2001
From: Jeff Davis <jeff@j-davis.com>
Date: Thu, 26 Sep 2024 12:12:51 -0700
Subject: [PATCH v9 07/11] Control ctype behavior internally with a method
 table.

Previously, pattern matching and case mapping behavior branched based
on the provider.

Refactor to use a method table, which is less error-prone and easier
to hook.
---
 src/backend/regex/regc_pg_locale.c        | 388 ++++--------------
 src/backend/utils/adt/formatting.c        | 445 +++------------------
 src/backend/utils/adt/like.c              |  22 +-
 src/backend/utils/adt/like_support.c      |   7 +-
 src/backend/utils/adt/pg_locale.c         |  71 ++++
 src/backend/utils/adt/pg_locale_builtin.c | 129 ++++++
 src/backend/utils/adt/pg_locale_icu.c     | 188 ++++++++-
 src/backend/utils/adt/pg_locale_libc.c    | 465 ++++++++++++++++++++++
 src/include/utils/pg_locale.h             |  71 +++-
 src/tools/pgindent/typedefs.list          |   1 -
 10 files changed, 1062 insertions(+), 725 deletions(-)

diff --git a/src/backend/regex/regc_pg_locale.c b/src/backend/regex/regc_pg_locale.c
index b75784b6ce5..e898634fdf6 100644
--- a/src/backend/regex/regc_pg_locale.c
+++ b/src/backend/regex/regc_pg_locale.c
@@ -63,33 +63,18 @@
  * NB: the coding here assumes pg_wchar is an unsigned type.
  */
 
-typedef enum
-{
-	PG_REGEX_STRATEGY_C,		/* C locale (encoding independent) */
-	PG_REGEX_STRATEGY_BUILTIN,	/* built-in Unicode semantics */
-	PG_REGEX_STRATEGY_LIBC_WIDE,	/* Use locale_t <wctype.h> functions */
-	PG_REGEX_STRATEGY_LIBC_1BYTE,	/* Use locale_t <ctype.h> functions */
-	PG_REGEX_STRATEGY_ICU,		/* Use ICU uchar.h functions */
-} PG_Locale_Strategy;
-
-static PG_Locale_Strategy pg_regex_strategy;
 static pg_locale_t pg_regex_locale;
 static Oid	pg_regex_collation;
 
+static struct pg_locale_struct dummy_c_locale = {
+	.collate_is_c = true,
+	.ctype_is_c = true,
+};
+
 /*
  * Hard-wired character properties for C locale
  */
-#define PG_ISDIGIT	0x01
-#define PG_ISALPHA	0x02
-#define PG_ISALNUM	(PG_ISDIGIT | PG_ISALPHA)
-#define PG_ISUPPER	0x04
-#define PG_ISLOWER	0x08
-#define PG_ISGRAPH	0x10
-#define PG_ISPRINT	0x20
-#define PG_ISPUNCT	0x40
-#define PG_ISSPACE	0x80
-
-static const unsigned char pg_char_properties[128] = {
+static const unsigned char char_properties_tbl[128] = {
 	 /* NUL */ 0,
 	 /* ^A */ 0,
 	 /* ^B */ 0,
@@ -232,7 +217,6 @@ void
 pg_set_regex_collation(Oid collation)
 {
 	pg_locale_t locale = 0;
-	PG_Locale_Strategy strategy;
 
 	if (!OidIsValid(collation))
 	{
@@ -253,8 +237,8 @@ pg_set_regex_collation(Oid collation)
 		 * catalog access is available, so we can't call
 		 * pg_newlocale_from_collation().
 		 */
-		strategy = PG_REGEX_STRATEGY_C;
 		collation = C_COLLATION_OID;
+		locale = &dummy_c_locale;
 	}
 	else
 	{
@@ -271,32 +255,11 @@ pg_set_regex_collation(Oid collation)
 			 * C/POSIX collations use this path regardless of database
 			 * encoding
 			 */
-			strategy = PG_REGEX_STRATEGY_C;
-			locale = 0;
+			locale = &dummy_c_locale;
 			collation = C_COLLATION_OID;
 		}
-		else if (locale->provider == COLLPROVIDER_BUILTIN)
-		{
-			Assert(GetDatabaseEncoding() == PG_UTF8);
-			strategy = PG_REGEX_STRATEGY_BUILTIN;
-		}
-#ifdef USE_ICU
-		else if (locale->provider == COLLPROVIDER_ICU)
-		{
-			strategy = PG_REGEX_STRATEGY_ICU;
-		}
-#endif
-		else
-		{
-			Assert(locale->provider == COLLPROVIDER_LIBC);
-			if (GetDatabaseEncoding() == PG_UTF8)
-				strategy = PG_REGEX_STRATEGY_LIBC_WIDE;
-			else
-				strategy = PG_REGEX_STRATEGY_LIBC_1BYTE;
-		}
 	}
 
-	pg_regex_strategy = strategy;
 	pg_regex_locale = locale;
 	pg_regex_collation = collation;
 }
@@ -304,82 +267,31 @@ pg_set_regex_collation(Oid collation)
 static int
 pg_wc_isdigit(pg_wchar c)
 {
-	switch (pg_regex_strategy)
-	{
-		case PG_REGEX_STRATEGY_C:
-			return (c <= (pg_wchar) 127 &&
-					(pg_char_properties[c] & PG_ISDIGIT));
-		case PG_REGEX_STRATEGY_BUILTIN:
-			return pg_u_isdigit(c, true);
-		case PG_REGEX_STRATEGY_LIBC_WIDE:
-			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-				return iswdigit_l((wint_t) c, pg_regex_locale->info.lt);
-			/* FALL THRU */
-		case PG_REGEX_STRATEGY_LIBC_1BYTE:
-			return (c <= (pg_wchar) UCHAR_MAX &&
-					isdigit_l((unsigned char) c, pg_regex_locale->info.lt));
-			break;
-		case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
-			return u_isdigit(c);
-#endif
-			break;
-	}
-	return 0;					/* can't get here, but keep compiler quiet */
+	if (pg_regex_locale->ctype_is_c)
+		return (c <= (pg_wchar) 127 &&
+				(char_properties_tbl[c] & PG_ISDIGIT));
+	else
+		return char_properties(c, PG_ISDIGIT, pg_regex_locale) != 0;
 }
 
 static int
 pg_wc_isalpha(pg_wchar c)
 {
-	switch (pg_regex_strategy)
-	{
-		case PG_REGEX_STRATEGY_C:
-			return (c <= (pg_wchar) 127 &&
-					(pg_char_properties[c] & PG_ISALPHA));
-		case PG_REGEX_STRATEGY_BUILTIN:
-			return pg_u_isalpha(c);
-		case PG_REGEX_STRATEGY_LIBC_WIDE:
-			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-				return iswalpha_l((wint_t) c, pg_regex_locale->info.lt);
-			/* FALL THRU */
-		case PG_REGEX_STRATEGY_LIBC_1BYTE:
-			return (c <= (pg_wchar) UCHAR_MAX &&
-					isalpha_l((unsigned char) c, pg_regex_locale->info.lt));
-			break;
-		case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
-			return u_isalpha(c);
-#endif
-			break;
-	}
-	return 0;					/* can't get here, but keep compiler quiet */
+	if (pg_regex_locale->ctype_is_c)
+		return (c <= (pg_wchar) 127 &&
+				(char_properties_tbl[c] & PG_ISALPHA));
+	else
+		return char_properties(c, PG_ISALPHA, pg_regex_locale) != 0;
 }
 
 static int
 pg_wc_isalnum(pg_wchar c)
 {
-	switch (pg_regex_strategy)
-	{
-		case PG_REGEX_STRATEGY_C:
-			return (c <= (pg_wchar) 127 &&
-					(pg_char_properties[c] & PG_ISALNUM));
-		case PG_REGEX_STRATEGY_BUILTIN:
-			return pg_u_isalnum(c, true);
-		case PG_REGEX_STRATEGY_LIBC_WIDE:
-			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-				return iswalnum_l((wint_t) c, pg_regex_locale->info.lt);
-			/* FALL THRU */
-		case PG_REGEX_STRATEGY_LIBC_1BYTE:
-			return (c <= (pg_wchar) UCHAR_MAX &&
-					isalnum_l((unsigned char) c, pg_regex_locale->info.lt));
-			break;
-		case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
-			return u_isalnum(c);
-#endif
-			break;
-	}
-	return 0;					/* can't get here, but keep compiler quiet */
+	if (pg_regex_locale->ctype_is_c)
+		return (c <= (pg_wchar) 127 &&
+				(char_properties_tbl[c] & PG_ISALNUM));
+	else
+		return char_properties(c, PG_ISDIGIT | PG_ISALPHA, pg_regex_locale) != 0;
 }
 
 static int
@@ -394,219 +306,87 @@ pg_wc_isword(pg_wchar c)
 static int
 pg_wc_isupper(pg_wchar c)
 {
-	switch (pg_regex_strategy)
-	{
-		case PG_REGEX_STRATEGY_C:
-			return (c <= (pg_wchar) 127 &&
-					(pg_char_properties[c] & PG_ISUPPER));
-		case PG_REGEX_STRATEGY_BUILTIN:
-			return pg_u_isupper(c);
-		case PG_REGEX_STRATEGY_LIBC_WIDE:
-			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-				return iswupper_l((wint_t) c, pg_regex_locale->info.lt);
-			/* FALL THRU */
-		case PG_REGEX_STRATEGY_LIBC_1BYTE:
-			return (c <= (pg_wchar) UCHAR_MAX &&
-					isupper_l((unsigned char) c, pg_regex_locale->info.lt));
-			break;
-		case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
-			return u_isupper(c);
-#endif
-			break;
-	}
-	return 0;					/* can't get here, but keep compiler quiet */
+	if (pg_regex_locale->ctype_is_c)
+		return (c <= (pg_wchar) 127 &&
+				(char_properties_tbl[c] & PG_ISUPPER));
+	else
+		return char_properties(c, PG_ISUPPER, pg_regex_locale) != 0;
 }
 
 static int
 pg_wc_islower(pg_wchar c)
 {
-	switch (pg_regex_strategy)
-	{
-		case PG_REGEX_STRATEGY_C:
-			return (c <= (pg_wchar) 127 &&
-					(pg_char_properties[c] & PG_ISLOWER));
-		case PG_REGEX_STRATEGY_BUILTIN:
-			return pg_u_islower(c);
-		case PG_REGEX_STRATEGY_LIBC_WIDE:
-			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-				return iswlower_l((wint_t) c, pg_regex_locale->info.lt);
-			/* FALL THRU */
-		case PG_REGEX_STRATEGY_LIBC_1BYTE:
-			return (c <= (pg_wchar) UCHAR_MAX &&
-					islower_l((unsigned char) c, pg_regex_locale->info.lt));
-			break;
-		case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
-			return u_islower(c);
-#endif
-			break;
-	}
-	return 0;					/* can't get here, but keep compiler quiet */
+	if (pg_regex_locale->ctype_is_c)
+		return (c <= (pg_wchar) 127 &&
+				(char_properties_tbl[c] & PG_ISLOWER));
+	else
+		return char_properties(c, PG_ISLOWER, pg_regex_locale) != 0;
 }
 
 static int
 pg_wc_isgraph(pg_wchar c)
 {
-	switch (pg_regex_strategy)
-	{
-		case PG_REGEX_STRATEGY_C:
-			return (c <= (pg_wchar) 127 &&
-					(pg_char_properties[c] & PG_ISGRAPH));
-		case PG_REGEX_STRATEGY_BUILTIN:
-			return pg_u_isgraph(c);
-		case PG_REGEX_STRATEGY_LIBC_WIDE:
-			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-				return iswgraph_l((wint_t) c, pg_regex_locale->info.lt);
-			/* FALL THRU */
-		case PG_REGEX_STRATEGY_LIBC_1BYTE:
-			return (c <= (pg_wchar) UCHAR_MAX &&
-					isgraph_l((unsigned char) c, pg_regex_locale->info.lt));
-			break;
-		case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
-			return u_isgraph(c);
-#endif
-			break;
-	}
-	return 0;					/* can't get here, but keep compiler quiet */
+	if (pg_regex_locale->ctype_is_c)
+		return (c <= (pg_wchar) 127 &&
+				(char_properties_tbl[c] & PG_ISGRAPH));
+	else
+		return char_properties(c, PG_ISGRAPH, pg_regex_locale) != 0;
 }
 
 static int
 pg_wc_isprint(pg_wchar c)
 {
-	switch (pg_regex_strategy)
-	{
-		case PG_REGEX_STRATEGY_C:
-			return (c <= (pg_wchar) 127 &&
-					(pg_char_properties[c] & PG_ISPRINT));
-		case PG_REGEX_STRATEGY_BUILTIN:
-			return pg_u_isprint(c);
-		case PG_REGEX_STRATEGY_LIBC_WIDE:
-			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-				return iswprint_l((wint_t) c, pg_regex_locale->info.lt);
-			/* FALL THRU */
-		case PG_REGEX_STRATEGY_LIBC_1BYTE:
-			return (c <= (pg_wchar) UCHAR_MAX &&
-					isprint_l((unsigned char) c, pg_regex_locale->info.lt));
-			break;
-		case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
-			return u_isprint(c);
-#endif
-			break;
-	}
-	return 0;					/* can't get here, but keep compiler quiet */
+	if (pg_regex_locale->ctype_is_c)
+		return (c <= (pg_wchar) 127 &&
+				(char_properties_tbl[c] & PG_ISPRINT));
+	else
+		return char_properties(c, PG_ISPRINT, pg_regex_locale) != 0;
 }
 
 static int
 pg_wc_ispunct(pg_wchar c)
 {
-	switch (pg_regex_strategy)
-	{
-		case PG_REGEX_STRATEGY_C:
-			return (c <= (pg_wchar) 127 &&
-					(pg_char_properties[c] & PG_ISPUNCT));
-		case PG_REGEX_STRATEGY_BUILTIN:
-			return pg_u_ispunct(c, true);
-		case PG_REGEX_STRATEGY_LIBC_WIDE:
-			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-				return iswpunct_l((wint_t) c, pg_regex_locale->info.lt);
-			/* FALL THRU */
-		case PG_REGEX_STRATEGY_LIBC_1BYTE:
-			return (c <= (pg_wchar) UCHAR_MAX &&
-					ispunct_l((unsigned char) c, pg_regex_locale->info.lt));
-			break;
-		case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
-			return u_ispunct(c);
-#endif
-			break;
-	}
-	return 0;					/* can't get here, but keep compiler quiet */
+	if (pg_regex_locale->ctype_is_c)
+		return (c <= (pg_wchar) 127 &&
+				(char_properties_tbl[c] & PG_ISPUNCT));
+	else
+		return char_properties(c, PG_ISPUNCT, pg_regex_locale) != 0;
 }
 
 static int
 pg_wc_isspace(pg_wchar c)
 {
-	switch (pg_regex_strategy)
-	{
-		case PG_REGEX_STRATEGY_C:
-			return (c <= (pg_wchar) 127 &&
-					(pg_char_properties[c] & PG_ISSPACE));
-		case PG_REGEX_STRATEGY_BUILTIN:
-			return pg_u_isspace(c);
-		case PG_REGEX_STRATEGY_LIBC_WIDE:
-			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-				return iswspace_l((wint_t) c, pg_regex_locale->info.lt);
-			/* FALL THRU */
-		case PG_REGEX_STRATEGY_LIBC_1BYTE:
-			return (c <= (pg_wchar) UCHAR_MAX &&
-					isspace_l((unsigned char) c, pg_regex_locale->info.lt));
-			break;
-		case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
-			return u_isspace(c);
-#endif
-			break;
-	}
-	return 0;					/* can't get here, but keep compiler quiet */
+	if (pg_regex_locale->ctype_is_c)
+		return (c <= (pg_wchar) 127 &&
+				(char_properties_tbl[c] & PG_ISSPACE));
+	else
+		return char_properties(c, PG_ISSPACE, pg_regex_locale) != 0;
 }
 
 static pg_wchar
 pg_wc_toupper(pg_wchar c)
 {
-	switch (pg_regex_strategy)
+	if (pg_regex_locale->ctype_is_c)
 	{
-		case PG_REGEX_STRATEGY_C:
-			if (c <= (pg_wchar) 127)
-				return pg_ascii_toupper((unsigned char) c);
-			return c;
-		case PG_REGEX_STRATEGY_BUILTIN:
-			return unicode_uppercase_simple(c);
-		case PG_REGEX_STRATEGY_LIBC_WIDE:
-			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-				return towupper_l((wint_t) c, pg_regex_locale->info.lt);
-			/* FALL THRU */
-		case PG_REGEX_STRATEGY_LIBC_1BYTE:
-			if (c <= (pg_wchar) UCHAR_MAX)
-				return toupper_l((unsigned char) c, pg_regex_locale->info.lt);
-			return c;
-		case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
-			return u_toupper(c);
-#endif
-			break;
+		if (c <= (pg_wchar) 127)
+			return pg_ascii_toupper((unsigned char) c);
+		return c;
 	}
-	return 0;					/* can't get here, but keep compiler quiet */
+	else
+		return pg_regex_locale->ctype->wc_toupper(c, pg_regex_locale);
 }
 
 static pg_wchar
 pg_wc_tolower(pg_wchar c)
 {
-	switch (pg_regex_strategy)
+	if (pg_regex_locale->ctype_is_c)
 	{
-		case PG_REGEX_STRATEGY_C:
-			if (c <= (pg_wchar) 127)
-				return pg_ascii_tolower((unsigned char) c);
-			return c;
-		case PG_REGEX_STRATEGY_BUILTIN:
-			return unicode_lowercase_simple(c);
-		case PG_REGEX_STRATEGY_LIBC_WIDE:
-			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-				return towlower_l((wint_t) c, pg_regex_locale->info.lt);
-			/* FALL THRU */
-		case PG_REGEX_STRATEGY_LIBC_1BYTE:
-			if (c <= (pg_wchar) UCHAR_MAX)
-				return tolower_l((unsigned char) c, pg_regex_locale->info.lt);
-			return c;
-		case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
-			return u_tolower(c);
-#endif
-			break;
+		if (c <= (pg_wchar) 127)
+			return pg_ascii_tolower((unsigned char) c);
+		return c;
 	}
-	return 0;					/* can't get here, but keep compiler quiet */
+	else
+		return pg_regex_locale->ctype->wc_tolower(c, pg_regex_locale);
 }
 
 
@@ -732,37 +512,25 @@ pg_ctype_get_cache(pg_wc_probefunc probefunc, int cclasscode)
 	 * would always be true for production values of MAX_SIMPLE_CHR, but it's
 	 * useful to allow it to be small for testing purposes.)
 	 */
-	switch (pg_regex_strategy)
+	if (pg_regex_locale->ctype_is_c)
 	{
-		case PG_REGEX_STRATEGY_C:
 #if MAX_SIMPLE_CHR >= 127
-			max_chr = (pg_wchar) 127;
-			pcc->cv.cclasscode = -1;
+		max_chr = (pg_wchar) 127;
+		pcc->cv.cclasscode = -1;
 #else
-			max_chr = (pg_wchar) MAX_SIMPLE_CHR;
+		max_chr = (pg_wchar) MAX_SIMPLE_CHR;
 #endif
-			break;
-		case PG_REGEX_STRATEGY_BUILTIN:
-			max_chr = (pg_wchar) MAX_SIMPLE_CHR;
-			break;
-		case PG_REGEX_STRATEGY_LIBC_WIDE:
-			max_chr = (pg_wchar) MAX_SIMPLE_CHR;
-			break;
-		case PG_REGEX_STRATEGY_LIBC_1BYTE:
-#if MAX_SIMPLE_CHR >= UCHAR_MAX
-			max_chr = (pg_wchar) UCHAR_MAX;
+	}
+	else
+	{
+		if (pg_regex_locale->ctype->max_chr != 0 &&
+			pg_regex_locale->ctype->max_chr <= MAX_SIMPLE_CHR)
+		{
+			max_chr = pg_regex_locale->ctype->max_chr;
 			pcc->cv.cclasscode = -1;
-#else
-			max_chr = (pg_wchar) MAX_SIMPLE_CHR;
-#endif
-			break;
-		case PG_REGEX_STRATEGY_ICU:
+		}
+		else
 			max_chr = (pg_wchar) MAX_SIMPLE_CHR;
-			break;
-		default:
-			Assert(false);
-			max_chr = 0;		/* can't get here, but keep compiler quiet */
-			break;
 	}
 
 	/*
diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c
index 85a7dd45619..6a0571f93e6 100644
--- a/src/backend/utils/adt/formatting.c
+++ b/src/backend/utils/adt/formatting.c
@@ -1570,52 +1570,6 @@ str_numth(char *dest, char *num, int type)
  *			upper/lower/initcap functions
  *****************************************************************************/
 
-#ifdef USE_ICU
-
-typedef int32_t (*ICU_Convert_Func) (UChar *dest, int32_t destCapacity,
-									 const UChar *src, int32_t srcLength,
-									 const char *locale,
-									 UErrorCode *pErrorCode);
-
-static int32_t
-icu_convert_case(ICU_Convert_Func func, pg_locale_t mylocale,
-				 UChar **buff_dest, UChar *buff_source, int32_t len_source)
-{
-	UErrorCode	status;
-	int32_t		len_dest;
-
-	len_dest = len_source;		/* try first with same length */
-	*buff_dest = palloc(len_dest * sizeof(**buff_dest));
-	status = U_ZERO_ERROR;
-	len_dest = func(*buff_dest, len_dest, buff_source, len_source,
-					mylocale->info.icu.locale, &status);
-	if (status == U_BUFFER_OVERFLOW_ERROR)
-	{
-		/* try again with adjusted length */
-		pfree(*buff_dest);
-		*buff_dest = palloc(len_dest * sizeof(**buff_dest));
-		status = U_ZERO_ERROR;
-		len_dest = func(*buff_dest, len_dest, buff_source, len_source,
-						mylocale->info.icu.locale, &status);
-	}
-	if (U_FAILURE(status))
-		ereport(ERROR,
-				(errmsg("case conversion failed: %s", u_errorName(status))));
-	return len_dest;
-}
-
-static int32_t
-u_strToTitle_default_BI(UChar *dest, int32_t destCapacity,
-						const UChar *src, int32_t srcLength,
-						const char *locale,
-						UErrorCode *pErrorCode)
-{
-	return u_strToTitle(dest, destCapacity, src, srcLength,
-						NULL, locale, pErrorCode);
-}
-
-#endif							/* USE_ICU */
-
 /*
  * If the system provides the needed functions for wide-character manipulation
  * (which are all standardized by C99), then we implement upper/lower/initcap
@@ -1663,101 +1617,28 @@ str_tolower(const char *buff, size_t nbytes, Oid collid)
 	}
 	else
 	{
-#ifdef USE_ICU
-		if (mylocale->provider == COLLPROVIDER_ICU)
+		const char *src = buff;
+		size_t		srclen = nbytes;
+		size_t		dstsize;
+		char	   *dst;
+		size_t		needed;
+
+		/* first try buffer of equal size plus terminating NUL */
+		dstsize = srclen + 1;
+		dst = palloc(dstsize);
+
+		needed = pg_strlower(dst, dstsize, src, srclen, mylocale);
+		if (needed + 1 > dstsize)
 		{
-			int32_t		len_uchar;
-			int32_t		len_conv;
-			UChar	   *buff_uchar;
-			UChar	   *buff_conv;
-
-			len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
-			len_conv = icu_convert_case(u_strToLower, mylocale,
-										&buff_conv, buff_uchar, len_uchar);
-			icu_from_uchar(&result, buff_conv, len_conv);
-			pfree(buff_uchar);
-			pfree(buff_conv);
+			/* grow buffer if needed and retry */
+			dstsize = needed + 1;
+			dst = repalloc(dst, dstsize);
+			needed = pg_strlower(dst, dstsize, src, srclen, mylocale);
+			Assert(needed + 1 <= dstsize);
 		}
-		else
-#endif
-		if (mylocale->provider == COLLPROVIDER_BUILTIN)
-		{
-			const char *src = buff;
-			size_t		srclen = nbytes;
-			size_t		dstsize;
-			char	   *dst;
-			size_t		needed;
-
-			Assert(GetDatabaseEncoding() == PG_UTF8);
-
-			/* first try buffer of equal size plus terminating NUL */
-			dstsize = srclen + 1;
-			dst = palloc(dstsize);
-
-			needed = unicode_strlower(dst, dstsize, src, srclen);
-			if (needed + 1 > dstsize)
-			{
-				/* grow buffer if needed and retry */
-				dstsize = needed + 1;
-				dst = repalloc(dst, dstsize);
-				needed = unicode_strlower(dst, dstsize, src, srclen);
-				Assert(needed + 1 == dstsize);
-			}
-
-			Assert(dst[needed] == '\0');
-			result = dst;
-		}
-		else
-		{
-			Assert(mylocale->provider == COLLPROVIDER_LIBC);
-
-			if (pg_database_encoding_max_length() > 1)
-			{
-				wchar_t    *workspace;
-				size_t		curr_char;
-				size_t		result_size;
-
-				/* Overflow paranoia */
-				if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
-					ereport(ERROR,
-							(errcode(ERRCODE_OUT_OF_MEMORY),
-							 errmsg("out of memory")));
-
-				/* Output workspace cannot have more codes than input bytes */
-				workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
-
-				char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
-
-				for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
-					workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt);
 
-				/*
-				 * Make result large enough; case change might change number
-				 * of bytes
-				 */
-				result_size = curr_char * pg_database_encoding_max_length() + 1;
-				result = palloc(result_size);
-
-				wchar2char(result, workspace, result_size, mylocale);
-				pfree(workspace);
-			}
-			else
-			{
-				char	   *p;
-
-				result = pnstrdup(buff, nbytes);
-
-				/*
-				 * Note: we assume that tolower_l() will not be so broken as
-				 * to need an isupper_l() guard test.  When using the default
-				 * collation, we apply the traditional Postgres behavior that
-				 * forces ASCII-style treatment of I/i, but in non-default
-				 * collations you get exactly what the collation says.
-				 */
-				for (p = result; *p; p++)
-					*p = tolower_l((unsigned char) *p, mylocale->info.lt);
-			}
-		}
+		Assert(dst[needed] == '\0');
+		result = dst;
 	}
 
 	return result;
@@ -1800,147 +1681,33 @@ str_toupper(const char *buff, size_t nbytes, Oid collid)
 	}
 	else
 	{
-#ifdef USE_ICU
-		if (mylocale->provider == COLLPROVIDER_ICU)
-		{
-			int32_t		len_uchar,
-						len_conv;
-			UChar	   *buff_uchar;
-			UChar	   *buff_conv;
-
-			len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
-			len_conv = icu_convert_case(u_strToUpper, mylocale,
-										&buff_conv, buff_uchar, len_uchar);
-			icu_from_uchar(&result, buff_conv, len_conv);
-			pfree(buff_uchar);
-			pfree(buff_conv);
-		}
-		else
-#endif
-		if (mylocale->provider == COLLPROVIDER_BUILTIN)
+		const char *src = buff;
+		size_t		srclen = nbytes;
+		size_t		dstsize;
+		char	   *dst;
+		size_t		needed;
+
+		/* first try buffer of equal size plus terminating NUL */
+		dstsize = srclen + 1;
+		dst = palloc(dstsize);
+
+		needed = pg_strupper(dst, dstsize, src, srclen, mylocale);
+		if (needed + 1 > dstsize)
 		{
-			const char *src = buff;
-			size_t		srclen = nbytes;
-			size_t		dstsize;
-			char	   *dst;
-			size_t		needed;
-
-			Assert(GetDatabaseEncoding() == PG_UTF8);
-
-			/* first try buffer of equal size plus terminating NUL */
-			dstsize = srclen + 1;
-			dst = palloc(dstsize);
-
-			needed = unicode_strupper(dst, dstsize, src, srclen);
-			if (needed + 1 > dstsize)
-			{
-				/* grow buffer if needed and retry */
-				dstsize = needed + 1;
-				dst = repalloc(dst, dstsize);
-				needed = unicode_strupper(dst, dstsize, src, srclen);
-				Assert(needed + 1 == dstsize);
-			}
-
-			Assert(dst[needed] == '\0');
-			result = dst;
+			/* grow buffer if needed and retry */
+			dstsize = needed + 1;
+			dst = repalloc(dst, dstsize);
+			needed = pg_strupper(dst, dstsize, src, srclen, mylocale);
+			Assert(needed + 1 <= dstsize);
 		}
-		else
-		{
-			Assert(mylocale->provider == COLLPROVIDER_LIBC);
-
-			if (pg_database_encoding_max_length() > 1)
-			{
-				wchar_t    *workspace;
-				size_t		curr_char;
-				size_t		result_size;
-
-				/* Overflow paranoia */
-				if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
-					ereport(ERROR,
-							(errcode(ERRCODE_OUT_OF_MEMORY),
-							 errmsg("out of memory")));
-
-				/* Output workspace cannot have more codes than input bytes */
-				workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
-
-				char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
-
-				for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
-					workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt);
 
-				/*
-				 * Make result large enough; case change might change number
-				 * of bytes
-				 */
-				result_size = curr_char * pg_database_encoding_max_length() + 1;
-				result = palloc(result_size);
-
-				wchar2char(result, workspace, result_size, mylocale);
-				pfree(workspace);
-			}
-			else
-			{
-				char	   *p;
-
-				result = pnstrdup(buff, nbytes);
-
-				/*
-				 * Note: we assume that toupper_l() will not be so broken as
-				 * to need an islower_l() guard test.  When using the default
-				 * collation, we apply the traditional Postgres behavior that
-				 * forces ASCII-style treatment of I/i, but in non-default
-				 * collations you get exactly what the collation says.
-				 */
-				for (p = result; *p; p++)
-					*p = toupper_l((unsigned char) *p, mylocale->info.lt);
-			}
-		}
+		Assert(dst[needed] == '\0');
+		result = dst;
 	}
 
 	return result;
 }
 
-struct WordBoundaryState
-{
-	const char *str;
-	size_t		len;
-	size_t		offset;
-	bool		init;
-	bool		prev_alnum;
-};
-
-/*
- * Simple word boundary iterator that draws boundaries each time the result of
- * pg_u_isalnum() changes.
- */
-static size_t
-initcap_wbnext(void *state)
-{
-	struct WordBoundaryState *wbstate = (struct WordBoundaryState *) state;
-
-	while (wbstate->offset < wbstate->len &&
-		   wbstate->str[wbstate->offset] != '\0')
-	{
-		pg_wchar	u = utf8_to_unicode((unsigned char *) wbstate->str +
-										wbstate->offset);
-		bool		curr_alnum = pg_u_isalnum(u, true);
-
-		if (!wbstate->init || curr_alnum != wbstate->prev_alnum)
-		{
-			size_t		prev_offset = wbstate->offset;
-
-			wbstate->init = true;
-			wbstate->offset += unicode_utf8len(u);
-			wbstate->prev_alnum = curr_alnum;
-			return prev_offset;
-		}
-
-		wbstate->offset += unicode_utf8len(u);
-	}
-
-	return wbstate->len;
-}
-
 /*
  * collation-aware, wide-character-aware initcap function
  *
@@ -1951,7 +1718,6 @@ char *
 str_initcap(const char *buff, size_t nbytes, Oid collid)
 {
 	char	   *result;
-	int			wasalnum = false;
 	pg_locale_t mylocale;
 
 	if (!buff)
@@ -1979,125 +1745,28 @@ str_initcap(const char *buff, size_t nbytes, Oid collid)
 	}
 	else
 	{
-#ifdef USE_ICU
-		if (mylocale->provider == COLLPROVIDER_ICU)
+		const char *src = buff;
+		size_t		srclen = nbytes;
+		size_t		dstsize;
+		char	   *dst;
+		size_t		needed;
+
+		/* first try buffer of equal size plus terminating NUL */
+		dstsize = srclen + 1;
+		dst = palloc(dstsize);
+
+		needed = pg_strtitle(dst, dstsize, src, srclen, mylocale);
+		if (needed + 1 > dstsize)
 		{
-			int32_t		len_uchar,
-						len_conv;
-			UChar	   *buff_uchar;
-			UChar	   *buff_conv;
-
-			len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
-			len_conv = icu_convert_case(u_strToTitle_default_BI, mylocale,
-										&buff_conv, buff_uchar, len_uchar);
-			icu_from_uchar(&result, buff_conv, len_conv);
-			pfree(buff_uchar);
-			pfree(buff_conv);
+			/* grow buffer if needed and retry */
+			dstsize = needed + 1;
+			dst = repalloc(dst, dstsize);
+			needed = pg_strtitle(dst, dstsize, src, srclen, mylocale);
+			Assert(needed + 1 <= dstsize);
 		}
-		else
-#endif
-		if (mylocale->provider == COLLPROVIDER_BUILTIN)
-		{
-			const char *src = buff;
-			size_t		srclen = nbytes;
-			size_t		dstsize;
-			char	   *dst;
-			size_t		needed;
-			struct WordBoundaryState wbstate = {
-				.str = src,
-				.len = srclen,
-				.offset = 0,
-				.init = false,
-				.prev_alnum = false,
-			};
-
-			Assert(GetDatabaseEncoding() == PG_UTF8);
-
-			/* first try buffer of equal size plus terminating NUL */
-			dstsize = srclen + 1;
-			dst = palloc(dstsize);
-
-			needed = unicode_strtitle(dst, dstsize, src, srclen,
-									  initcap_wbnext, &wbstate);
-			if (needed + 1 > dstsize)
-			{
-				/* reset iterator */
-				wbstate.offset = 0;
-				wbstate.init = false;
-
-				/* grow buffer if needed and retry */
-				dstsize = needed + 1;
-				dst = repalloc(dst, dstsize);
-				needed = unicode_strtitle(dst, dstsize, src, srclen,
-										  initcap_wbnext, &wbstate);
-				Assert(needed + 1 == dstsize);
-			}
-
-			result = dst;
-		}
-		else
-		{
-			Assert(mylocale->provider == COLLPROVIDER_LIBC);
-
-			if (pg_database_encoding_max_length() > 1)
-			{
-				wchar_t    *workspace;
-				size_t		curr_char;
-				size_t		result_size;
-
-				/* Overflow paranoia */
-				if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
-					ereport(ERROR,
-							(errcode(ERRCODE_OUT_OF_MEMORY),
-							 errmsg("out of memory")));
-
-				/* Output workspace cannot have more codes than input bytes */
-				workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
-
-				char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
-
-				for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
-				{
-					if (wasalnum)
-						workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt);
-					else
-						workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt);
-					wasalnum = iswalnum_l(workspace[curr_char], mylocale->info.lt);
-				}
-
-				/*
-				 * Make result large enough; case change might change number
-				 * of bytes
-				 */
-				result_size = curr_char * pg_database_encoding_max_length() + 1;
-				result = palloc(result_size);
-
-				wchar2char(result, workspace, result_size, mylocale);
-				pfree(workspace);
-			}
-			else
-			{
-				char	   *p;
 
-				result = pnstrdup(buff, nbytes);
-
-				/*
-				 * Note: we assume that toupper_l()/tolower_l() will not be so
-				 * broken as to need guard tests.  When using the default
-				 * collation, we apply the traditional Postgres behavior that
-				 * forces ASCII-style treatment of I/i, but in non-default
-				 * collations you get exactly what the collation says.
-				 */
-				for (p = result; *p; p++)
-				{
-					if (wasalnum)
-						*p = tolower_l((unsigned char) *p, mylocale->info.lt);
-					else
-						*p = toupper_l((unsigned char) *p, mylocale->info.lt);
-					wasalnum = isalnum_l((unsigned char) *p, mylocale->info.lt);
-				}
-			}
-		}
+		Assert(dst[needed] == '\0');
+		result = dst;
 	}
 
 	return result;
diff --git a/src/backend/utils/adt/like.c b/src/backend/utils/adt/like.c
index 7b3d1b5be71..1e5f07dfcab 100644
--- a/src/backend/utils/adt/like.c
+++ b/src/backend/utils/adt/like.c
@@ -96,7 +96,7 @@ SB_lower_char(unsigned char c, pg_locale_t locale)
 	if (locale->ctype_is_c)
 		return pg_ascii_tolower(c);
 	else
-		return tolower_l(c, locale->info.lt);
+		return char_tolower(c, locale);
 }
 
 
@@ -207,7 +207,24 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation)
 	 * way.
 	 */
 
-	if (pg_database_encoding_max_length() > 1 || (locale->provider == COLLPROVIDER_ICU))
+	/*
+	 * SB_IMatchText() is the single-byte matcher, so use it only when the
+	 * database encoding is single-byte.  In a multibyte encoding even a
+	 * ctype_is_c locale must take the lower()-then-match path below;
+	 * otherwise '_' would match just one byte of a multibyte character.
+	 * ctype_is_c is tested first because char_tolower_enabled() dereferences
+	 * locale->ctype, which is NULL for the C locale.
+	 */
+	if (pg_database_encoding_max_length() == 1 &&
+		(locale->ctype_is_c || char_tolower_enabled(locale)))
+	{
+		p = VARDATA_ANY(pat);
+		plen = VARSIZE_ANY_EXHDR(pat);
+		s = VARDATA_ANY(str);
+		slen = VARSIZE_ANY_EXHDR(str);
+		return SB_IMatchText(s, slen, p, plen, locale);
+	}
+	else
 	{
 		pat = DatumGetTextPP(DirectFunctionCall1Coll(lower, collation,
 													 PointerGetDatum(pat)));
@@ -222,14 +239,6 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation)
 		else
 			return MB_MatchText(s, slen, p, plen, 0);
 	}
-	else
-	{
-		p = VARDATA_ANY(pat);
-		plen = VARSIZE_ANY_EXHDR(pat);
-		s = VARDATA_ANY(str);
-		slen = VARSIZE_ANY_EXHDR(str);
-		return SB_IMatchText(s, slen, p, plen, locale);
-	}
 }
 
 /*
diff --git a/src/backend/utils/adt/like_support.c b/src/backend/utils/adt/like_support.c
index ee71ca89ffd..c172f7e55fc 100644
--- a/src/backend/utils/adt/like_support.c
+++ b/src/backend/utils/adt/like_support.c
@@ -1495,13 +1495,8 @@ pattern_char_isalpha(char c, bool is_multibyte,
 {
 	if (locale->ctype_is_c)
 		return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
-	else if (is_multibyte && IS_HIGHBIT_SET(c))
-		return true;
-	else if (locale->provider != COLLPROVIDER_LIBC)
-		return IS_HIGHBIT_SET(c) ||
-			(c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
 	else
-		return isalpha_l((unsigned char) c, locale->info.lt);
+		return char_is_cased(c, locale);
 }
 
 
diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index 5643ef45ed3..9d27567cab7 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -1208,6 +1208,9 @@ create_pg_locale(Oid collid, MemoryContext context)
 	Assert((result->collate_is_c && result->collate == NULL) ||
 		   (!result->collate_is_c && result->collate != NULL));
 
+	Assert((result->ctype_is_c && result->ctype == NULL) ||
+		   (!result->ctype_is_c && result->ctype != NULL));
+
 	datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collversion,
 							&isnull);
 	if (!isnull)
@@ -1377,6 +1380,27 @@ is_encoding_supported_by_collprovider(char collprovider, int encoding)
 		return true;
 }
 
+size_t
+pg_strlower(char *dst, size_t dstsize, const char *src, ssize_t srclen,
+			pg_locale_t locale)
+{
+	return locale->ctype->strlower(dst, dstsize, src, srclen, locale);
+}
+
+size_t
+pg_strtitle(char *dst, size_t dstsize, const char *src, ssize_t srclen,
+			pg_locale_t locale)
+{
+	return locale->ctype->strtitle(dst, dstsize, src, srclen, locale);
+}
+
+size_t
+pg_strupper(char *dst, size_t dstsize, const char *src, ssize_t srclen,
+			pg_locale_t locale)
+{
+	return locale->ctype->strupper(dst, dstsize, src, srclen, locale);
+}
+
 /*
  * pg_strcoll
  *
@@ -1511,6 +1535,53 @@ pg_strnxfrm_prefix(char *dest, size_t destsize, const char *src,
 	return locale->collate->strnxfrm_prefix(dest, destsize, src, srclen, locale);
 }
 
+/*
+ * char_properties()
+ *
+ * Out of the properties specified in the given mask, return a new mask of the
+ * properties true for the given character.
+ */
+int
+char_properties(pg_wchar wc, int mask, pg_locale_t locale)
+{
+	return locale->ctype->char_properties(wc, mask, locale);
+}
+
+/*
+ * char_is_cased()
+ *
+ * Fuzzy test of whether the given char is case-varying or not. The argument
+ * is a single byte, so in a multibyte encoding, just assume any non-ASCII
+ * char is case-varying.
+ */
+bool
+char_is_cased(char ch, pg_locale_t locale)
+{
+	return locale->ctype->char_is_cased(ch, locale);
+}
+
+/*
+ * char_tolower_enabled()
+ *
+ * Does the provider support char_tolower()?
+ */
+bool
+char_tolower_enabled(pg_locale_t locale)
+{
+	return (locale->ctype->char_tolower != NULL);
+}
+
+/*
+ * char_tolower()
+ *
+ * Convert char (single-byte encoding) to lowercase.
+ */
+char
+char_tolower(unsigned char ch, pg_locale_t locale)
+{
+	return locale->ctype->char_tolower(ch, locale);
+}
+
 /*
  * Return required encoding ID for the given locale, or -1 if any encoding is
  * valid for the locale.
diff --git a/src/backend/utils/adt/pg_locale_builtin.c b/src/backend/utils/adt/pg_locale_builtin.c
index 2e2d78758e1..50efcb5e3d3 100644
--- a/src/backend/utils/adt/pg_locale_builtin.c
+++ b/src/backend/utils/adt/pg_locale_builtin.c
@@ -13,6 +13,8 @@
 
 #include "catalog/pg_database.h"
 #include "catalog/pg_collation.h"
+#include "common/unicode_case.h"
+#include "common/unicode_category.h"
 #include "mb/pg_wchar.h"
 #include "miscadmin.h"
 #include "utils/builtins.h"
@@ -24,6 +26,131 @@ extern pg_locale_t create_pg_locale_builtin(Oid collid,
 											MemoryContext context);
 extern char *get_collation_actual_version_builtin(const char *collcollate);
 
+struct WordBoundaryState
+{
+	const char *str;
+	size_t		len;
+	size_t		offset;
+	bool		init;
+	bool		prev_alnum;
+};
+
+/*
+ * Simple word boundary iterator that draws boundaries each time the result of
+ * pg_u_isalnum() changes.
+ */
+static size_t
+initcap_wbnext(void *state)
+{
+	struct WordBoundaryState *wbstate = (struct WordBoundaryState *) state;
+
+	while (wbstate->offset < wbstate->len &&
+		   wbstate->str[wbstate->offset] != '\0')
+	{
+		pg_wchar	u = utf8_to_unicode((unsigned char *) wbstate->str +
+										wbstate->offset);
+		bool		curr_alnum = pg_u_isalnum(u, true);
+
+		if (!wbstate->init || curr_alnum != wbstate->prev_alnum)
+		{
+			size_t		prev_offset = wbstate->offset;
+
+			wbstate->init = true;
+			wbstate->offset += unicode_utf8len(u);
+			wbstate->prev_alnum = curr_alnum;
+			return prev_offset;
+		}
+
+		wbstate->offset += unicode_utf8len(u);
+	}
+
+	return wbstate->len;
+}
+
+static size_t
+strlower_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
+				 pg_locale_t locale)
+{
+	return unicode_strlower(dest, destsize, src, srclen);
+}
+
+static size_t
+strtitle_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
+				 pg_locale_t locale)
+{
+	struct WordBoundaryState wbstate = {
+		.str = src,
+		.len = srclen,
+		.offset = 0,
+		.init = false,
+		.prev_alnum = false,
+	};
+
+	return unicode_strtitle(dest, destsize, src, srclen,
+							initcap_wbnext, &wbstate);
+}
+
+static size_t
+strupper_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
+				 pg_locale_t locale)
+{
+	return unicode_strupper(dest, destsize, src, srclen);
+}
+
+static int
+char_properties_builtin(pg_wchar wc, int mask, pg_locale_t locale)
+{
+	int			result = 0;
+
+	if ((mask & PG_ISDIGIT) && pg_u_isdigit(wc, true))
+		result |= PG_ISDIGIT;
+	if ((mask & PG_ISALPHA) && pg_u_isalpha(wc))
+		result |= PG_ISALPHA;
+	if ((mask & PG_ISUPPER) && pg_u_isupper(wc))
+		result |= PG_ISUPPER;
+	if ((mask & PG_ISLOWER) && pg_u_islower(wc))
+		result |= PG_ISLOWER;
+	if ((mask & PG_ISGRAPH) && pg_u_isgraph(wc))
+		result |= PG_ISGRAPH;
+	if ((mask & PG_ISPRINT) && pg_u_isprint(wc))
+		result |= PG_ISPRINT;
+	if ((mask & PG_ISPUNCT) && pg_u_ispunct(wc, true))
+		result |= PG_ISPUNCT;
+	if ((mask & PG_ISSPACE) && pg_u_isspace(wc))
+		result |= PG_ISSPACE;
+
+	return result;
+}
+
+static bool
+char_is_cased_builtin(char ch, pg_locale_t locale)
+{
+	return IS_HIGHBIT_SET(ch) ||
+		(ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z');
+}
+
+static pg_wchar
+wc_toupper_builtin(pg_wchar wc, pg_locale_t locale)
+{
+	return unicode_uppercase_simple(wc);
+}
+
+static pg_wchar
+wc_tolower_builtin(pg_wchar wc, pg_locale_t locale)
+{
+	return unicode_lowercase_simple(wc);
+}
+
+static const struct ctype_methods ctype_methods_builtin = {
+	.strlower = strlower_builtin,
+	.strtitle = strtitle_builtin,
+	.strupper = strupper_builtin,
+	.char_properties = char_properties_builtin,
+	.char_is_cased = char_is_cased_builtin,
+	.wc_tolower = wc_tolower_builtin,
+	.wc_toupper = wc_toupper_builtin,
+};
+
 pg_locale_t
 create_pg_locale_builtin(Oid collid, MemoryContext context)
 {
@@ -66,6 +193,8 @@ create_pg_locale_builtin(Oid collid, MemoryContext context)
 	result->deterministic = true;
 	result->collate_is_c = true;
 	result->ctype_is_c = (strcmp(locstr, "C") == 0);
+	if (!result->ctype_is_c)
+		result->ctype = &ctype_methods_builtin;
 
 	return result;
 }
diff --git a/src/backend/utils/adt/pg_locale_icu.c b/src/backend/utils/adt/pg_locale_icu.c
index 4b7a897e930..839b905c560 100644
--- a/src/backend/utils/adt/pg_locale_icu.c
+++ b/src/backend/utils/adt/pg_locale_icu.c
@@ -64,6 +64,11 @@ static size_t strnxfrm_prefix_icu(char *dest, size_t destsize,
 								  pg_locale_t locale);
 extern char *get_collation_actual_version_icu(const char *collcollate);
 
+typedef int32_t (*ICU_Convert_Func) (UChar *dest, int32_t destCapacity,
+									 const UChar *src, int32_t srcLength,
+									 const char *locale,
+									 UErrorCode *pErrorCode);
+
 /*
  * Converter object for converting between ICU's UChar strings and C strings
  * in database encoding.  Since the database encoding doesn't change, we only
@@ -73,6 +78,16 @@ static UConverter *icu_converter = NULL;
 
 static UCollator *make_icu_collator(const char *iculocstr,
 									const char *icurules);
+
+static size_t strlower_icu(char *dest, size_t destsize,
+						   const char *src, ssize_t srclen,
+						   pg_locale_t locale);
+static size_t strtitle_icu(char *dest, size_t destsize,
+						   const char *src, ssize_t srclen,
+						   pg_locale_t locale);
+static size_t strupper_icu(char *dest, size_t destsize,
+						   const char *src, ssize_t srclen,
+						   pg_locale_t locale);
 static int	strncoll_icu(const char *arg1, ssize_t len1,
 						 const char *arg2, ssize_t len2,
 						 pg_locale_t locale);
@@ -93,8 +108,63 @@ static size_t uchar_length(UConverter *converter,
 static int32_t uchar_convert(UConverter *converter,
 							 UChar *dest, int32_t destlen,
 							 const char *src, int32_t srclen);
+static int32_t icu_to_uchar(UChar **buff_uchar, const char *buff,
+							size_t nbytes);
+static size_t icu_from_uchar(char *dest, size_t destsize,
+							 const UChar *buff_uchar, int32_t len_uchar);
 static void icu_set_collation_attributes(UCollator *collator, const char *loc,
 										 UErrorCode *status);
+static int32_t icu_convert_case(ICU_Convert_Func func, pg_locale_t mylocale,
+								UChar **buff_dest, UChar *buff_source,
+								int32_t len_source);
+static int32_t u_strToTitle_default_BI(UChar *dest, int32_t destCapacity,
+									   const UChar *src, int32_t srcLength,
+									   const char *locale,
+									   UErrorCode *pErrorCode);
+
+static int
+char_properties_icu(pg_wchar wc, int mask, pg_locale_t locale)
+{
+	int			result = 0;
+
+	if ((mask & PG_ISDIGIT) && u_isdigit(wc))
+		result |= PG_ISDIGIT;
+	if ((mask & PG_ISALPHA) && u_isalpha(wc))
+		result |= PG_ISALPHA;
+	if ((mask & PG_ISUPPER) && u_isupper(wc))
+		result |= PG_ISUPPER;
+	if ((mask & PG_ISLOWER) && u_islower(wc))
+		result |= PG_ISLOWER;
+	if ((mask & PG_ISGRAPH) && u_isgraph(wc))
+		result |= PG_ISGRAPH;
+	if ((mask & PG_ISPRINT) && u_isprint(wc))
+		result |= PG_ISPRINT;
+	if ((mask & PG_ISPUNCT) && u_ispunct(wc))
+		result |= PG_ISPUNCT;
+	if ((mask & PG_ISSPACE) && u_isspace(wc))
+		result |= PG_ISSPACE;
+
+	return result;
+}
+
+static bool
+char_is_cased_icu(char ch, pg_locale_t locale)
+{
+	return IS_HIGHBIT_SET(ch) ||
+		(ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z');
+}
+
+static pg_wchar
+toupper_icu(pg_wchar wc, pg_locale_t locale)
+{
+	return u_toupper(wc);
+}
+
+static pg_wchar
+tolower_icu(pg_wchar wc, pg_locale_t locale)
+{
+	return u_tolower(wc);
+}
 
 static const struct collate_methods collate_methods_icu = {
 	.strncoll = strncoll_icu,
@@ -114,6 +184,15 @@ static const struct collate_methods collate_methods_icu_utf8 = {
 	.strxfrm_is_safe = true,
 };
 
+static const struct ctype_methods ctype_methods_icu = {
+	.strlower = strlower_icu,
+	.strtitle = strtitle_icu,
+	.strupper = strupper_icu,
+	.char_properties = char_properties_icu,
+	.char_is_cased = char_is_cased_icu,
+	.wc_toupper = toupper_icu,
+	.wc_tolower = tolower_icu,
+};
 #endif
 
 pg_locale_t
@@ -184,6 +263,7 @@ create_pg_locale_icu(Oid collid, MemoryContext context)
 		result->collate = &collate_methods_icu_utf8;
 	else
 		result->collate = &collate_methods_icu;
+	result->ctype = &ctype_methods_icu;
 
 	return result;
 #else
@@ -357,6 +437,66 @@ make_icu_collator(const char *iculocstr, const char *icurules)
 	}
 }
 
+static size_t
+strlower_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
+			 pg_locale_t locale)
+{
+	int32_t		len_uchar;
+	int32_t		len_conv;
+	UChar	   *buff_uchar;
+	UChar	   *buff_conv;
+	size_t		result_len;
+
+	len_uchar = icu_to_uchar(&buff_uchar, src, srclen);
+	len_conv = icu_convert_case(u_strToLower, locale,
+								&buff_conv, buff_uchar, len_uchar);
+	result_len = icu_from_uchar(dest, destsize, buff_conv, len_conv);
+	pfree(buff_uchar);
+	pfree(buff_conv);
+
+	return result_len;
+}
+
+static size_t
+strtitle_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
+			 pg_locale_t locale)
+{
+	int32_t		len_uchar;
+	int32_t		len_conv;
+	UChar	   *buff_uchar;
+	UChar	   *buff_conv;
+	size_t		result_len;
+
+	len_uchar = icu_to_uchar(&buff_uchar, src, srclen);
+	len_conv = icu_convert_case(u_strToTitle_default_BI, locale,
+								&buff_conv, buff_uchar, len_uchar);
+	result_len = icu_from_uchar(dest, destsize, buff_conv, len_conv);
+	pfree(buff_uchar);
+	pfree(buff_conv);
+
+	return result_len;
+}
+
+static size_t
+strupper_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
+			 pg_locale_t locale)
+{
+	int32_t		len_uchar;
+	int32_t		len_conv;
+	UChar	   *buff_uchar;
+	UChar	   *buff_conv;
+	size_t		result_len;
+
+	len_uchar = icu_to_uchar(&buff_uchar, src, srclen);
+	len_conv = icu_convert_case(u_strToUpper, locale,
+								&buff_conv, buff_uchar, len_uchar);
+	result_len = icu_from_uchar(dest, destsize, buff_conv, len_conv);
+	pfree(buff_uchar);
+	pfree(buff_conv);
+
+	return result_len;
+}
+
 /*
  * strncoll_icu_utf8
  *
@@ -496,7 +636,7 @@ get_collation_actual_version_icu(const char *collcollate)
  * The result string is nul-terminated, though most callers rely on the
  * result length instead.
  */
-int32_t
+static int32_t
 icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes)
 {
 	int32_t		len_uchar;
@@ -523,8 +663,8 @@ icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes)
  *
  * The result string is nul-terminated.
  */
-int32_t
-icu_from_uchar(char **result, const UChar *buff_uchar, int32_t len_uchar)
+static size_t
+icu_from_uchar(char *dest, size_t destsize, const UChar *buff_uchar, int32_t len_uchar)
 {
 	UErrorCode	status;
 	int32_t		len_result;
@@ -539,10 +679,11 @@ icu_from_uchar(char **result, const UChar *buff_uchar, int32_t len_uchar)
 				(errmsg("%s failed: %s", "ucnv_fromUChars",
 						u_errorName(status))));
 
-	*result = palloc(len_result + 1);
+	if (len_result + 1 > destsize)
+		return len_result;
 
 	status = U_ZERO_ERROR;
-	len_result = ucnv_fromUChars(icu_converter, *result, len_result + 1,
+	len_result = ucnv_fromUChars(icu_converter, dest, len_result + 1,
 								 buff_uchar, len_uchar, &status);
 	if (U_FAILURE(status) ||
 		status == U_STRING_NOT_TERMINATED_WARNING)
@@ -553,6 +694,43 @@ icu_from_uchar(char **result, const UChar *buff_uchar, int32_t len_uchar)
 	return len_result;
 }
 
+static int32_t
+icu_convert_case(ICU_Convert_Func func, pg_locale_t mylocale,
+				 UChar **buff_dest, UChar *buff_source, int32_t len_source)
+{
+	UErrorCode	status;
+	int32_t		len_dest;
+
+	len_dest = len_source;		/* try first with same length */
+	*buff_dest = palloc(len_dest * sizeof(**buff_dest));
+	status = U_ZERO_ERROR;
+	len_dest = func(*buff_dest, len_dest, buff_source, len_source,
+					mylocale->info.icu.locale, &status);
+	if (status == U_BUFFER_OVERFLOW_ERROR)
+	{
+		/* try again with adjusted length */
+		pfree(*buff_dest);
+		*buff_dest = palloc(len_dest * sizeof(**buff_dest));
+		status = U_ZERO_ERROR;
+		len_dest = func(*buff_dest, len_dest, buff_source, len_source,
+						mylocale->info.icu.locale, &status);
+	}
+	if (U_FAILURE(status))
+		ereport(ERROR,
+				(errmsg("case conversion failed: %s", u_errorName(status))));
+	return len_dest;
+}
+
+static int32_t
+u_strToTitle_default_BI(UChar *dest, int32_t destCapacity,
+						const UChar *src, int32_t srcLength,
+						const char *locale,
+						UErrorCode *pErrorCode)
+{
+	return u_strToTitle(dest, destCapacity, src, srcLength,
+						NULL, locale, pErrorCode);
+}
+
 /*
  * strncoll_icu
  *
diff --git a/src/backend/utils/adt/pg_locale_libc.c b/src/backend/utils/adt/pg_locale_libc.c
index cb519cfb521..38f9164ad98 100644
--- a/src/backend/utils/adt/pg_locale_libc.c
+++ b/src/backend/utils/adt/pg_locale_libc.c
@@ -11,6 +11,9 @@
 
 #include "postgres.h"
 
+#include <limits.h>
+#include <wctype.h>
+
 #include "access/htup_details.h"
 #include "catalog/pg_database.h"
 #include "catalog/pg_collation.h"
@@ -57,6 +60,34 @@ static int	strncoll_libc_win32_utf8(const char *arg1, ssize_t len1,
 									 pg_locale_t locale);
 #endif
 
+static size_t strlower_libc_sb(char *dest, size_t destsize,
+							   const char *src, ssize_t srclen,
+							   pg_locale_t locale);
+static size_t strlower_libc_mb(char *dest, size_t destsize,
+							   const char *src, ssize_t srclen,
+							   pg_locale_t locale);
+static size_t strtitle_libc_sb(char *dest, size_t destsize,
+							   const char *src, ssize_t srclen,
+							   pg_locale_t locale);
+static size_t strtitle_libc_mb(char *dest, size_t destsize,
+							   const char *src, ssize_t srclen,
+							   pg_locale_t locale);
+static size_t strupper_libc_sb(char *dest, size_t destsize,
+							   const char *src, ssize_t srclen,
+							   pg_locale_t locale);
+static size_t strupper_libc_mb(char *dest, size_t destsize,
+							   const char *src, ssize_t srclen,
+							   pg_locale_t locale);
+
+static int	char_properties_libc_sb(pg_wchar wc, int mask,
+									   pg_locale_t locale);
+static int	char_properties_libc_mb(pg_wchar wc, int mask,
+									  pg_locale_t locale);
+static pg_wchar toupper_libc_sb(pg_wchar wc, pg_locale_t locale);
+static pg_wchar toupper_libc_mb(pg_wchar wc, pg_locale_t locale);
+static pg_wchar tolower_libc_sb(pg_wchar wc, pg_locale_t locale);
+static pg_wchar tolower_libc_mb(pg_wchar wc, pg_locale_t locale);
+
 static const struct collate_methods collate_methods_libc = {
 	.strncoll = strncoll_libc,
 	.strnxfrm = strnxfrm_libc,
@@ -78,6 +109,324 @@ static const struct collate_methods collate_methods_libc = {
 #endif
 };
 
+#ifdef WIN32
+static const struct collate_methods collate_methods_libc_win32_utf8 = {
+	.strncoll = strncoll_libc_win32_utf8,
+	.strnxfrm = strnxfrm_libc,
+	.strnxfrm_prefix = NULL,
+#ifdef TRUST_STRXFRM
+	.strxfrm_is_safe = true,
+#else
+	.strxfrm_is_safe = false,
+#endif
+};
+#endif
+
+static bool
+char_is_cased_libc(char ch, pg_locale_t locale)
+{
+	bool		is_multibyte = pg_database_encoding_max_length() > 1;
+
+	if (is_multibyte && IS_HIGHBIT_SET(ch))
+		return true;
+	else
+		return isalpha_l((unsigned char) ch, locale->info.lt);
+}
+
+static char
+char_tolower_libc(unsigned char ch, pg_locale_t locale)
+{
+	Assert(pg_database_encoding_max_length() == 1);
+	return tolower_l(ch, locale->info.lt);
+}
+
+static const struct ctype_methods ctype_methods_libc_sb = {
+	.strlower = strlower_libc_sb,
+	.strtitle = strtitle_libc_sb,
+	.strupper = strupper_libc_sb,
+	.char_properties = char_properties_libc_sb,
+	.char_is_cased = char_is_cased_libc,
+	.char_tolower = char_tolower_libc,
+	.wc_toupper = toupper_libc_sb,
+	.wc_tolower = tolower_libc_sb,
+	.max_chr = UCHAR_MAX,
+};
+
+/*
+ * Non-UTF8 multibyte encodings use multibyte semantics for case mapping, but
+ * single-byte semantics for pattern matching.
+ */
+static const struct ctype_methods ctype_methods_libc_other_mb = {
+	.strlower = strlower_libc_mb,
+	.strtitle = strtitle_libc_mb,
+	.strupper = strupper_libc_mb,
+	.char_properties = char_properties_libc_sb,
+	.char_is_cased = char_is_cased_libc,
+	.char_tolower = char_tolower_libc,
+	.wc_toupper = toupper_libc_sb,
+	.wc_tolower = tolower_libc_sb,
+	.max_chr = UCHAR_MAX,
+};
+
+static const struct ctype_methods ctype_methods_libc_utf8 = {
+	.strlower = strlower_libc_mb,
+	.strtitle = strtitle_libc_mb,
+	.strupper = strupper_libc_mb,
+	.char_properties = char_properties_libc_mb,
+	.char_is_cased = char_is_cased_libc,
+	.char_tolower = char_tolower_libc,
+	.wc_toupper = toupper_libc_mb,
+	.wc_tolower = tolower_libc_mb,
+};
+
+/*
+ * Single-byte lowercasing; dest is written only if srclen + 1 <= destsize.
+ */
+static size_t
+strlower_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
+				 pg_locale_t locale)
+{
+	if (srclen < 0)
+		srclen = strlen(src);
+
+	if (srclen + 1 <= destsize)
+	{
+		locale_t	loc = locale->info.lt;
+		char	   *p;
+
+		memcpy(dest, src, srclen);
+		dest[srclen] = '\0';
+
+		/*
+		 * Note: we assume that tolower_l() will not be so broken as to need
+		 * an isupper_l() guard test.  When using the default collation, we
+		 * apply the traditional Postgres behavior that forces ASCII-style
+		 * treatment of I/i, but in non-default collations you get exactly
+		 * what the collation says.
+		 */
+		for (p = dest; *p; p++)
+			*p = tolower_l((unsigned char) *p, loc);
+	}
+
+	return srclen;
+}
+
+/* Lowercase via wchar_t; returns result length even if dest is too small */
+static size_t
+strlower_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
+				 pg_locale_t locale)
+{
+	locale_t	loc = locale->info.lt;
+	size_t		result_size;
+	wchar_t    *workspace;
+	char	   *result;
+	size_t		curr_char;
+	size_t		max_size;
+
+	if (srclen < 0)
+		srclen = strlen(src);
+
+	/* Overflow paranoia */
+	if ((srclen + 1) > (INT_MAX / sizeof(wchar_t)))
+		ereport(ERROR,
+				(errcode(ERRCODE_OUT_OF_MEMORY),
+				 errmsg("out of memory")));
+
+	/* Output workspace cannot have more codes than input bytes */
+	workspace = (wchar_t *) palloc((srclen + 1) * sizeof(wchar_t));
+
+	char2wchar(workspace, srclen + 1, src, srclen, locale);
+
+	for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
+		workspace[curr_char] = towlower_l(workspace[curr_char], loc);
+
+	/* Make result large enough; case change might change number of bytes */
+	max_size = curr_char * pg_database_encoding_max_length();
+	result = palloc(max_size + 1);
+
+	result_size = wchar2char(result, workspace, max_size + 1, locale);
+
+	/* Copy out only if it fits, but release the scratch buffers either way */
+	if (result_size + 1 <= destsize)
+	{
+		memcpy(dest, result, result_size);
+		dest[result_size] = '\0';
+	}
+
+	pfree(workspace);
+	pfree(result);
+
+	return result_size;
+}
+
+static size_t
+strtitle_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
+				 pg_locale_t locale)
+{
+	if (srclen < 0)
+		srclen = strlen(src);
+
+	if (srclen + 1 <= destsize)
+	{
+		locale_t	loc = locale->info.lt;
+		int			wasalnum = false;
+		char	   *p;
+
+		memcpy(dest, src, srclen);
+		dest[srclen] = '\0';
+
+		/*
+		 * Note: we assume that toupper_l()/tolower_l() will not be so broken
+		 * as to need guard tests.  When using the default collation, we apply
+		 * the traditional Postgres behavior that forces ASCII-style treatment
+		 * of I/i, but in non-default collations you get exactly what the
+		 * collation says.
+		 */
+		for (p = dest; *p; p++)
+		{
+			if (wasalnum)
+				*p = tolower_l((unsigned char) *p, loc);
+			else
+				*p = toupper_l((unsigned char) *p, loc);
+			wasalnum = isalnum_l((unsigned char) *p, loc);
+		}
+	}
+
+	return srclen;
+}
+
+/* Initcap via wchar_t; returns result length even if dest is too small */
+static size_t
+strtitle_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
+				 pg_locale_t locale)
+{
+	locale_t	loc = locale->info.lt;
+	int			wasalnum = false;
+	size_t		result_size;
+	wchar_t    *workspace;
+	char	   *result;
+	size_t		curr_char;
+	size_t		max_size;
+
+	if (srclen < 0)
+		srclen = strlen(src);
+
+	/* Overflow paranoia */
+	if ((srclen + 1) > (INT_MAX / sizeof(wchar_t)))
+		ereport(ERROR,
+				(errcode(ERRCODE_OUT_OF_MEMORY),
+				 errmsg("out of memory")));
+
+	/* Output workspace cannot have more codes than input bytes */
+	workspace = (wchar_t *) palloc((srclen + 1) * sizeof(wchar_t));
+
+	char2wchar(workspace, srclen + 1, src, srclen, locale);
+
+	for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
+	{
+		if (wasalnum)
+			workspace[curr_char] = towlower_l(workspace[curr_char], loc);
+		else
+			workspace[curr_char] = towupper_l(workspace[curr_char], loc);
+		wasalnum = iswalnum_l(workspace[curr_char], loc);
+	}
+
+	/* Make result large enough; case change might change number of bytes */
+	max_size = curr_char * pg_database_encoding_max_length();
+	result = palloc(max_size + 1);
+
+	result_size = wchar2char(result, workspace, max_size + 1, locale);
+
+	/* Copy out only if it fits, but release the scratch buffers either way */
+	if (result_size + 1 <= destsize)
+	{
+		memcpy(dest, result, result_size);
+		dest[result_size] = '\0';
+	}
+
+	pfree(workspace);
+	pfree(result);
+
+	return result_size;
+}
+
+static size_t
+strupper_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
+				 pg_locale_t locale)
+{
+	if (srclen < 0)
+		srclen = strlen(src);
+
+	if (srclen + 1 <= destsize)
+	{
+		locale_t	loc = locale->info.lt;
+		char	   *p;
+
+		memcpy(dest, src, srclen);
+		dest[srclen] = '\0';
+
+		/*
+		 * Note: we assume that toupper_l() will not be so broken as to need
+		 * an islower_l() guard test.  When using the default collation, we
+		 * apply the traditional Postgres behavior that forces ASCII-style
+		 * treatment of I/i, but in non-default collations you get exactly
+		 * what the collation says.
+		 */
+		for (p = dest; *p; p++)
+			*p = toupper_l((unsigned char) *p, loc);
+	}
+
+	return srclen;
+}
+
+/* Uppercase via wchar_t; returns result length even if dest is too small */
+static size_t
+strupper_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
+				 pg_locale_t locale)
+{
+	locale_t	loc = locale->info.lt;
+	size_t		result_size;
+	wchar_t    *workspace;
+	char	   *result;
+	size_t		curr_char;
+	size_t		max_size;
+
+	if (srclen < 0)
+		srclen = strlen(src);
+
+	/* Overflow paranoia */
+	if ((srclen + 1) > (INT_MAX / sizeof(wchar_t)))
+		ereport(ERROR,
+				(errcode(ERRCODE_OUT_OF_MEMORY),
+				 errmsg("out of memory")));
+
+	/* Output workspace cannot have more codes than input bytes */
+	workspace = (wchar_t *) palloc((srclen + 1) * sizeof(wchar_t));
+
+	char2wchar(workspace, srclen + 1, src, srclen, locale);
+
+	for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
+		workspace[curr_char] = towupper_l(workspace[curr_char], loc);
+
+	/* Make result large enough; case change might change number of bytes */
+	max_size = curr_char * pg_database_encoding_max_length();
+	result = palloc(max_size + 1);
+
+	result_size = wchar2char(result, workspace, max_size + 1, locale);
+
+	/* Copy out only if it fits, but release the scratch buffers either way */
+	if (result_size + 1 <= destsize)
+	{
+		memcpy(dest, result, result_size);
+		dest[result_size] = '\0';
+	}
+
+	pfree(workspace);
+	pfree(result);
+
+	return result_size;
+}
+
 pg_locale_t
 create_pg_locale_libc(Oid collid, MemoryContext context)
 {
@@ -142,6 +491,15 @@ create_pg_locale_libc(Oid collid, MemoryContext context)
 #endif
 			result->collate = &collate_methods_libc;
 	}
+	if (!result->ctype_is_c)
+	{
+		if (GetDatabaseEncoding() == PG_UTF8)
+			result->ctype = &ctype_methods_libc_utf8;
+		else if (pg_database_encoding_max_length() > 1)
+			result->ctype = &ctype_methods_libc_other_mb;
+		else
+			result->ctype = &ctype_methods_libc_sb;
+	}
 
 	return result;
 }
@@ -490,6 +848,113 @@ report_newlocale_failure(const char *localename)
 						localename) : 0)));
 }
 
+static int
+char_properties_libc_sb(pg_wchar wc, int mask, pg_locale_t locale)
+{
+	int			result = 0;
+
+	Assert(!locale->ctype_is_c);
+	Assert(GetDatabaseEncoding() != PG_UTF8);
+
+	if (wc > (pg_wchar) UCHAR_MAX)
+		return 0;
+
+	if ((mask & PG_ISDIGIT) && isdigit_l((unsigned char) wc, locale->info.lt))
+		result |= PG_ISDIGIT;
+	if ((mask & PG_ISALPHA) && isalpha_l((unsigned char) wc, locale->info.lt))
+		result |= PG_ISALPHA;
+	if ((mask & PG_ISUPPER) && isupper_l((unsigned char) wc, locale->info.lt))
+		result |= PG_ISUPPER;
+	if ((mask & PG_ISLOWER) && islower_l((unsigned char) wc, locale->info.lt))
+		result |= PG_ISLOWER;
+	if ((mask & PG_ISGRAPH) && isgraph_l((unsigned char) wc, locale->info.lt))
+		result |= PG_ISGRAPH;
+	if ((mask & PG_ISPRINT) && isprint_l((unsigned char) wc, locale->info.lt))
+		result |= PG_ISPRINT;
+	if ((mask & PG_ISPUNCT) && ispunct_l((unsigned char) wc, locale->info.lt))
+		result |= PG_ISPUNCT;
+	if ((mask & PG_ISSPACE) && isspace_l((unsigned char) wc, locale->info.lt))
+		result |= PG_ISSPACE;
+
+	return result;
+}
+
+static int
+char_properties_libc_mb(pg_wchar wc, int mask, pg_locale_t locale)
+{
+	int			result = 0;
+
+	Assert(!locale->ctype_is_c);
+	Assert(GetDatabaseEncoding() == PG_UTF8);
+
+	/* if wchar_t cannot represent the value, just return 0 */
+	if (sizeof(wchar_t) < 4 && wc > (pg_wchar) 0xFFFF)
+		return 0;
+
+	if ((mask & PG_ISDIGIT) && iswdigit_l((wint_t) wc, locale->info.lt))
+		result |= PG_ISDIGIT;
+	if ((mask & PG_ISALPHA) && iswalpha_l((wint_t) wc, locale->info.lt))
+		result |= PG_ISALPHA;
+	if ((mask & PG_ISUPPER) && iswupper_l((wint_t) wc, locale->info.lt))
+		result |= PG_ISUPPER;
+	if ((mask & PG_ISLOWER) && iswlower_l((wint_t) wc, locale->info.lt))
+		result |= PG_ISLOWER;
+	if ((mask & PG_ISGRAPH) && iswgraph_l((wint_t) wc, locale->info.lt))
+		result |= PG_ISGRAPH;
+	if ((mask & PG_ISPRINT) && iswprint_l((wint_t) wc, locale->info.lt))
+		result |= PG_ISPRINT;
+	if ((mask & PG_ISPUNCT) && iswpunct_l((wint_t) wc, locale->info.lt))
+		result |= PG_ISPUNCT;
+	if ((mask & PG_ISSPACE) && iswspace_l((wint_t) wc, locale->info.lt))
+		result |= PG_ISSPACE;
+
+	return result;
+}
+
+static pg_wchar
+toupper_libc_sb(pg_wchar wc, pg_locale_t locale)
+{
+	Assert(GetDatabaseEncoding() != PG_UTF8);
+
+	if (wc <= (pg_wchar) UCHAR_MAX)
+		return toupper_l((unsigned char) wc, locale->info.lt);
+	else
+		return wc;
+}
+
+static pg_wchar
+toupper_libc_mb(pg_wchar wc, pg_locale_t locale)
+{
+	Assert(GetDatabaseEncoding() == PG_UTF8);
+
+	if (sizeof(wchar_t) >= 4 || wc <= (pg_wchar) 0xFFFF)
+		return towupper_l((wint_t) wc, locale->info.lt);
+	else
+		return wc;
+}
+
+static pg_wchar
+tolower_libc_sb(pg_wchar wc, pg_locale_t locale)
+{
+	Assert(GetDatabaseEncoding() != PG_UTF8);
+
+	if (wc <= (pg_wchar) UCHAR_MAX)
+		return tolower_l((unsigned char) wc, locale->info.lt);
+	else
+		return wc;
+}
+
+static pg_wchar
+tolower_libc_mb(pg_wchar wc, pg_locale_t locale)
+{
+	Assert(GetDatabaseEncoding() == PG_UTF8);
+
+	if (sizeof(wchar_t) >= 4 || wc <= (pg_wchar) 0xFFFF)
+		return towlower_l((wint_t) wc, locale->info.lt);
+	else
+		return wc;
+}
+
 /*
  * POSIX doesn't define _l-variants of these functions, but several systems
  * have them.  We provide our own replacements here.
diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h
index 028eec63901..7a509596178 100644
--- a/src/include/utils/pg_locale.h
+++ b/src/include/utils/pg_locale.h
@@ -12,10 +12,25 @@
 #ifndef _PG_LOCALE_
 #define _PG_LOCALE_
 
+#include "mb/pg_wchar.h"
+
 #ifdef USE_ICU
 #include <unicode/ucol.h>
 #endif
 
+/*
+ * Character properties for regular expressions.
+ */
+#define PG_ISDIGIT     0x01
+#define PG_ISALPHA     0x02
+#define PG_ISALNUM     (PG_ISDIGIT | PG_ISALPHA)
+#define PG_ISUPPER     0x04
+#define PG_ISLOWER     0x08
+#define PG_ISGRAPH     0x10
+#define PG_ISPRINT     0x20
+#define PG_ISPUNCT     0x40
+#define PG_ISSPACE     0x80
+
 /* use for libc locale names */
 #define LOCALE_NAME_BUFLEN 128
 
@@ -77,6 +92,43 @@ struct collate_methods
 	bool		strxfrm_is_safe;
 };
 
+struct ctype_methods
+{
+	/* case mapping: LOWER()/INITCAP()/UPPER() */
+	size_t		(*strlower) (char *dest, size_t destsize,
+							 const char *src, ssize_t srclen,
+							 pg_locale_t locale);
+	size_t		(*strtitle) (char *dest, size_t destsize,
+							 const char *src, ssize_t srclen,
+							 pg_locale_t locale);
+	size_t		(*strupper) (char *dest, size_t destsize,
+							 const char *src, ssize_t srclen,
+							 pg_locale_t locale);
+
+	/* required */
+	int			(*char_properties) (pg_wchar wc, int mask, pg_locale_t locale);
+
+	/* required */
+	bool		(*char_is_cased) (char ch, pg_locale_t locale);
+
+	/*
+	 * Optional. If defined, will only be called for single-byte encodings. If
+	 * not defined, or if the encoding is multibyte, will fall back to
+	 * pg_strlower().
+	 */
+	char		(*char_tolower) (unsigned char ch, pg_locale_t locale);
+
+	/* required */
+	pg_wchar	(*wc_toupper) (pg_wchar wc, pg_locale_t locale);
+	pg_wchar	(*wc_tolower) (pg_wchar wc, pg_locale_t locale);
+
+	/*
+	 * For regex and pattern matching efficiency, the maximum char value
+	 * supported by the above methods. If zero, limit is set by regex code.
+	 */
+	pg_wchar	max_chr;
+};
+
 /*
  * We use a discriminated union to hold either a locale_t or an ICU collator.
  * pg_locale_t is occasionally checked for truth, so make it a pointer.
@@ -101,6 +153,7 @@ struct pg_locale_struct
 	bool		ctype_is_c;
 
 	const struct collate_methods *collate;	/* NULL if collate_is_c */
+	const struct ctype_methods *ctype;	/* NULL if ctype_is_c */
 
 	union
 	{
@@ -126,6 +179,19 @@ extern pg_locale_t pg_newlocale_from_collation(Oid collid);
 
 extern char *get_collation_actual_version(char collprovider, const char *collcollate);
 extern bool is_encoding_supported_by_collprovider(char collprovider, int encoding);
+extern int	char_properties(pg_wchar wc, int mask, pg_locale_t locale);
+extern bool char_is_cased(char ch, pg_locale_t locale);
+extern bool char_tolower_enabled(pg_locale_t locale);
+extern char char_tolower(unsigned char ch, pg_locale_t locale);
+extern size_t pg_strlower(char *dest, size_t destsize,
+						  const char *src, ssize_t srclen,
+						  pg_locale_t locale);
+extern size_t pg_strtitle(char *dest, size_t destsize,
+						  const char *src, ssize_t srclen,
+						  pg_locale_t locale);
+extern size_t pg_strupper(char *dest, size_t destsize,
+						  const char *src, ssize_t srclen,
+						  pg_locale_t locale);
 extern int	pg_strcoll(const char *arg1, const char *arg2, pg_locale_t locale);
 extern int	pg_strncoll(const char *arg1, ssize_t len1,
 						const char *arg2, ssize_t len2, pg_locale_t locale);
@@ -145,11 +211,6 @@ extern const char *builtin_validate_locale(int encoding, const char *locale);
 extern void icu_validate_locale(int encoding, const char *loc_str);
 extern char *icu_language_tag(const char *loc_str, int elevel);
 
-#ifdef USE_ICU
-extern int32_t icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes);
-extern int32_t icu_from_uchar(char **result, const UChar *buff_uchar, int32_t len_uchar);
-#endif
-
 /* These functions convert from/to libc's wchar_t, *not* pg_wchar_t */
 extern size_t wchar2char(char *to, const wchar_t *from, size_t tolen,
 						 pg_locale_t locale);
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 2d4c870423a..94b041ec9e9 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -1823,7 +1823,6 @@ PGTargetServerType
 PGTernaryBool
 PGTransactionStatusType
 PGVerbosity
-PG_Locale_Strategy
 PG_Lock_Status
 PG_init_t
 PGcancel
-- 
2.45.2

