From 46766448d3d9eea75bbfbd2d9080d45af398dcaa Mon Sep 17 00:00:00 2001
From: Jeff Davis <jeff@j-davis.com>
Date: Mon, 1 May 2023 15:38:29 -0700
Subject: [PATCH v23 1/6] Introduce collation provider "builtin".
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

----- CATVERSION -----

Initially, the only locale supported by the builtin provider is "C",
which has identical implementation and semantics as the libc
provider's "C" locale.

The builtin provider has one additional capability that libc does not
offer: its locale is entirely independent of the server environment
variables LC_COLLATE and LC_CTYPE. That means that the collation
locale can be "C" while LC_COLLATE and LC_CTYPE are set to "en_US".

By offering a new builtin provider, it clarifies that the semantics
will never depend on libc, and makes it easier to document the
behavior.

Discussion: https://postgr.es/m/ab925f69-5f9d-f85e-b87c-bd2a44798659@joeconway.com
Discussion: https://postgr.es/m/dd9261f4-7a98-4565-93ec-336c1c110d90@manitou-mail.org
Discussion: https://postgr.es/m/ff4c2f2f9c8fc7ca27c1c24ae37ecaeaeaff6b53.camel%40j-davis.com
Reviewed-by: Daniel Vérité, Peter Eisentraut, Jeremy Schneider
---
 doc/src/sgml/charset.sgml              |  90 ++++++++++++++----
 doc/src/sgml/ref/create_collation.sgml |  11 ++-
 doc/src/sgml/ref/create_database.sgml  |   7 +-
 doc/src/sgml/ref/createdb.sgml         |   2 +-
 doc/src/sgml/ref/initdb.sgml           |  17 +++-
 src/backend/catalog/pg_collation.c     |   5 +-
 src/backend/commands/collationcmds.c   |  94 +++++++++++++++----
 src/backend/commands/dbcommands.c      | 121 ++++++++++++++++++++-----
 src/backend/utils/adt/formatting.c     |   6 ++
 src/backend/utils/adt/pg_locale.c      | 119 ++++++++++++++++++++----
 src/backend/utils/init/postinit.c      |  20 +++-
 src/bin/initdb/initdb.c                |  39 +++++---
 src/bin/initdb/t/001_initdb.pl         |  40 +++++++-
 src/bin/pg_dump/pg_dump.c              |  49 ++++++----
 src/bin/pg_upgrade/t/002_pg_upgrade.pl |  81 +++++++++++++----
 src/bin/psql/describe.c                |   4 +-
 src/bin/scripts/createdb.c             |  18 +++-
 src/bin/scripts/t/020_createdb.pl      |  60 ++++++++++++
 src/include/catalog/pg_collation.dat   |   6 +-
 src/include/catalog/pg_collation.h     |   3 +
 src/include/utils/pg_locale.h          |   6 +-
 src/test/icu/t/010_database.pl         |  22 ++---
 src/test/regress/expected/collate.out  |  19 +++-
 src/test/regress/sql/collate.sql       |   8 ++
 24 files changed, 689 insertions(+), 158 deletions(-)

diff --git a/doc/src/sgml/charset.sgml b/doc/src/sgml/charset.sgml
index 4fc143025e..7114eb7b52 100644
--- a/doc/src/sgml/charset.sgml
+++ b/doc/src/sgml/charset.sgml
@@ -342,22 +342,14 @@ initdb --locale=sv_SE
    <title>Locale Providers</title>
 
    <para>
-    <productname>PostgreSQL</productname> supports multiple <firstterm>locale
-    providers</firstterm>.  This specifies which library supplies the locale
-    data.  One standard provider name is <literal>libc</literal>, which uses
-    the locales provided by the operating system C library.  These are the
-    locales used by most tools provided by the operating system.  Another
-    provider is <literal>icu</literal>, which uses the external
-    ICU<indexterm><primary>ICU</primary></indexterm> library.  ICU locales can
-    only be used if support for ICU was configured when PostgreSQL was built.
+    A locale provider specifies which library defines the locale behavior for
+    collations and character classifications.
    </para>
 
    <para>
     The commands and tools that select the locale settings, as described
-    above, each have an option to select the locale provider.  The examples
-    shown earlier all use the <literal>libc</literal> provider, which is the
-    default.  Here is an example to initialize a database cluster using the
-    ICU provider:
+    above, each have an option to select the locale provider. Here is an
+    example to initialize a database cluster using the ICU provider:
 <programlisting>
 initdb --locale-provider=icu --icu-locale=en
 </programlisting>
@@ -370,12 +362,76 @@ initdb --locale-provider=icu --icu-locale=en
    </para>
 
    <para>
-    Which locale provider to use depends on individual requirements.  For most
-    basic uses, either provider will give adequate results.  For the libc
-    provider, it depends on what the operating system offers; some operating
-    systems are better than others.  For advanced uses, ICU offers more locale
-    variants and customization options.
+    Regardless of the locale provider, the operating system is still used to
+    provide some locale-aware behavior, such as messages (see <xref
+    linkend="guc-lc-messages"/>).
    </para>
+
+   <para>
+    The available locale providers are listed below:
+   </para>
+
+   <variablelist>
+    <varlistentry>
+     <term><literal>builtin</literal></term>
+     <listitem>
+      <para>
+       The <literal>builtin</literal> provider uses built-in operations. Only
+       the <literal>C</literal> locale is supported for this provider.
+      </para>
+      <para>
+       The <literal>C</literal> locale behavior is identical to the
+       <literal>C</literal> locale in the libc provider. When using this
+       locale, the behavior may depend on the database encoding.
+      </para>
+     </listitem>
+    </varlistentry>
+
+    <varlistentry>
+     <term><literal>icu</literal></term>
+     <listitem>
+      <para>
+       The <literal>icu</literal> provider uses the external
+       ICU<indexterm><primary>ICU</primary></indexterm>
+       library. <productname>PostgreSQL</productname> must have been
+       configured with support.
+      </para>
+      <para>
+       ICU provides collation and character classification behavior that is
+       independent of the operating system and database encoding, which is
+       preferable if you expect to transition to other platforms without any
+       change in results. <literal>LC_COLLATE</literal> and
+       <literal>LC_CTYPE</literal> can be set independently of the ICU
+       locale.
+      </para>
+      <note>
+       <para>
+        For the ICU provider, results may depend on the version of the ICU
+        library used, as it is updated to reflect changes in natural language
+        over time.
+       </para>
+      </note>
+     </listitem>
+    </varlistentry>
+
+    <varlistentry>
+     <term><literal>libc</literal></term>
+     <listitem>
+      <para>
+       The <literal>libc</literal> provider uses the operating system's C
+       library. The collation and character classification behavior is
+       controlled by the settings <literal>LC_COLLATE</literal> and
+       <literal>LC_CTYPE</literal>, so they cannot be set independently.
+      </para>
+      <note>
+       <para>
+        The same locale name may have different behavior on different
+        platforms when using the libc provider.
+       </para>
+      </note>
+     </listitem>
+    </varlistentry>
+   </variablelist>
   </sect2>
 
   <sect2 id="icu-locales">
diff --git a/doc/src/sgml/ref/create_collation.sgml b/doc/src/sgml/ref/create_collation.sgml
index 5cf9777764..98cd7d56be 100644
--- a/doc/src/sgml/ref/create_collation.sgml
+++ b/doc/src/sgml/ref/create_collation.sgml
@@ -96,6 +96,11 @@ CREATE COLLATION [ IF NOT EXISTS ] <replaceable>name</replaceable> FROM <replace
        <replaceable>locale</replaceable>, you cannot specify either of those
        parameters.
       </para>
+      <para>
+       If <replaceable>provider</replaceable> is <literal>builtin</literal>,
+       then <replaceable>locale</replaceable> must be specified and set to
+       <literal>C</literal>.
+      </para>
      </listitem>
     </varlistentry>
 
@@ -129,9 +134,9 @@ CREATE COLLATION [ IF NOT EXISTS ] <replaceable>name</replaceable> FROM <replace
      <listitem>
       <para>
        Specifies the provider to use for locale services associated with this
-       collation.  Possible values are
-       <literal>icu</literal><indexterm><primary>ICU</primary></indexterm>
-       (if the server was built with ICU support) or <literal>libc</literal>.
+       collation.  Possible values are <literal>builtin</literal>,
+       <literal>icu</literal><indexterm><primary>ICU</primary></indexterm> (if
+       the server was built with ICU support) or <literal>libc</literal>.
        <literal>libc</literal> is the default.  See <xref
        linkend="locale-providers"/> for details.
       </para>
diff --git a/doc/src/sgml/ref/create_database.sgml b/doc/src/sgml/ref/create_database.sgml
index 72927960eb..6c1fd95602 100644
--- a/doc/src/sgml/ref/create_database.sgml
+++ b/doc/src/sgml/ref/create_database.sgml
@@ -162,6 +162,11 @@ CREATE DATABASE <replaceable class="parameter">name</replaceable>
         linkend="create-database-lc-ctype"/>, or <xref
         linkend="create-database-icu-locale"/> individually.
        </para>
+       <para>
+        If <xref linkend="create-database-locale-provider"/> is
+        <literal>builtin</literal>, then <replaceable>locale</replaceable>
+        must be specified and set to <literal>C</literal>.
+       </para>
        <tip>
         <para>
          The other locale settings <xref linkend="guc-lc-messages"/>, <xref
@@ -243,7 +248,7 @@ CREATE DATABASE <replaceable class="parameter">name</replaceable>
       <listitem>
        <para>
         Specifies the provider to use for the default collation in this
-        database.  Possible values are
+        database.  Possible values are <literal>builtin</literal>,
         <literal>icu</literal><indexterm><primary>ICU</primary></indexterm>
         (if the server was built with ICU support) or <literal>libc</literal>.
         By default, the provider is the same as that of the <xref
diff --git a/doc/src/sgml/ref/createdb.sgml b/doc/src/sgml/ref/createdb.sgml
index e4647d5ce7..d3e815f659 100644
--- a/doc/src/sgml/ref/createdb.sgml
+++ b/doc/src/sgml/ref/createdb.sgml
@@ -171,7 +171,7 @@ PostgreSQL documentation
      </varlistentry>
 
      <varlistentry>
-      <term><option>--locale-provider={<literal>libc</literal>|<literal>icu</literal>}</option></term>
+      <term><option>--locale-provider={<literal>builtin</literal>|<literal>libc</literal>|<literal>icu</literal>}</option></term>
       <listitem>
        <para>
         Specifies the locale provider for the database's default collation.
diff --git a/doc/src/sgml/ref/initdb.sgml b/doc/src/sgml/ref/initdb.sgml
index cd75cae10e..4760570f6a 100644
--- a/doc/src/sgml/ref/initdb.sgml
+++ b/doc/src/sgml/ref/initdb.sgml
@@ -286,6 +286,11 @@ PostgreSQL documentation
         environment that <command>initdb</command> runs in. Locale
         support is described in <xref linkend="locale"/>.
        </para>
+       <para>
+        If <option>--locale-provider</option> is <literal>builtin</literal>,
+        <option>--locale</option> must be specified and set to
+        <literal>C</literal>.
+       </para>
       </listitem>
      </varlistentry>
 
@@ -314,8 +319,18 @@ PostgreSQL documentation
       </listitem>
      </varlistentry>
 
+     <varlistentry id="app-initdb-builtin-locale">
+      <term><option>--builtin-locale=<replaceable>locale</replaceable></option></term>
+      <listitem>
+       <para>
+        Specifies the locale name when the builtin provider is used. Locale support
+        is described in <xref linkend="locale"/>.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry id="app-initdb-option-locale-provider">
-      <term><option>--locale-provider={<literal>libc</literal>|<literal>icu</literal>}</option></term>
+      <term><option>--locale-provider={<literal>builtin</literal>|<literal>libc</literal>|<literal>icu</literal>}</option></term>
       <listitem>
        <para>
         This option sets the locale provider for databases created in the new
diff --git a/src/backend/catalog/pg_collation.c b/src/backend/catalog/pg_collation.c
index e42f2afccb..7f2f701229 100644
--- a/src/backend/catalog/pg_collation.c
+++ b/src/backend/catalog/pg_collation.c
@@ -64,7 +64,10 @@ CollationCreate(const char *collname, Oid collnamespace,
 	Assert(collname);
 	Assert(collnamespace);
 	Assert(collowner);
-	Assert((collcollate && collctype) || colllocale);
+	Assert((collprovider == COLLPROVIDER_LIBC &&
+			collcollate && collctype && !colllocale) ||
+		   (collprovider != COLLPROVIDER_LIBC &&
+			!collcollate && !collctype && colllocale));
 
 	/*
 	 * Make sure there is no existing collation of same name & encoding.
diff --git a/src/backend/commands/collationcmds.c b/src/backend/commands/collationcmds.c
index 59d7e17804..44675fa9da 100644
--- a/src/backend/commands/collationcmds.c
+++ b/src/backend/commands/collationcmds.c
@@ -66,7 +66,7 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e
 	DefElem    *versionEl = NULL;
 	char	   *collcollate;
 	char	   *collctype;
-	char	   *colllocale;
+	const char *colllocale;
 	char	   *collicurules;
 	bool		collisdeterministic;
 	int			collencoding;
@@ -213,7 +213,9 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e
 
 		if (collproviderstr)
 		{
-			if (pg_strcasecmp(collproviderstr, "icu") == 0)
+			if (pg_strcasecmp(collproviderstr, "builtin") == 0)
+				collprovider = COLLPROVIDER_BUILTIN;
+			else if (pg_strcasecmp(collproviderstr, "icu") == 0)
 				collprovider = COLLPROVIDER_ICU;
 			else if (pg_strcasecmp(collproviderstr, "libc") == 0)
 				collprovider = COLLPROVIDER_LIBC;
@@ -243,7 +245,18 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e
 		if (lcctypeEl)
 			collctype = defGetString(lcctypeEl);
 
-		if (collprovider == COLLPROVIDER_LIBC)
+		if (collprovider == COLLPROVIDER_BUILTIN)
+		{
+			if (!colllocale)
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+						 errmsg("parameter \"%s\" must be specified",
+								"locale")));
+
+			colllocale = builtin_validate_locale(GetDatabaseEncoding(),
+												 colllocale);
+		}
+		else if (collprovider == COLLPROVIDER_LIBC)
 		{
 			if (!collcollate)
 				ereport(ERROR,
@@ -303,7 +316,17 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e
 					(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
 					 errmsg("ICU rules cannot be specified unless locale provider is ICU")));
 
-		if (collprovider == COLLPROVIDER_ICU)
+		if (collprovider == COLLPROVIDER_BUILTIN)
+		{
+			/*
+			 * Behavior may be different in different encodings, so set
+			 * collencoding to the current database encoding. No validation is
+			 * required, because the "builtin" provider is compatible with any
+			 * encoding.
+			 */
+			collencoding = GetDatabaseEncoding();
+		}
+		else if (collprovider == COLLPROVIDER_ICU)
 		{
 #ifdef USE_ICU
 			/*
@@ -332,7 +355,18 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e
 	}
 
 	if (!collversion)
-		collversion = get_collation_actual_version(collprovider, collprovider == COLLPROVIDER_ICU ? colllocale : collcollate);
+	{
+		const char *locale;
+
+		if (collprovider == COLLPROVIDER_ICU)
+			locale = colllocale;
+		else if (collprovider == COLLPROVIDER_LIBC)
+			locale = collcollate;
+		else
+			locale = NULL;		/* COLLPROVIDER_BUILTIN */
+
+		collversion = get_collation_actual_version(collprovider, locale);
+	}
 
 	newoid = CollationCreate(collName,
 							 collNamespace,
@@ -407,6 +441,7 @@ AlterCollation(AlterCollationStmt *stmt)
 	Form_pg_collation collForm;
 	Datum		datum;
 	bool		isnull;
+	char	   *locale;
 	char	   *oldversion;
 	char	   *newversion;
 	ObjectAddress address;
@@ -433,8 +468,20 @@ AlterCollation(AlterCollationStmt *stmt)
 	datum = SysCacheGetAttr(COLLOID, tup, Anum_pg_collation_collversion, &isnull);
 	oldversion = isnull ? NULL : TextDatumGetCString(datum);
 
-	datum = SysCacheGetAttrNotNull(COLLOID, tup, collForm->collprovider == COLLPROVIDER_ICU ? Anum_pg_collation_colllocale : Anum_pg_collation_collcollate);
-	newversion = get_collation_actual_version(collForm->collprovider, TextDatumGetCString(datum));
+	if (collForm->collprovider == COLLPROVIDER_ICU)
+	{
+		datum = SysCacheGetAttrNotNull(COLLOID, tup, Anum_pg_collation_colllocale);
+		locale = TextDatumGetCString(datum);
+	}
+	else if (collForm->collprovider == COLLPROVIDER_LIBC)
+	{
+		datum = SysCacheGetAttrNotNull(COLLOID, tup, Anum_pg_collation_collcollate);
+		locale = TextDatumGetCString(datum);
+	}
+	else
+		locale = NULL;			/* COLLPROVIDER_BUILTIN */
+
+	newversion = get_collation_actual_version(collForm->collprovider, locale);
 
 	/* cannot change from NULL to non-NULL or vice versa */
 	if ((!oldversion && newversion) || (oldversion && !newversion))
@@ -498,11 +545,18 @@ pg_collation_actual_version(PG_FUNCTION_ARGS)
 
 		provider = ((Form_pg_database) GETSTRUCT(dbtup))->datlocprovider;
 
-		datum = SysCacheGetAttrNotNull(DATABASEOID, dbtup,
-									   provider == COLLPROVIDER_ICU ?
-									   Anum_pg_database_datlocale : Anum_pg_database_datcollate);
-
-		locale = TextDatumGetCString(datum);
+		if (provider == COLLPROVIDER_ICU)
+		{
+			datum = SysCacheGetAttrNotNull(DATABASEOID, dbtup, Anum_pg_database_datlocale);
+			locale = TextDatumGetCString(datum);
+		}
+		else if (provider == COLLPROVIDER_LIBC)
+		{
+			datum = SysCacheGetAttrNotNull(DATABASEOID, dbtup, Anum_pg_database_datcollate);
+			locale = TextDatumGetCString(datum);
+		}
+		else
+			locale = NULL;		/* COLLPROVIDER_BUILTIN */
 
 		ReleaseSysCache(dbtup);
 	}
@@ -519,11 +573,19 @@ pg_collation_actual_version(PG_FUNCTION_ARGS)
 
 		provider = ((Form_pg_collation) GETSTRUCT(colltp))->collprovider;
 		Assert(provider != COLLPROVIDER_DEFAULT);
-		datum = SysCacheGetAttrNotNull(COLLOID, colltp,
-									   provider == COLLPROVIDER_ICU ?
-									   Anum_pg_collation_colllocale : Anum_pg_collation_collcollate);
 
-		locale = TextDatumGetCString(datum);
+		if (provider == COLLPROVIDER_ICU)
+		{
+			datum = SysCacheGetAttrNotNull(COLLOID, colltp, Anum_pg_collation_colllocale);
+			locale = TextDatumGetCString(datum);
+		}
+		else if (provider == COLLPROVIDER_LIBC)
+		{
+			datum = SysCacheGetAttrNotNull(COLLOID, colltp, Anum_pg_collation_collcollate);
+			locale = TextDatumGetCString(datum);
+		}
+		else
+			locale = NULL;		/* COLLPROVIDER_BUILTIN */
 
 		ReleaseSysCache(colltp);
 	}
diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c
index 0f27d7b14c..aaa2e54f21 100644
--- a/src/backend/commands/dbcommands.c
+++ b/src/backend/commands/dbcommands.c
@@ -697,6 +697,7 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
 	DefElem    *dtemplate = NULL;
 	DefElem    *dencoding = NULL;
 	DefElem    *dlocale = NULL;
+	DefElem    *dbuiltinlocale = NULL;
 	DefElem    *dcollate = NULL;
 	DefElem    *dctype = NULL;
 	DefElem    *diculocale = NULL;
@@ -712,7 +713,7 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
 	const char *dbtemplate = NULL;
 	char	   *dbcollate = NULL;
 	char	   *dbctype = NULL;
-	char	   *dblocale = NULL;
+	const char *dblocale = NULL;
 	char	   *dbicurules = NULL;
 	char		dblocprovider = '\0';
 	char	   *canonname;
@@ -761,6 +762,12 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
 				errorConflictingDefElem(defel, pstate);
 			dlocale = defel;
 		}
+		else if (strcmp(defel->defname, "builtin_locale") == 0)
+		{
+			if (dbuiltinlocale)
+				errorConflictingDefElem(defel, pstate);
+			dbuiltinlocale = defel;
+		}
 		else if (strcmp(defel->defname, "lc_collate") == 0)
 		{
 			if (dcollate)
@@ -896,7 +903,10 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
 	{
 		dbcollate = defGetString(dlocale);
 		dbctype = defGetString(dlocale);
+		dblocale = defGetString(dlocale);
 	}
+	if (dbuiltinlocale && dbuiltinlocale->arg)
+		dblocale = defGetString(dbuiltinlocale);
 	if (dcollate && dcollate->arg)
 		dbcollate = defGetString(dcollate);
 	if (dctype && dctype->arg)
@@ -909,7 +919,9 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
 	{
 		char	   *locproviderstr = defGetString(dlocprovider);
 
-		if (pg_strcasecmp(locproviderstr, "icu") == 0)
+		if (pg_strcasecmp(locproviderstr, "builtin") == 0)
+			dblocprovider = COLLPROVIDER_BUILTIN;
+		else if (pg_strcasecmp(locproviderstr, "icu") == 0)
 			dblocprovider = COLLPROVIDER_ICU;
 		else if (pg_strcasecmp(locproviderstr, "libc") == 0)
 			dblocprovider = COLLPROVIDER_LIBC;
@@ -1026,14 +1038,9 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
 		dbctype = src_ctype;
 	if (dblocprovider == '\0')
 		dblocprovider = src_locprovider;
-	if (dblocale == NULL && dblocprovider == COLLPROVIDER_ICU)
-	{
-		if (dlocale && dlocale->arg)
-			dblocale = defGetString(dlocale);
-		else
-			dblocale = src_locale;
-	}
-	if (dbicurules == NULL && dblocprovider == COLLPROVIDER_ICU)
+	if (dblocale == NULL)
+		dblocale = src_locale;
+	if (dbicurules == NULL)
 		dbicurules = src_icurules;
 
 	/* Some encodings are client only */
@@ -1058,6 +1065,27 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
 
 	check_encoding_locale_matches(encoding, dbcollate, dbctype);
 
+	if (dblocprovider == COLLPROVIDER_BUILTIN)
+	{
+		/*
+		 * This would happen if template0 uses the libc provider but the new
+		 * database uses builtin.
+		 */
+		if (!dblocale)
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("LOCALE or BUILTIN_LOCALE must be specified")));
+
+		dblocale = builtin_validate_locale(encoding, dblocale);
+	}
+	else
+	{
+		if (dbuiltinlocale && dbuiltinlocale->arg)
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+					 errmsg("BUILTIN_LOCALE cannot be specified unless locale provider is builtin")));
+	}
+
 	if (dblocprovider == COLLPROVIDER_ICU)
 	{
 		if (!(is_encoding_supported_by_icu(encoding)))
@@ -1099,7 +1127,7 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
 	}
 	else
 	{
-		if (dblocale)
+		if (diculocale && diculocale->arg)
 			ereport(ERROR,
 					(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
 					 errmsg("ICU locale cannot be specified unless locale provider is ICU")));
@@ -1110,6 +1138,10 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
 					 errmsg("ICU rules cannot be specified unless locale provider is ICU")));
 	}
 
+	/* for libc, locale comes from datcollate and datctype */
+	if (dblocprovider == COLLPROVIDER_LIBC)
+		dblocale = NULL;
+
 	/*
 	 * Check that the new encoding and locale settings match the source
 	 * database.  We insist on this because we simply copy the source data ---
@@ -1195,8 +1227,16 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
 	if (src_collversion && !dcollversion)
 	{
 		char	   *actual_versionstr;
+		const char *locale;
 
-		actual_versionstr = get_collation_actual_version(dblocprovider, dblocprovider == COLLPROVIDER_ICU ? dblocale : dbcollate);
+		if (dblocprovider == COLLPROVIDER_ICU)
+			locale = dblocale;
+		else if (dblocprovider == COLLPROVIDER_LIBC)
+			locale = dbcollate;
+		else
+			locale = NULL;		/* COLLPROVIDER_BUILTIN */
+
+		actual_versionstr = get_collation_actual_version(dblocprovider, locale);
 		if (!actual_versionstr)
 			ereport(ERROR,
 					(errmsg("template database \"%s\" has a collation version, but no actual collation version could be determined",
@@ -1224,7 +1264,18 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
 	 * collation version, which is normally only the case for template0.
 	 */
 	if (dbcollversion == NULL)
-		dbcollversion = get_collation_actual_version(dblocprovider, dblocprovider == COLLPROVIDER_ICU ? dblocale : dbcollate);
+	{
+		const char *locale;
+
+		if (dblocprovider == COLLPROVIDER_ICU)
+			locale = dblocale;
+		else if (dblocprovider == COLLPROVIDER_LIBC)
+			locale = dbcollate;
+		else
+			locale = NULL;		/* COLLPROVIDER_BUILTIN */
+
+		dbcollversion = get_collation_actual_version(dblocprovider, locale);
+	}
 
 	/* Resolve default tablespace for new database */
 	if (dtablespacename && dtablespacename->arg)
@@ -1363,8 +1414,8 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
 	 * block on the unique index, and fail after we commit).
 	 */
 
-	Assert((dblocprovider == COLLPROVIDER_ICU && dblocale) ||
-		   (dblocprovider != COLLPROVIDER_ICU && !dblocale));
+	Assert((dblocprovider != COLLPROVIDER_LIBC && dblocale) ||
+		   (dblocprovider == COLLPROVIDER_LIBC && !dblocale));
 
 	/* Form tuple */
 	new_record[Anum_pg_database_oid - 1] = ObjectIdGetDatum(dboid);
@@ -2445,6 +2496,7 @@ AlterDatabaseRefreshColl(AlterDatabaseRefreshCollStmt *stmt)
 	ObjectAddress address;
 	Datum		datum;
 	bool		isnull;
+	char	   *locale;
 	char	   *oldversion;
 	char	   *newversion;
 
@@ -2471,10 +2523,24 @@ AlterDatabaseRefreshColl(AlterDatabaseRefreshCollStmt *stmt)
 	datum = heap_getattr(tuple, Anum_pg_database_datcollversion, RelationGetDescr(rel), &isnull);
 	oldversion = isnull ? NULL : TextDatumGetCString(datum);
 
-	datum = heap_getattr(tuple, datForm->datlocprovider == COLLPROVIDER_ICU ? Anum_pg_database_datlocale : Anum_pg_database_datcollate, RelationGetDescr(rel), &isnull);
-	if (isnull)
-		elog(ERROR, "unexpected null in pg_database");
-	newversion = get_collation_actual_version(datForm->datlocprovider, TextDatumGetCString(datum));
+	if (datForm->datlocprovider == COLLPROVIDER_ICU)
+	{
+		datum = heap_getattr(tuple, Anum_pg_database_datlocale, RelationGetDescr(rel), &isnull);
+		if (isnull)
+			elog(ERROR, "unexpected null in pg_database");
+		locale = TextDatumGetCString(datum);
+	}
+	else if (datForm->datlocprovider == COLLPROVIDER_LIBC)
+	{
+		datum = heap_getattr(tuple, Anum_pg_database_datcollate, RelationGetDescr(rel), &isnull);
+		if (isnull)
+			elog(ERROR, "unexpected null in pg_database");
+		locale = TextDatumGetCString(datum);
+	}
+	else
+		locale = NULL;			/* COLLPROVIDER_BUILTIN */
+
+	newversion = get_collation_actual_version(datForm->datlocprovider, locale);
 
 	/* cannot change from NULL to non-NULL or vice versa */
 	if ((!oldversion && newversion) || (oldversion && !newversion))
@@ -2659,6 +2725,7 @@ pg_database_collation_actual_version(PG_FUNCTION_ARGS)
 	HeapTuple	tp;
 	char		datlocprovider;
 	Datum		datum;
+	char	   *locale;
 	char	   *version;
 
 	tp = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(dbid));
@@ -2669,8 +2736,20 @@ pg_database_collation_actual_version(PG_FUNCTION_ARGS)
 
 	datlocprovider = ((Form_pg_database) GETSTRUCT(tp))->datlocprovider;
 
-	datum = SysCacheGetAttrNotNull(DATABASEOID, tp, datlocprovider == COLLPROVIDER_ICU ? Anum_pg_database_datlocale : Anum_pg_database_datcollate);
-	version = get_collation_actual_version(datlocprovider, TextDatumGetCString(datum));
+	if (datlocprovider == COLLPROVIDER_ICU)
+	{
+		datum = SysCacheGetAttrNotNull(DATABASEOID, tp, Anum_pg_database_datlocale);
+		locale = TextDatumGetCString(datum);
+	}
+	else if (datlocprovider == COLLPROVIDER_LIBC)
+	{
+		datum = SysCacheGetAttrNotNull(DATABASEOID, tp, Anum_pg_database_datcollate);
+		locale = TextDatumGetCString(datum);
+	}
+	else
+		locale = NULL;			/* COLLPROVIDER_BUILTIN */
+
+	version = get_collation_actual_version(datlocprovider, locale);
 
 	ReleaseSysCache(tp);
 
diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c
index 036a463491..5f483b8dbc 100644
--- a/src/backend/utils/adt/formatting.c
+++ b/src/backend/utils/adt/formatting.c
@@ -1680,6 +1680,8 @@ str_tolower(const char *buff, size_t nbytes, Oid collid)
 		else
 #endif
 		{
+			Assert(!mylocale || mylocale->provider == COLLPROVIDER_LIBC);
+
 			if (pg_database_encoding_max_length() > 1)
 			{
 				wchar_t    *workspace;
@@ -1798,6 +1800,8 @@ str_toupper(const char *buff, size_t nbytes, Oid collid)
 		else
 #endif
 		{
+			Assert(!mylocale || mylocale->provider == COLLPROVIDER_LIBC);
+
 			if (pg_database_encoding_max_length() > 1)
 			{
 				wchar_t    *workspace;
@@ -1917,6 +1921,8 @@ str_initcap(const char *buff, size_t nbytes, Oid collid)
 		else
 #endif
 		{
+			Assert(!mylocale || mylocale->provider == COLLPROVIDER_LIBC);
+
 			if (pg_database_encoding_max_length() > 1)
 			{
 				wchar_t    *workspace;
diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index 77d5752dc8..fbe5a035fd 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -1268,7 +1268,18 @@ lookup_collation_cache(Oid collation, bool set_flags)
 			elog(ERROR, "cache lookup failed for collation %u", collation);
 		collform = (Form_pg_collation) GETSTRUCT(tp);
 
-		if (collform->collprovider == COLLPROVIDER_LIBC)
+		if (collform->collprovider == COLLPROVIDER_BUILTIN)
+		{
+			Datum		datum;
+			const char *colllocale;
+
+			datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_colllocale);
+			colllocale = TextDatumGetCString(datum);
+
+			cache_entry->collate_is_c = true;
+			cache_entry->ctype_is_c = (strcmp(colllocale, "C") == 0);
+		}
+		else if (collform->collprovider == COLLPROVIDER_LIBC)
 		{
 			Datum		datum;
 			const char *collcollate;
@@ -1319,16 +1330,30 @@ lc_collate_is_c(Oid collation)
 	if (collation == DEFAULT_COLLATION_OID)
 	{
 		static int	result = -1;
-		char	   *localeptr;
-
-		if (default_locale.provider == COLLPROVIDER_ICU)
-			return false;
+		const char *localeptr;
 
 		if (result >= 0)
 			return (bool) result;
-		localeptr = setlocale(LC_COLLATE, NULL);
-		if (!localeptr)
-			elog(ERROR, "invalid LC_COLLATE setting");
+
+		if (default_locale.provider == COLLPROVIDER_BUILTIN)
+		{
+			result = true;
+			return (bool) result;
+		}
+		else if (default_locale.provider == COLLPROVIDER_ICU)
+		{
+			result = false;
+			return (bool) result;
+		}
+		else if (default_locale.provider == COLLPROVIDER_LIBC)
+		{
+			localeptr = setlocale(LC_CTYPE, NULL);
+			if (!localeptr)
+				elog(ERROR, "invalid LC_CTYPE setting");
+		}
+		else
+			elog(ERROR, "unexpected collation provider '%c'",
+				 default_locale.provider);
 
 		if (strcmp(localeptr, "C") == 0)
 			result = true;
@@ -1372,16 +1397,29 @@ lc_ctype_is_c(Oid collation)
 	if (collation == DEFAULT_COLLATION_OID)
 	{
 		static int	result = -1;
-		char	   *localeptr;
-
-		if (default_locale.provider == COLLPROVIDER_ICU)
-			return false;
+		const char *localeptr;
 
 		if (result >= 0)
 			return (bool) result;
-		localeptr = setlocale(LC_CTYPE, NULL);
-		if (!localeptr)
-			elog(ERROR, "invalid LC_CTYPE setting");
+
+		if (default_locale.provider == COLLPROVIDER_BUILTIN)
+		{
+			localeptr = default_locale.info.builtin.locale;
+		}
+		else if (default_locale.provider == COLLPROVIDER_ICU)
+		{
+			result = false;
+			return (bool) result;
+		}
+		else if (default_locale.provider == COLLPROVIDER_LIBC)
+		{
+			localeptr = setlocale(LC_CTYPE, NULL);
+			if (!localeptr)
+				elog(ERROR, "invalid LC_CTYPE setting");
+		}
+		else
+			elog(ERROR, "unexpected collation provider '%c'",
+				 default_locale.provider);
 
 		if (strcmp(localeptr, "C") == 0)
 			result = true;
@@ -1519,10 +1557,10 @@ pg_newlocale_from_collation(Oid collid)
 
 	if (collid == DEFAULT_COLLATION_OID)
 	{
-		if (default_locale.provider == COLLPROVIDER_ICU)
-			return &default_locale;
-		else
+		if (default_locale.provider == COLLPROVIDER_LIBC)
 			return (pg_locale_t) 0;
+		else
+			return &default_locale;
 	}
 
 	cache_entry = lookup_collation_cache(collid, false);
@@ -1547,7 +1585,20 @@ pg_newlocale_from_collation(Oid collid)
 		result.provider = collform->collprovider;
 		result.deterministic = collform->collisdeterministic;
 
-		if (collform->collprovider == COLLPROVIDER_LIBC)
+		if (collform->collprovider == COLLPROVIDER_BUILTIN)
+		{
+			const char *locstr;
+
+			datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_colllocale);
+			locstr = TextDatumGetCString(datum);
+
+			if (strcmp(locstr, "C") != 0)
+				elog(ERROR, "unexpected builtin locale: %s", locstr);
+
+			result.info.builtin.locale = MemoryContextStrdup(TopMemoryContext,
+															 locstr);
+		}
+		else if (collform->collprovider == COLLPROVIDER_LIBC)
 		{
 			const char *collcollate;
 			const char *collctype pg_attribute_unused();
@@ -1626,6 +1677,7 @@ pg_newlocale_from_collation(Oid collid)
 
 			collversionstr = TextDatumGetCString(datum);
 
+			Assert(collform->collprovider != COLLPROVIDER_BUILTIN);
 			datum = SysCacheGetAttrNotNull(COLLOID, tp, collform->collprovider == COLLPROVIDER_ICU ? Anum_pg_collation_colllocale : Anum_pg_collation_collcollate);
 
 			actual_versionstr = get_collation_actual_version(collform->collprovider,
@@ -1677,6 +1729,10 @@ get_collation_actual_version(char collprovider, const char *collcollate)
 {
 	char	   *collversion = NULL;
 
+	/* the builtin collation provider is not versioned */
+	if (collprovider == COLLPROVIDER_BUILTIN)
+		return NULL;
+
 #ifdef USE_ICU
 	if (collprovider == COLLPROVIDER_ICU)
 	{
@@ -2443,6 +2499,31 @@ pg_strnxfrm_prefix(char *dest, size_t destsize, const char *src,
 	return result;
 }
 
+const char *
+builtin_validate_locale(int encoding, const char *locale)
+{
+	const char *canonical_name = NULL;
+	int			required_encoding = -1;
+
+	if (strcmp(locale, "C") == 0)
+		canonical_name = "C";
+
+	if (!canonical_name)
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("invalid locale name \"%s\" for builtin provider",
+						locale)));
+
+	if (required_encoding >= 0 && encoding != required_encoding)
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("encoding \"%s\" does not match locale \"%s\"",
+						pg_encoding_to_char(encoding), locale)));
+
+	return canonical_name;
+}
+
+
 #ifdef USE_ICU
 
 /*
diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c
index 2875bc97d3..0805398e24 100644
--- a/src/backend/utils/init/postinit.c
+++ b/src/backend/utils/init/postinit.c
@@ -423,7 +423,17 @@ CheckMyDatabase(const char *name, bool am_superuser, bool override_allow_connect
 		strcmp(ctype, "POSIX") == 0)
 		database_ctype_is_c = true;
 
-	if (dbform->datlocprovider == COLLPROVIDER_ICU)
+	if (dbform->datlocprovider == COLLPROVIDER_BUILTIN)
+	{
+		datum = SysCacheGetAttrNotNull(DATABASEOID, tup, Anum_pg_database_datlocale);
+		datlocale = TextDatumGetCString(datum);
+
+		builtin_validate_locale(dbform->encoding, datlocale);
+
+		default_locale.info.builtin.locale = MemoryContextStrdup(
+																 TopMemoryContext, datlocale);
+	}
+	else if (dbform->datlocprovider == COLLPROVIDER_ICU)
 	{
 		char	   *icurules;
 
@@ -461,10 +471,16 @@ CheckMyDatabase(const char *name, bool am_superuser, bool override_allow_connect
 	{
 		char	   *actual_versionstr;
 		char	   *collversionstr;
+		char	   *locale;
 
 		collversionstr = TextDatumGetCString(datum);
 
-		actual_versionstr = get_collation_actual_version(dbform->datlocprovider, dbform->datlocprovider == COLLPROVIDER_ICU ? datlocale : collate);
+		if (dbform->datlocprovider == COLLPROVIDER_LIBC)
+			locale = collate;
+		else
+			locale = datlocale;
+
+		actual_versionstr = get_collation_actual_version(dbform->datlocprovider, locale);
 		if (!actual_versionstr)
 			/* should not happen */
 			elog(WARNING,
diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c
index de58002a5d..a26073f7c7 100644
--- a/src/bin/initdb/initdb.c
+++ b/src/bin/initdb/initdb.c
@@ -146,6 +146,7 @@ static char *lc_time = NULL;
 static char *lc_messages = NULL;
 static char locale_provider = COLLPROVIDER_LIBC;
 static char *datlocale = NULL;
+static bool icu_locale_specified = false;
 static char *icu_rules = NULL;
 static const char *default_text_search_config = NULL;
 static char *username = NULL;
@@ -2368,7 +2369,7 @@ setlocales(void)
 			lc_monetary = locale;
 		if (!lc_messages)
 			lc_messages = locale;
-		if (!datlocale && locale_provider == COLLPROVIDER_ICU)
+		if (!datlocale && locale_provider != COLLPROVIDER_LIBC)
 			datlocale = locale;
 	}
 
@@ -2395,14 +2396,14 @@ setlocales(void)
 	lc_messages = canonname;
 #endif
 
+	if (locale_provider != COLLPROVIDER_LIBC && datlocale == NULL)
+		pg_fatal("locale must be specified if provider is %s",
+				 collprovider_name(locale_provider));
+
 	if (locale_provider == COLLPROVIDER_ICU)
 	{
 		char	   *langtag;
 
-		/* acquire default locale from the environment, if not specified */
-		if (datlocale == NULL)
-			pg_fatal("ICU locale must be specified");
-
 		/* canonicalize to a language tag */
 		langtag = icu_language_tag(datlocale);
 		printf(_("Using language tag \"%s\" for ICU locale \"%s\".\n"),
@@ -2447,7 +2448,8 @@ usage(const char *progname)
 			 "                            set default locale in the respective category for\n"
 			 "                            new databases (default taken from environment)\n"));
 	printf(_("      --no-locale           equivalent to --locale=C\n"));
-	printf(_("      --locale-provider={libc|icu}\n"
+	printf(_("      --builtin-locale=LOCALE   set builtin locale name for new databases\n"));
+	printf(_("      --locale-provider={builtin|libc|icu}\n"
 			 "                            set default locale provider for new databases\n"));
 	printf(_("      --pwfile=FILE         read password for the new superuser from file\n"));
 	printf(_("  -T, --text-search-config=CFG\n"
@@ -2609,9 +2611,9 @@ setup_locale_encoding(void)
 	else
 	{
 		printf(_("The database cluster will be initialized with this locale configuration:\n"));
-		printf(_("  provider:    %s\n"), collprovider_name(locale_provider));
-		if (datlocale)
-			printf(_("  ICU locale:  %s\n"), datlocale);
+		printf(_("  default collation provider:  %s\n"), collprovider_name(locale_provider));
+		if (locale_provider != COLLPROVIDER_LIBC)
+			printf(_("  default collation locale:    %s\n"), datlocale);
 		printf(_("  LC_COLLATE:  %s\n"
 				 "  LC_CTYPE:    %s\n"
 				 "  LC_MESSAGES: %s\n"
@@ -3104,9 +3106,10 @@ main(int argc, char *argv[])
 		{"allow-group-access", no_argument, NULL, 'g'},
 		{"discard-caches", no_argument, NULL, 14},
 		{"locale-provider", required_argument, NULL, 15},
-		{"icu-locale", required_argument, NULL, 16},
-		{"icu-rules", required_argument, NULL, 17},
-		{"sync-method", required_argument, NULL, 18},
+		{"builtin-locale", required_argument, NULL, 16},
+		{"icu-locale", required_argument, NULL, 17},
+		{"icu-rules", required_argument, NULL, 18},
+		{"sync-method", required_argument, NULL, 19},
 		{NULL, 0, NULL, 0}
 	};
 
@@ -3274,7 +3277,9 @@ main(int argc, char *argv[])
 										 "-c debug_discard_caches=1");
 				break;
 			case 15:
-				if (strcmp(optarg, "icu") == 0)
+				if (strcmp(optarg, "builtin") == 0)
+					locale_provider = COLLPROVIDER_BUILTIN;
+				else if (strcmp(optarg, "icu") == 0)
 					locale_provider = COLLPROVIDER_ICU;
 				else if (strcmp(optarg, "libc") == 0)
 					locale_provider = COLLPROVIDER_LIBC;
@@ -3285,9 +3290,13 @@ main(int argc, char *argv[])
 				datlocale = pg_strdup(optarg);
 				break;
 			case 17:
-				icu_rules = pg_strdup(optarg);
+				datlocale = pg_strdup(optarg);
+				icu_locale_specified = true;
 				break;
 			case 18:
+				icu_rules = pg_strdup(optarg);
+				break;
+			case 19:
 				if (!parse_sync_method(optarg, &sync_method))
 					exit(1);
 				break;
@@ -3317,7 +3326,7 @@ main(int argc, char *argv[])
 		exit(1);
 	}
 
-	if (datlocale && locale_provider != COLLPROVIDER_ICU)
+	if (icu_locale_specified && locale_provider != COLLPROVIDER_ICU)
 		pg_fatal("%s cannot be specified unless locale provider \"%s\" is chosen",
 				 "--icu-locale", "icu");
 
diff --git a/src/bin/initdb/t/001_initdb.pl b/src/bin/initdb/t/001_initdb.pl
index 594b20cc74..e719f70dae 100644
--- a/src/bin/initdb/t/001_initdb.pl
+++ b/src/bin/initdb/t/001_initdb.pl
@@ -117,7 +117,7 @@ if ($ENV{with_icu} eq 'yes')
 {
 	command_fails_like(
 		[ 'initdb', '--no-sync', '--locale-provider=icu', "$tempdir/data2" ],
-		qr/initdb: error: ICU locale must be specified/,
+		qr/initdb: error: locale must be specified if provider is icu/,
 		'locale provider ICU requires --icu-locale');
 
 	command_ok(
@@ -138,7 +138,7 @@ if ($ENV{with_icu} eq 'yes')
 			'--lc-monetary=C', '--lc-time=C',
 			"$tempdir/data4"
 		],
-		qr/^\s+ICU locale:\s+und\n/ms,
+		qr/^\s+default collation locale:\s+und\n/ms,
 		'options --locale-provider=icu --locale=und --lc-*=C');
 
 	command_fails_like(
@@ -184,6 +184,42 @@ else
 		'locale provider ICU fails since no ICU support');
 }
 
+command_fails(
+	[ 'initdb', '--no-sync', '--locale-provider=builtin', "$tempdir/data6" ],
+	'locale provider builtin fails without --locale');
+
+command_ok(
+	[
+		'initdb', '--no-sync',
+		'--locale-provider=builtin', '--locale=C',
+		"$tempdir/data7"
+	],
+	'locale provider builtin with --locale');
+
+command_ok(
+	[
+		'initdb', '--no-sync',
+		'--locale-provider=builtin', '--lc-ctype=C',
+		'--locale=C', "$tempdir/data10"
+	],
+	'locale provider builtin with --lc-ctype');
+
+command_fails(
+	[
+		'initdb', '--no-sync',
+		'--locale-provider=builtin', '--icu-locale=en',
+		"$tempdir/dataX"
+	],
+	'fails for locale provider builtin with ICU locale');
+
+command_fails(
+	[
+		'initdb', '--no-sync',
+		'--locale-provider=builtin', '--icu-rules=""',
+		"$tempdir/dataX"
+	],
+	'fails for locale provider builtin with ICU rules');
+
 command_fails(
 	[ 'initdb', '--no-sync', '--locale-provider=xyz', "$tempdir/dataX" ],
 	'fails for invalid locale provider');
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index 23e6217b73..0e4e7565ae 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -3114,7 +3114,9 @@ dumpDatabase(Archive *fout)
 	}
 
 	appendPQExpBufferStr(creaQry, " LOCALE_PROVIDER = ");
-	if (datlocprovider[0] == 'c')
+	if (datlocprovider[0] == 'b')
+		appendPQExpBufferStr(creaQry, "builtin");
+	else if (datlocprovider[0] == 'c')
 		appendPQExpBufferStr(creaQry, "libc");
 	else if (datlocprovider[0] == 'i')
 		appendPQExpBufferStr(creaQry, "icu");
@@ -3122,27 +3124,33 @@ dumpDatabase(Archive *fout)
 		pg_fatal("unrecognized locale provider: %s",
 				 datlocprovider);
 
-	if (strlen(collate) > 0 && strcmp(collate, ctype) == 0)
+	if (!locale && datlocprovider[0] != 'c')
+		pg_log_warning("database '%s' with provider '%s' missing datlocale",
+					   datname, datlocprovider);
+
+	if (locale && datlocprovider[0] == 'c')
+		pg_log_warning("database '%s' with provider 'c' has non-NULL locale '%s'",
+					   datname, locale);
+
+	/* if collate and ctype are equal, and locale is NULL, use LOCALE */
+	if (!locale && strlen(collate) > 0 && strcmp(collate, ctype) == 0)
+		locale = collate;
+
+	/* output LC_COLLATE and LC_CTYPE if different from LOCALE */
+	if (strlen(collate) > 0 && (!locale || strcmp(collate, locale) != 0))
 	{
-		appendPQExpBufferStr(creaQry, " LOCALE = ");
+		appendPQExpBufferStr(creaQry, " LC_COLLATE = ");
 		appendStringLiteralAH(creaQry, collate, fout);
 	}
-	else
+	if (strlen(ctype) > 0 && (!locale || strcmp(ctype, locale) != 0))
 	{
-		if (strlen(collate) > 0)
-		{
-			appendPQExpBufferStr(creaQry, " LC_COLLATE = ");
-			appendStringLiteralAH(creaQry, collate, fout);
-		}
-		if (strlen(ctype) > 0)
-		{
-			appendPQExpBufferStr(creaQry, " LC_CTYPE = ");
-			appendStringLiteralAH(creaQry, ctype, fout);
-		}
+		appendPQExpBufferStr(creaQry, " LC_CTYPE = ");
+		appendStringLiteralAH(creaQry, ctype, fout);
 	}
+
 	if (locale)
 	{
-		appendPQExpBufferStr(creaQry, " ICU_LOCALE = ");
+		appendPQExpBufferStr(creaQry, " LOCALE = ");
 		appendStringLiteralAH(creaQry, locale, fout);
 	}
 
@@ -13870,7 +13878,9 @@ dumpCollation(Archive *fout, const CollInfo *collinfo)
 					  fmtQualifiedDumpable(collinfo));
 
 	appendPQExpBufferStr(q, "provider = ");
-	if (collprovider[0] == 'c')
+	if (collprovider[0] == 'b')
+		appendPQExpBufferStr(q, "builtin");
+	else if (collprovider[0] == 'c')
 		appendPQExpBufferStr(q, "libc");
 	else if (collprovider[0] == 'i')
 		appendPQExpBufferStr(q, "icu");
@@ -13891,6 +13901,13 @@ dumpCollation(Archive *fout, const CollInfo *collinfo)
 
 		/* no locale -- the default collation cannot be reloaded anyway */
 	}
+	else if (collprovider[0] == 'b')
+	{
+		if (collcollate || collctype || colllocale || collicurules)
+			pg_log_warning("invalid collation \"%s\"", qcollname);
+
+		appendPQExpBufferStr(q, ", locale = 'C'");
+	}
 	else if (collprovider[0] == 'i')
 	{
 		if (fout->remoteVersion >= 150000)
diff --git a/src/bin/pg_upgrade/t/002_pg_upgrade.pl b/src/bin/pg_upgrade/t/002_pg_upgrade.pl
index 34a459496e..ed79c0930b 100644
--- a/src/bin/pg_upgrade/t/002_pg_upgrade.pl
+++ b/src/bin/pg_upgrade/t/002_pg_upgrade.pl
@@ -104,19 +104,13 @@ if ($oldnode->pg_version >= 11)
 	push @custom_opts, '--allow-group-access';
 }
 
-# Set up the locale settings for the original cluster, so that we
-# can test that pg_upgrade copies the locale settings of template0
-# from the old to the new cluster.
+my $old_provider_field;
+my $old_datlocale_field;
 
-my $original_encoding = "6";    # UTF-8
-my $original_provider = "c";
-my $original_locale = "C";
-my $original_datlocale = "";
-my $provider_field = "'c' AS datlocprovider";
-my $old_datlocale_field = "NULL AS datlocale";
-if ($oldnode->pg_version >= 15 && $ENV{with_icu} eq 'yes')
+# account for field additions and changes
+if ($oldnode->pg_version >= 15)
 {
-	$provider_field = "datlocprovider";
+	$old_provider_field = "datlocprovider";
 	if ($oldnode->pg_version >= '17devel')
 	{
 		$old_datlocale_field = "datlocale";
@@ -125,18 +119,65 @@ if ($oldnode->pg_version >= 15 && $ENV{with_icu} eq 'yes')
 	{
 		$old_datlocale_field = "daticulocale AS datlocale";
 	}
+}
+else
+{
+	$old_provider_field = "'c' AS datlocprovider";
+	$old_datlocale_field = "NULL AS datlocale";
+}
+
+# Set up the locale settings for the original cluster, so that we
+# can test that pg_upgrade copies the locale settings of template0
+# from the old to the new cluster.
+
+my $original_enc_name;
+my $original_provider;
+my $original_datcollate = "C";
+my $original_datctype = "C";
+my $original_datlocale;
+
+if ($oldnode->pg_version >= '17devel')
+{
+	$original_enc_name = "UTF-8";
+	$original_provider = "b";
+	$original_datlocale = "C";
+}
+elsif ($oldnode->pg_version >= 15 && $ENV{with_icu} eq 'yes')
+{
+	$original_enc_name = "UTF-8";
 	$original_provider = "i";
 	$original_datlocale = "fr-CA";
 }
+else
+{
+	$original_enc_name = "SQL_ASCII";
+	$original_provider = "c";
+	$original_datlocale = "";
+}
+
+my %encodings = ('UTF-8' => 6, 'SQL_ASCII' => 0);
+my $original_encoding = $encodings{$original_enc_name};
 
 my @initdb_params = @custom_opts;
 
-push @initdb_params, ('--encoding', 'UTF-8');
-push @initdb_params, ('--locale', $original_locale);
-if ($original_provider eq "i")
+push @initdb_params, ('--encoding', $original_enc_name);
+push @initdb_params, ('--lc-collate', $original_datcollate);
+push @initdb_params, ('--lc-ctype', $original_datctype);
+
+# add --locale-provider, if supported
+my %provider_name = ('b' => 'builtin', 'i' => 'icu', 'c' => 'libc');
+if ($oldnode->pg_version >= 15)
 {
-	push @initdb_params, ('--locale-provider', 'icu');
-	push @initdb_params, ('--icu-locale', 'fr-CA');
+	push @initdb_params,
+	  ('--locale-provider', $provider_name{$original_provider});
+	if ($original_provider eq 'b')
+	{
+		push @initdb_params, ('--builtin-locale', $original_datlocale);
+	}
+	elsif ($original_provider eq 'i')
+	{
+		push @initdb_params, ('--icu-locale', $original_datlocale);
+	}
 }
 
 $node_params{extra} = \@initdb_params;
@@ -146,10 +187,10 @@ $oldnode->start;
 my $result;
 $result = $oldnode->safe_psql(
 	'postgres',
-	"SELECT encoding, $provider_field, datcollate, datctype, $old_datlocale_field
+	"SELECT encoding, $old_provider_field, datcollate, datctype, $old_datlocale_field
                  FROM pg_database WHERE datname='template0'");
 is( $result,
-	"$original_encoding|$original_provider|$original_locale|$original_locale|$original_datlocale",
+	"$original_encoding|$original_provider|$original_datcollate|$original_datctype|$original_datlocale",
 	"check locales in original cluster");
 
 # The default location of the source code is the root of this directory.
@@ -433,10 +474,10 @@ if (-d $log_path)
 # Test that upgraded cluster has original locale settings.
 $result = $newnode->safe_psql(
 	'postgres',
-	"SELECT encoding, $provider_field, datcollate, datctype, datlocale
+	"SELECT encoding, datlocprovider, datcollate, datctype, datlocale
                  FROM pg_database WHERE datname='template0'");
 is( $result,
-	"$original_encoding|$original_provider|$original_locale|$original_locale|$original_datlocale",
+	"$original_encoding|$original_provider|$original_datcollate|$original_datctype|$original_datlocale",
 	"check that locales in new cluster match original cluster");
 
 # Second dump from the upgraded instance.
diff --git a/src/bin/psql/describe.c b/src/bin/psql/describe.c
index 68b2ea8872..1ab80eb7ca 100644
--- a/src/bin/psql/describe.c
+++ b/src/bin/psql/describe.c
@@ -926,7 +926,7 @@ listAllDbs(const char *pattern, bool verbose)
 					  gettext_noop("Encoding"));
 	if (pset.sversion >= 150000)
 		appendPQExpBuffer(&buf,
-						  "  CASE d.datlocprovider WHEN 'c' THEN 'libc' WHEN 'i' THEN 'icu' END AS \"%s\",\n",
+						  "  CASE d.datlocprovider WHEN 'b' THEN 'builtin' WHEN 'c' THEN 'libc' WHEN 'i' THEN 'icu' END AS \"%s\",\n",
 						  gettext_noop("Locale Provider"));
 	else
 		appendPQExpBuffer(&buf,
@@ -4974,7 +4974,7 @@ listCollations(const char *pattern, bool verbose, bool showSystem)
 
 	if (pset.sversion >= 100000)
 		appendPQExpBuffer(&buf,
-						  "  CASE c.collprovider WHEN 'd' THEN 'default' WHEN 'c' THEN 'libc' WHEN 'i' THEN 'icu' END AS \"%s\",\n",
+						  "  CASE c.collprovider WHEN 'd' THEN 'default' WHEN 'b' THEN 'builtin' WHEN 'c' THEN 'libc' WHEN 'i' THEN 'icu' END AS \"%s\",\n",
 						  gettext_noop("Provider"));
 	else
 		appendPQExpBuffer(&buf,
diff --git a/src/bin/scripts/createdb.c b/src/bin/scripts/createdb.c
index 14970a6a5f..4af4b98181 100644
--- a/src/bin/scripts/createdb.c
+++ b/src/bin/scripts/createdb.c
@@ -40,8 +40,9 @@ main(int argc, char *argv[])
 		{"locale", required_argument, NULL, 'l'},
 		{"maintenance-db", required_argument, NULL, 3},
 		{"locale-provider", required_argument, NULL, 4},
-		{"icu-locale", required_argument, NULL, 5},
-		{"icu-rules", required_argument, NULL, 6},
+		{"builtin-locale", required_argument, NULL, 5},
+		{"icu-locale", required_argument, NULL, 6},
+		{"icu-rules", required_argument, NULL, 7},
 		{NULL, 0, NULL, 0}
 	};
 
@@ -67,6 +68,7 @@ main(int argc, char *argv[])
 	char	   *lc_ctype = NULL;
 	char	   *locale = NULL;
 	char	   *locale_provider = NULL;
+	char	   *builtin_locale = NULL;
 	char	   *icu_locale = NULL;
 	char	   *icu_rules = NULL;
 
@@ -134,9 +136,12 @@ main(int argc, char *argv[])
 				locale_provider = pg_strdup(optarg);
 				break;
 			case 5:
-				icu_locale = pg_strdup(optarg);
+				builtin_locale = pg_strdup(optarg);
 				break;
 			case 6:
+				icu_locale = pg_strdup(optarg);
+				break;
+			case 7:
 				icu_rules = pg_strdup(optarg);
 				break;
 			default:
@@ -216,6 +221,11 @@ main(int argc, char *argv[])
 		appendPQExpBufferStr(&sql, " LOCALE ");
 		appendStringLiteralConn(&sql, locale, conn);
 	}
+	if (builtin_locale)
+	{
+		appendPQExpBufferStr(&sql, " BUILTIN_LOCALE ");
+		appendStringLiteralConn(&sql, builtin_locale, conn);
+	}
 	if (lc_collate)
 	{
 		appendPQExpBufferStr(&sql, " LC_COLLATE ");
@@ -296,7 +306,7 @@ help(const char *progname)
 	printf(_("      --lc-ctype=LOCALE        LC_CTYPE setting for the database\n"));
 	printf(_("      --icu-locale=LOCALE      ICU locale setting for the database\n"));
 	printf(_("      --icu-rules=RULES        ICU rules setting for the database\n"));
-	printf(_("      --locale-provider={libc|icu}\n"
+	printf(_("      --locale-provider={builtin|libc|icu}\n"
 			 "                               locale provider for the database's default collation\n"));
 	printf(_("  -O, --owner=OWNER            database user to own the new database\n"));
 	printf(_("  -S, --strategy=STRATEGY      database creation strategy wal_log or file_copy\n"));
diff --git a/src/bin/scripts/t/020_createdb.pl b/src/bin/scripts/t/020_createdb.pl
index 37e47b0078..dfd635bfab 100644
--- a/src/bin/scripts/t/020_createdb.pl
+++ b/src/bin/scripts/t/020_createdb.pl
@@ -105,6 +105,66 @@ else
 		'create database with ICU fails since no ICU support');
 }
 
+$node->command_fails(
+	[
+		'createdb', '-T',
+		'template0', '--locale-provider=builtin',
+		'tbuiltin1'
+	],
+	'create database with provider "builtin" fails without --locale');
+
+$node->command_ok(
+	[
+		'createdb', '-T',
+		'template0', '--locale-provider=builtin',
+		'--locale=C', 'tbuiltin2'
+	],
+	'create database with provider "builtin" and locale "C"');
+
+$node->command_ok(
+	[
+		'createdb', '-T',
+		'template0', '--locale-provider=builtin',
+		'--locale=C', '--lc-collate=C',
+		'tbuiltin3'
+	],
+	'create database with provider "builtin" and LC_COLLATE=C');
+
+$node->command_ok(
+	[
+		'createdb', '-T',
+		'template0', '--locale-provider=builtin',
+		'--locale=C', '--lc-ctype=C',
+		'tbuiltin4'
+	],
+	'create database with provider "builtin" and LC_CTYPE=C');
+
+$node->command_fails(
+	[
+		'createdb', '-T',
+		'template0', '--locale-provider=builtin',
+		'--locale=C', '--icu-locale=en',
+		'tbuiltin7'
+	],
+	'create database with provider "builtin" and ICU_LOCALE="en"');
+
+$node->command_fails(
+	[
+		'createdb', '-T',
+		'template0', '--locale-provider=builtin',
+		'--locale=C', '--icu-rules=""',
+		'tbuiltin8'
+	],
+	'create database with provider "builtin" and ICU_RULES=""');
+
+$node->command_fails(
+	[
+		'createdb', '-T',
+		'template1', '--locale-provider=builtin',
+		'--locale=C', 'tbuiltin9'
+	],
+	'create database with provider "builtin" not matching template');
+
 $node->command_fails([ 'createdb', 'foobar1' ],
 	'fails if database already exists');
 
diff --git a/src/include/catalog/pg_collation.dat b/src/include/catalog/pg_collation.dat
index 7396ff10c4..938432e8a4 100644
--- a/src/include/catalog/pg_collation.dat
+++ b/src/include/catalog/pg_collation.dat
@@ -23,9 +23,9 @@
   descr => 'standard POSIX collation',
   collname => 'POSIX', collprovider => 'c', collencoding => '-1',
   collcollate => 'POSIX', collctype => 'POSIX' },
-{ oid => '962', descr => 'sorts by Unicode code point',
-  collname => 'ucs_basic', collprovider => 'c', collencoding => '6',
-  collcollate => 'C', collctype => 'C' },
+{ oid => '962', descr => 'sorts by Unicode code point, C character semantics',
+  collname => 'ucs_basic', collprovider => 'b', collencoding => '6',
+  colllocale => 'C' },
 { oid => '963',
   descr => 'sorts using the Unicode Collation Algorithm with default settings',
   collname => 'unicode', collprovider => 'i', collencoding => '-1',
diff --git a/src/include/catalog/pg_collation.h b/src/include/catalog/pg_collation.h
index a3e196fb53..5ce289d74b 100644
--- a/src/include/catalog/pg_collation.h
+++ b/src/include/catalog/pg_collation.h
@@ -68,6 +68,7 @@ MAKE_SYSCACHE(COLLOID, pg_collation_oid_index, 8);
 #ifdef EXPOSE_TO_CLIENT_CODE
 
 #define COLLPROVIDER_DEFAULT	'd'
+#define COLLPROVIDER_BUILTIN	'b'
 #define COLLPROVIDER_ICU		'i'
 #define COLLPROVIDER_LIBC		'c'
 
@@ -76,6 +77,8 @@ collprovider_name(char c)
 {
 	switch (c)
 	{
+		case COLLPROVIDER_BUILTIN:
+			return "builtin";
 		case COLLPROVIDER_ICU:
 			return "icu";
 		case COLLPROVIDER_LIBC:
diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h
index 28c925b5af..2fd0c5157b 100644
--- a/src/include/utils/pg_locale.h
+++ b/src/include/utils/pg_locale.h
@@ -76,6 +76,10 @@ struct pg_locale_struct
 	bool		deterministic;
 	union
 	{
+		struct
+		{
+			const char *locale;
+		}			builtin;
 		locale_t	lt;
 #ifdef USE_ICU
 		struct
@@ -112,7 +116,7 @@ extern size_t pg_strxfrm_prefix(char *dest, const char *src, size_t destsize,
 								pg_locale_t locale);
 extern size_t pg_strnxfrm_prefix(char *dest, size_t destsize, const char *src,
 								 size_t srclen, pg_locale_t locale);
-
+extern const char *builtin_validate_locale(int encoding, const char *loc_str);
 extern void icu_validate_locale(const char *loc_str);
 extern char *icu_language_tag(const char *loc_str, int elevel);
 
diff --git a/src/test/icu/t/010_database.pl b/src/test/icu/t/010_database.pl
index 8a1fc12ec6..5f8ef16803 100644
--- a/src/test/icu/t/010_database.pl
+++ b/src/test/icu/t/010_database.pl
@@ -27,9 +27,8 @@ CREATE TABLE icu (def text, en text COLLATE "en-x-icu", upfirst text COLLATE upp
 INSERT INTO icu VALUES ('a', 'a', 'a'), ('b', 'b', 'b'), ('A', 'A', 'A'), ('B', 'B', 'B');
 });
 
-is( $node1->safe_psql('dbicu', q{SELECT icu_unicode_version() IS NOT NULL}),
-	qq(t),
-	'ICU unicode version defined');
+is($node1->safe_psql('dbicu', q{SELECT icu_unicode_version() IS NOT NULL}),
+	qq(t), 'ICU unicode version defined');
 
 is( $node1->safe_psql('dbicu', q{SELECT def FROM icu ORDER BY def}),
 	qq(A
@@ -63,14 +62,13 @@ is( $node1->psql(
 	0,
 	"C locale works for ICU");
 
-# Test that LOCALE works for ICU locales if LC_COLLATE and LC_CTYPE
-# are specified
-is( $node1->psql(
-		'postgres',
-		q{CREATE DATABASE dbicu2 LOCALE_PROVIDER icu LOCALE '@colStrength=primary'
-      LC_COLLATE='C' LC_CTYPE='C' TEMPLATE template0 ENCODING UTF8}
-	),
-	0,
-	"LOCALE works for ICU locales if LC_COLLATE and LC_CTYPE are specified");
+my ($ret, $stdout, $stderr) = $node1->psql('postgres',
+	q{CREATE DATABASE dbicu LOCALE_PROVIDER builtin LOCALE 'C' TEMPLATE dbicu}
+);
+isnt($ret, 0, "locale provider must match template: exit code not 0");
+like(
+	$stderr,
+	qr/ERROR:  new locale provider \(builtin\) does not match locale provider of the template database \(icu\)/,
+	"locale provider must match template: error message");
 
 done_testing();
diff --git a/src/test/regress/expected/collate.out b/src/test/regress/expected/collate.out
index 0649564485..593a622637 100644
--- a/src/test/regress/expected/collate.out
+++ b/src/test/regress/expected/collate.out
@@ -650,6 +650,22 @@ EXPLAIN (COSTS OFF)
 (3 rows)
 
 -- CREATE/DROP COLLATION
+CREATE COLLATION builtin_c ( PROVIDER = builtin, LOCALE = "C" );
+SELECT b FROM collate_test1 ORDER BY b COLLATE builtin_c;
+  b  
+-----
+ ABD
+ Abc
+ abc
+ bbc
+(4 rows)
+
+CREATE COLLATION builtin2 ( PROVIDER = builtin ); -- fails
+ERROR:  parameter "locale" must be specified
+CREATE COLLATION builtin2 ( PROVIDER = builtin, LOCALE = "en_US" ); -- fails
+ERROR:  invalid locale name "en_US" for builtin provider
+CREATE COLLATION builtin2 ( PROVIDER = builtin, LC_CTYPE = "C", LC_COLLATE = "C" ); -- fails
+ERROR:  parameter "locale" must be specified
 CREATE COLLATION mycoll1 FROM "C";
 CREATE COLLATION mycoll2 ( LC_COLLATE = "POSIX", LC_CTYPE = "POSIX" );
 CREATE COLLATION mycoll3 FROM "default";  -- intentionally unsupported
@@ -754,7 +770,7 @@ DETAIL:  FROM cannot be specified together with any other options.
 -- must get rid of them.
 --
 DROP SCHEMA collate_tests CASCADE;
-NOTICE:  drop cascades to 19 other objects
+NOTICE:  drop cascades to 20 other objects
 DETAIL:  drop cascades to table collate_test1
 drop cascades to table collate_test_like
 drop cascades to table collate_test2
@@ -771,6 +787,7 @@ drop cascades to function dup(anyelement)
 drop cascades to table collate_test20
 drop cascades to table collate_test21
 drop cascades to table collate_test22
+drop cascades to collation builtin_c
 drop cascades to collation mycoll2
 drop cascades to table collate_test23
 drop cascades to view collate_on_int
diff --git a/src/test/regress/sql/collate.sql b/src/test/regress/sql/collate.sql
index c3d40fc195..4b0e4472c3 100644
--- a/src/test/regress/sql/collate.sql
+++ b/src/test/regress/sql/collate.sql
@@ -244,6 +244,14 @@ EXPLAIN (COSTS OFF)
 
 -- CREATE/DROP COLLATION
 
+CREATE COLLATION builtin_c ( PROVIDER = builtin, LOCALE = "C" );
+
+SELECT b FROM collate_test1 ORDER BY b COLLATE builtin_c;
+
+CREATE COLLATION builtin2 ( PROVIDER = builtin ); -- fails
+CREATE COLLATION builtin2 ( PROVIDER = builtin, LOCALE = "en_US" ); -- fails
+CREATE COLLATION builtin2 ( PROVIDER = builtin, LC_CTYPE = "C", LC_COLLATE = "C" ); -- fails
+
 CREATE COLLATION mycoll1 FROM "C";
 CREATE COLLATION mycoll2 ( LC_COLLATE = "POSIX", LC_CTYPE = "POSIX" );
 CREATE COLLATION mycoll3 FROM "default";  -- intentionally unsupported
-- 
2.34.1

