From 09fc2ae65c8a8b483d2d8c2a7b977d80fe27ebae Mon Sep 17 00:00:00 2001
From: Corey Huinker <corey.huinker@gmail.com>
Date: Wed, 5 Nov 2025 01:13:04 -0500
Subject: [PATCH v8 7/7] Include Extended Statistics in pg_dump.

Incorporate the new pg_restore_extended_stats() function into pg_dump.
This detects the existence of extended statistics statistics (i.e.
pg_statistic_ext_data rows).

This handles many of the changes that have happened to extended
statistic statistics over the various versions, including:

* Format change for pg_ndistinct and pg_dependencies in current
  development version. Earlier versions have the format translated via
  the pg_dump SQL statement.

* Inherited extended statistics were introduced in v15.

* Expressions were introduced to extended statistics in v14.

* MCV extended statistics were introduced in v13.

* pg_statistic_ext_data and pg_stats_ext introduced in v12, prior to
  that ndstinct and depdendencies data (the only kind of stats that
  existed were directly on pg_statistic_ext.

* Extended Statistics were introduced in v10, so there is no support for
  prior versions necessary.
---
 src/bin/pg_dump/pg_backup.h          |   1 +
 src/bin/pg_dump/pg_backup_archiver.c |   3 +-
 src/bin/pg_dump/pg_dump.c            | 252 +++++++++++++++++++++++++++
 src/bin/pg_dump/t/002_pg_dump.pl     |  28 +++
 4 files changed, 283 insertions(+), 1 deletion(-)

diff --git a/src/bin/pg_dump/pg_backup.h b/src/bin/pg_dump/pg_backup.h
index d9041dad7206..df708e4ced69 100644
--- a/src/bin/pg_dump/pg_backup.h
+++ b/src/bin/pg_dump/pg_backup.h
@@ -68,6 +68,7 @@ enum _dumpPreparedQueries
 	PREPQUERY_DUMPCOMPOSITETYPE,
 	PREPQUERY_DUMPDOMAIN,
 	PREPQUERY_DUMPENUMTYPE,
+	PREPQUERY_DUMPEXTSTATSSTATS,
 	PREPQUERY_DUMPFUNC,
 	PREPQUERY_DUMPOPR,
 	PREPQUERY_DUMPRANGETYPE,
diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c
index 59eaecb4ed71..1bfd296e0ee9 100644
--- a/src/bin/pg_dump/pg_backup_archiver.c
+++ b/src/bin/pg_dump/pg_backup_archiver.c
@@ -3008,7 +3008,8 @@ _tocEntryRequired(TocEntry *te, teSection curSection, ArchiveHandle *AH)
 		strcmp(te->desc, "SEARCHPATH") == 0)
 		return REQ_SPECIAL;
 
-	if (strcmp(te->desc, "STATISTICS DATA") == 0)
+	if ((strcmp(te->desc, "STATISTICS DATA") == 0) ||
+		(strcmp(te->desc, "EXTENDED STATISTICS DATA") == 0))
 	{
 		if (!ropt->dumpStatistics)
 			return 0;
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index a00918bacb40..8c5850f9e9b3 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -324,6 +324,7 @@ static void dumpSequenceData(Archive *fout, const TableDataInfo *tdinfo);
 static void dumpIndex(Archive *fout, const IndxInfo *indxinfo);
 static void dumpIndexAttach(Archive *fout, const IndexAttachInfo *attachinfo);
 static void dumpStatisticsExt(Archive *fout, const StatsExtInfo *statsextinfo);
+static void dumpStatisticsExtStats(Archive *fout, const StatsExtInfo *statsextinfo);
 static void dumpConstraint(Archive *fout, const ConstraintInfo *coninfo);
 static void dumpTableConstraintComment(Archive *fout, const ConstraintInfo *coninfo);
 static void dumpTSParser(Archive *fout, const TSParserInfo *prsinfo);
@@ -8258,6 +8259,9 @@ getExtendedStatistics(Archive *fout)
 
 		/* Decide whether we want to dump it */
 		selectDumpableStatisticsObject(&(statsextinfo[i]), fout);
+
+		if (fout->dopt->dumpStatistics)
+			statsextinfo[i].dobj.components |= DUMP_COMPONENT_STATISTICS;
 	}
 
 	PQclear(res);
@@ -11712,6 +11716,7 @@ dumpDumpableObject(Archive *fout, DumpableObject *dobj)
 			break;
 		case DO_STATSEXT:
 			dumpStatisticsExt(fout, (const StatsExtInfo *) dobj);
+			dumpStatisticsExtStats(fout, (const StatsExtInfo *) dobj);
 			break;
 		case DO_REFRESH_MATVIEW:
 			refreshMatViewData(fout, (const TableDataInfo *) dobj);
@@ -18514,6 +18519,253 @@ dumpStatisticsExt(Archive *fout, const StatsExtInfo *statsextinfo)
 	free(qstatsextname);
 }
 
+/*
+ * dumpStatisticsExtStats
+ *	  write out to fout the stats for an extended statistics object
+ */
+static void
+dumpStatisticsExtStats(Archive *fout, const StatsExtInfo *statsextinfo)
+{
+	DumpOptions *dopt = fout->dopt;
+	PQExpBuffer query;
+	PGresult   *res;
+	int			nstats;
+
+	/* Do nothing if not dumping statistics */
+	if (!dopt->dumpStatistics)
+		return;
+
+	if (!fout->is_prepared[PREPQUERY_DUMPEXTSTATSSTATS])
+	{
+		PQExpBuffer pq = createPQExpBuffer();
+
+		/*
+		 * Set up query for constraint-specific details.
+		 *
+		 * 19+: query pg_stats_ext and pg_stats_ext_exprs as-is 15-18: query
+		 * pg_stats_ext translating the ndistinct and depdendencies, 14:
+		 * inherited is always NULL 12-13: no pg_stats_ext_exprs 10-11: no
+		 * pg_stats_ext, join pg_statistic_ext and pg_namespace
+		 */
+
+		appendPQExpBufferStr(pq,
+							 "PREPARE getExtStatsStats(pg_catalog.name, pg_catalog.name) AS\n"
+							 "SELECT ");
+
+		/*
+		 * Versions 15+ have inherited stats.
+		 *
+		 * Create this column in all version because we need to order by it later.
+		 */
+		if (fout->remoteVersion >= 150000)
+			appendPQExpBufferStr(pq, "e.inherited, ");
+		else
+			appendPQExpBufferStr(pq, "false AS inherited, ");
+
+		/*
+		 * Versions < 19 use the old ndistintinct and depdendencies formats
+		 *
+		 * These transformations may look scary, but all we're doing is translating
+		 *
+		 * {"3, 4": 11, "3, 6": 11, "4, 6": 11, "3, 4, 6": 11}
+		 *
+		 * to
+		 *
+		 * [{"ndistinct": 11, "attributes": [3,4]},
+		 *  {"ndistinct": 11, "attributes": [3,6]},
+		 *  {"ndistinct": 11, "attributes": [4,6]},
+		 *  {"ndistinct": 11, "attributes": [3,4,6]}]
+		 *
+		 * and
+		 * {"3 => 4": 1.000000, "3 => 6": 1.000000, "4 => 6": 1.000000,
+		 *  "3, 4 => 6": 1.000000, "3, 6 => 4": 1.000000}
+		 *
+		 *  to
+		 *
+		 * [{"degree": 1.000000, "attributes": [3], "dependency": 4},
+		 *  {"degree": 1.000000, "attributes": [3], "dependency": 6},
+		 *  {"degree": 1.000000, "attributes": [4], "dependency": 6},
+		 *  {"degree": 1.000000, "attributes": [3,4], "dependency": 6},
+		 *  {"degree": 1.000000, "attributes": [3,6], "dependency": 4}]
+		 */
+		if (fout->remoteVersion >= 190000)
+			appendPQExpBufferStr(pq, "e.n_distinct, e.dependencies, ");
+		else
+			appendPQExpBufferStr(pq,
+								 "( "
+								 "SELECT json_agg( "
+								 "    json_build_object( "
+								 "        'attributes', "
+								 "         string_to_array(kv.key, ', ')::integer[], "
+								 "        'ndistinct', "
+								 "        kv.value::bigint )) "
+								 "FROM json_each_text(e.n_distinct::text::json) AS kv"
+								 ") AS n_distinct, "
+								 "( "
+								 "SELECT json_agg( "
+								 "    json_build_object( "
+								 "        'attributes', "
+								 "        string_to_array( "
+								 "            split_part(kv.key, ' => ', 1), "
+								 "            ', ')::integer[], "
+								 "        'dependency', "
+								 "        split_part(kv.key, ' => ', 2)::integer, "
+								 "        'degree', "
+								 "        kv.value::double precision )) "
+								 "FROM json_each_text(e.dependencies::text::json) AS kv "
+								 ") AS dependencies, ");
+
+		/* Versions < 12 do not have MCV */
+		if (fout->remoteVersion >= 130000)
+			appendPQExpBufferStr(pq,
+								 "e.most_common_vals, e.most_common_val_nulls, "
+								 "e.most_common_freqs, e.most_common_base_freqs, ");
+		else
+			appendPQExpBufferStr(pq,
+								 "NULL AS most_common_vals, NULL AS most_common_val_nulls, "
+								 "NULL AS most_common_freqs, NULL AS most_common_base_freqs, ");
+
+		/* Expressions were introduced in v14 */
+		if (fout->remoteVersion >= 140000)
+		{
+			appendPQExpBufferStr(pq,
+								 "( "
+								 "SELECT array_agg( "
+								 "    ARRAY[ee.null_frac::text, ee.avg_width::text, "
+								 "          ee.n_distinct::text, ee.most_common_vals::text, "
+								 "          ee.most_common_freqs::text, ee.histogram_bounds::text, "
+								 "          ee.correlation::text, ee.most_common_elems::text, "
+								 "          ee.most_common_elem_freqs::text, "
+								 "          ee.elem_count_histogram::text]) "
+								 "FROM pg_stats_ext_exprs AS ee "
+								 "WHERE ee.statistics_schemaname = $1 "
+								 "AND ee.statistics_name = $2 ");
+
+			/* Inherited expressions introduced in v15 */
+			if (fout->remoteVersion >= 150000)
+				appendPQExpBufferStr(pq, "AND ee.inherited = e.inherited");
+
+			appendPQExpBufferStr(pq, ") AS exprs ");
+		}
+		else
+			appendPQExpBufferStr(pq, "NULL AS exprs ");
+
+		/* pg_stats_ext introduced in v12 */
+		if (fout->remoteVersion >= 120000)
+			appendPQExpBufferStr(pq,
+								 "FROM pg_catalog.pg_stats_ext AS e "
+								 "WHERE e.statistics_schemaname = $1 "
+								 "AND e.statistics_name = $2 ");
+		else
+			appendPQExpBufferStr(pq,
+								 "FROM ( "
+								 "SELECT s.stxndistinct AS n_distinct, "
+								 "    s.stxdependencies AS dependencies "
+								 "FROM pg_catalog.pg_statistics_ext AS s "
+								 "JOIN pg_catalog.pg_namespace AS n "
+								 "ON n.oid = s.stxnamespace "
+								 "WHERE n.nspname = $1 "
+								 "AND e.stxname = $2 "
+								 ") AS e ");
+
+		/* we always have an inherited column, but it may be a constant */
+		appendPQExpBufferStr(pq, "ORDER BY inherited");
+
+		ExecuteSqlStatement(fout, pq->data);
+
+		fout->is_prepared[PREPQUERY_DUMPEXTSTATSSTATS] = true;
+
+		destroyPQExpBuffer(pq);
+	}
+
+	query = createPQExpBuffer();
+
+	appendPQExpBufferStr(query, "EXECUTE getExtStatsStats(");
+	appendStringLiteralAH(query, statsextinfo->dobj.namespace->dobj.name, fout);
+	appendPQExpBufferStr(query, "::pg_catalog.name, ");
+	appendStringLiteralAH(query, statsextinfo->dobj.name, fout);
+	appendPQExpBufferStr(query, "::pg_catalog.name)");
+
+	res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK);
+
+	destroyPQExpBuffer(query);
+
+	nstats = PQntuples(res);
+
+	if (nstats > 0)
+	{
+		PQExpBuffer out = createPQExpBuffer();
+
+		int			i_inherited = PQfnumber(res, "inherited");
+		int			i_ndistinct = PQfnumber(res, "n_distinct");
+		int			i_dependencies = PQfnumber(res, "dependencies");
+		int			i_mcv = PQfnumber(res, "most_common_vals");
+		int			i_mcv_nulls = PQfnumber(res, "most_common_val_nulls");
+		int			i_mcf = PQfnumber(res, "most_common_freqs");
+		int			i_mcbf = PQfnumber(res, "most_common_base_freqs");
+		int			i_exprs = PQfnumber(res, "exprs");
+
+		for (int i = 0; i < nstats; i++)
+		{
+			if (PQgetisnull(res, i, i_inherited))
+				pg_fatal("inherited cannot be NULL");
+
+			appendPQExpBufferStr(out,
+								 "SELECT * FROM pg_catalog.pg_restore_extended_stats(\n");
+			appendPQExpBuffer(out, "\t'version', '%d'::integer,\n",
+							  fout->remoteVersion);
+			appendPQExpBufferStr(out, "\t'statistics_schemaname', ");
+			appendStringLiteralAH(out, statsextinfo->dobj.namespace->dobj.name, fout);
+			appendPQExpBufferStr(out, ",\n\t'statistics_name', ");
+			appendStringLiteralAH(out, statsextinfo->dobj.name, fout);
+			appendNamedArgument(out, fout, "inherited", "boolean",
+								PQgetvalue(res, i, i_inherited));
+
+			if (!PQgetisnull(res, i, i_ndistinct))
+				appendNamedArgument(out, fout, "n_distinct", "pg_ndistinct",
+									PQgetvalue(res, i, i_ndistinct));
+
+			if (!PQgetisnull(res, i, i_dependencies))
+				appendNamedArgument(out, fout, "dependencies", "pg_dependencies",
+									PQgetvalue(res, i, i_dependencies));
+
+			if (!PQgetisnull(res, i, i_mcv))
+				appendNamedArgument(out, fout, "most_common_vals", "text[]",
+									PQgetvalue(res, i, i_mcv));
+
+			if (!PQgetisnull(res, i, i_mcv_nulls))
+				appendNamedArgument(out, fout, "most_common_val_nulls", "boolean[]",
+									PQgetvalue(res, i, i_mcv_nulls));
+
+			if (!PQgetisnull(res, i, i_mcf))
+				appendNamedArgument(out, fout, "most_common_freqs", "double precision[]",
+									PQgetvalue(res, i, i_mcf));
+
+			if (!PQgetisnull(res, i, i_mcbf))
+				appendNamedArgument(out, fout, "most_common_base_freqs", "double precision[]",
+									PQgetvalue(res, i, i_mcbf));
+
+			if (!PQgetisnull(res, i, i_exprs))
+				appendNamedArgument(out, fout, "exprs", "text[]",
+									PQgetvalue(res, i, i_exprs));
+
+			appendPQExpBufferStr(out, "\n);\n");
+		}
+
+		ArchiveEntry(fout, nilCatalogId, createDumpId(),
+					 ARCHIVE_OPTS(.tag = statsextinfo->dobj.name,
+								  .namespace = statsextinfo->dobj.namespace->dobj.name,
+								  .owner = statsextinfo->rolname,
+								  .description = "EXTENDED STATISTICS DATA",
+								  .section = SECTION_POST_DATA,
+								  .createStmt = out->data,
+								  .deps = &statsextinfo->dobj.dumpId,
+								  .nDeps = 1));
+		destroyPQExpBuffer(out);
+	}
+	PQclear(res);
+}
+
 /*
  * dumpConstraint
  *	  write out to fout a user-defined constraint
diff --git a/src/bin/pg_dump/t/002_pg_dump.pl b/src/bin/pg_dump/t/002_pg_dump.pl
index 445a541abf63..6681265974f6 100644
--- a/src/bin/pg_dump/t/002_pg_dump.pl
+++ b/src/bin/pg_dump/t/002_pg_dump.pl
@@ -4772,6 +4772,34 @@ my %tests = (
 		},
 	},
 
+	#
+	# EXTENDED stats will end up in SECTION_POST_DATA.
+	#
+	'extended_statistics_import' => {
+		create_sql => '
+			CREATE TABLE dump_test.has_ext_stats
+			AS SELECT g.g AS x, g.g / 2 AS y FROM generate_series(1,100) AS g(g);
+			CREATE STATISTICS dump_test.es1 ON x, (y % 2) FROM dump_test.has_ext_stats;
+			ANALYZE dump_test.has_ext_stats;',
+		regexp => qr/^
+			\QSELECT * FROM pg_catalog.pg_restore_extended_stats(\E\s+/xm,
+		like => {
+			%full_runs,
+			%dump_test_schema_runs,
+			no_data_no_schema => 1,
+			no_schema => 1,
+			section_post_data => 1,
+			statistics_only => 1,
+			schema_only_with_statistics => 1,
+		},
+		unlike => {
+			exclude_dump_test_schema => 1,
+			no_statistics => 1,
+			only_dump_measurement => 1,
+			schema_only => 1,
+		},
+	},
+
 	#
 	# While attribute stats (aka pg_statistic stats) only appear for tables
 	# that have been analyzed, all tables will have relation stats because
-- 
2.51.0

