From 22e6eace8ce10dd017513ca935d2e86c04b400b6 Mon Sep 17 00:00:00 2001
From: Yugo Nagata <nagata@sraoss.co.jp>
Date: Fri, 18 Jul 2025 09:58:56 +0900
Subject: [PATCH v6] Allow creating extended statistics on virtual generated
 columns

This allows users to define extended statistics on virtual generated
columns. Expressions including such columns are stored in the catalog
as-is, and expanded at ANALYZE time. Extended statistics on a single
virtual generated column are also allowed, since it is treated as a
single expression.

To enable the optimizer to make use of these statistics, expressions
are also expanded at planning time.
---
 doc/src/sgml/ref/alter_table.sgml       |  9 +----
 src/backend/commands/statscmds.c        | 49 ++++++++++++++-----------
 src/backend/optimizer/util/plancat.c    |  5 +++
 src/backend/statistics/extended_stats.c | 15 ++++++--
 src/test/regress/expected/stats_ext.out | 25 ++++---------
 src/test/regress/sql/stats_ext.sql      | 21 ++++-------
 6 files changed, 59 insertions(+), 65 deletions(-)

diff --git a/doc/src/sgml/ref/alter_table.sgml b/doc/src/sgml/ref/alter_table.sgml
index 8591a6b5014..f8006e9d72a 100644
--- a/doc/src/sgml/ref/alter_table.sgml
+++ b/doc/src/sgml/ref/alter_table.sgml
@@ -216,8 +216,6 @@ WITH ( MODULUS <replaceable class="parameter">numeric_literal</replaceable>, REM
       When this form is used, the column's statistics are removed,
       so running <link linkend="sql-analyze"><command>ANALYZE</command></link>
       on the table afterwards is recommended.
-      For a virtual generated column, <command>ANALYZE</command>
-      is not necessary because such columns never have statistics.
      </para>
     </listitem>
    </varlistentry>
@@ -284,12 +282,9 @@ WITH ( MODULUS <replaceable class="parameter">numeric_literal</replaceable>, REM
      </para>
 
      <para>
-      When this form is used on a stored generated column, its statistics
-      are removed, so running
-      <link linkend="sql-analyze"><command>ANALYZE</command></link>
+      When this form is used, the column's statistics are removed,
+      so running <link linkend="sql-analyze"><command>ANALYZE</command></link>
       on the table afterwards is recommended.
-      For a virtual generated column, <command>ANALYZE</command>
-      is not necessary because such columns never have statistics.
      </para>
     </listitem>
    </varlistentry>
diff --git a/src/backend/commands/statscmds.c b/src/backend/commands/statscmds.c
index c1da79f36ba..8a910ba1cd0 100644
--- a/src/backend/commands/statscmds.c
+++ b/src/backend/commands/statscmds.c
@@ -28,6 +28,7 @@
 #include "commands/comment.h"
 #include "commands/defrem.h"
 #include "miscadmin.h"
+#include "nodes/makefuncs.h"
 #include "nodes/nodeFuncs.h"
 #include "optimizer/optimizer.h"
 #include "statistics/statistics.h"
@@ -268,12 +269,6 @@ CreateStatistics(CreateStatsStmt *stmt, bool check_rights)
 						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 						 errmsg("statistics creation on system columns is not supported")));
 
-			/* Disallow use of virtual generated columns in extended stats */
-			if (attForm->attgenerated == ATTRIBUTE_GENERATED_VIRTUAL)
-				ereport(ERROR,
-						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-						 errmsg("statistics creation on virtual generated columns is not supported")));
-
 			/* Disallow data types without a less-than operator */
 			type = lookup_type_cache(attForm->atttypid, TYPECACHE_LT_OPR);
 			if (type->lt_opr == InvalidOid)
@@ -282,8 +277,22 @@ CreateStatistics(CreateStatsStmt *stmt, bool check_rights)
 						 errmsg("column \"%s\" cannot be used in statistics because its type %s has no default btree operator class",
 								attname, format_type_be(attForm->atttypid))));
 
-			attnums[nattnums] = attForm->attnum;
-			nattnums++;
+			/* Treat virtual generated columns as expressions */
+			if (attForm->attgenerated == ATTRIBUTE_GENERATED_VIRTUAL)
+			{
+				Node *expr = (Node *) makeVar(1,
+											  attForm->attnum,
+											  attForm->atttypid,
+											  attForm->atttypmod,
+											  attForm->attcollation,
+											  0);
+				stxexprs = lappend(stxexprs, expr);
+			}
+			else
+			{
+				attnums[nattnums] = attForm->attnum;
+				nattnums++;
+			}
 			ReleaseSysCache(atttuple);
 		}
 		else if (IsA(selem->expr, Var)) /* column reference in parens */
@@ -297,12 +306,6 @@ CreateStatistics(CreateStatsStmt *stmt, bool check_rights)
 						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 						 errmsg("statistics creation on system columns is not supported")));
 
-			/* Disallow use of virtual generated columns in extended stats */
-			if (get_attgenerated(relid, var->varattno) == ATTRIBUTE_GENERATED_VIRTUAL)
-				ereport(ERROR,
-						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-						 errmsg("statistics creation on virtual generated columns is not supported")));
-
 			/* Disallow data types without a less-than operator */
 			type = lookup_type_cache(var->vartype, TYPECACHE_LT_OPR);
 			if (type->lt_opr == InvalidOid)
@@ -311,8 +314,16 @@ CreateStatistics(CreateStatsStmt *stmt, bool check_rights)
 						 errmsg("column \"%s\" cannot be used in statistics because its type %s has no default btree operator class",
 								get_attname(relid, var->varattno, false), format_type_be(var->vartype))));
 
-			attnums[nattnums] = var->varattno;
-			nattnums++;
+			/* Treat virtual generated columns as expressions */
+			if (get_attgenerated(relid, var->varattno) == ATTRIBUTE_GENERATED_VIRTUAL)
+			{
+				stxexprs = lappend(stxexprs, (Node *) var);
+			}
+			else
+			{
+				attnums[nattnums] = var->varattno;
+				nattnums++;
+			}
 		}
 		else					/* expression */
 		{
@@ -336,12 +347,6 @@ CreateStatistics(CreateStatsStmt *stmt, bool check_rights)
 					ereport(ERROR,
 							(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 							 errmsg("statistics creation on system columns is not supported")));
-
-				/* Disallow use of virtual generated columns in extended stats */
-				if (get_attgenerated(relid, attnum) == ATTRIBUTE_GENERATED_VIRTUAL)
-					ereport(ERROR,
-							(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-							 errmsg("statistics creation on virtual generated columns is not supported")));
 			}
 
 			/*
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c
index b2fbd6a082b..ea8faeff2ac 100644
--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -1804,6 +1804,11 @@ get_relation_statistics(PlannerInfo *root, RelOptInfo *rel,
 				if (varno != 1)
 					ChangeVarNodes((Node *) exprs, 1, varno, 0);
 
+				/*
+				 * Expand virtual generated columns in the expressions.
+				 */
+				exprs = (List *) expand_generated_columns_in_expr((Node *) exprs, relation, 1);
+
 				/*
 				 * Run the expressions through eval_const_expressions. This is
 				 * not just an optimization, but is necessary, because the
diff --git a/src/backend/statistics/extended_stats.c b/src/backend/statistics/extended_stats.c
index 334c6498581..9225a168a8a 100644
--- a/src/backend/statistics/extended_stats.c
+++ b/src/backend/statistics/extended_stats.c
@@ -32,6 +32,7 @@
 #include "parser/parsetree.h"
 #include "pgstat.h"
 #include "postmaster/autovacuum.h"
+#include "rewrite/rewriteHandler.h"
 #include "statistics/extended_stats_internal.h"
 #include "statistics/statistics.h"
 #include "utils/acl.h"
@@ -73,7 +74,7 @@ typedef struct StatExtEntry
 } StatExtEntry;
 
 
-static List *fetch_statentries_for_relation(Relation pg_statext, Oid relid);
+static List *fetch_statentries_for_relation(Relation pg_statext, Relation rel);
 static VacAttrStats **lookup_var_attr_stats(Bitmapset *attrs, List *exprs,
 											int nvacatts, VacAttrStats **vacatts);
 static void statext_store(Oid statOid, bool inh,
@@ -125,7 +126,7 @@ BuildRelationExtStatistics(Relation onerel, bool inh, double totalrows,
 
 	/* the list of stats has to be allocated outside the memory context */
 	pg_stext = table_open(StatisticExtRelationId, RowExclusiveLock);
-	statslist = fetch_statentries_for_relation(pg_stext, RelationGetRelid(onerel));
+	statslist = fetch_statentries_for_relation(pg_stext, onerel);
 
 	/* memory context for building each statistics object */
 	cxt = AllocSetContextCreate(CurrentMemoryContext,
@@ -279,7 +280,7 @@ ComputeExtStatisticsRows(Relation onerel,
 	oldcxt = MemoryContextSwitchTo(cxt);
 
 	pg_stext = table_open(StatisticExtRelationId, RowExclusiveLock);
-	lstats = fetch_statentries_for_relation(pg_stext, RelationGetRelid(onerel));
+	lstats = fetch_statentries_for_relation(pg_stext, onerel);
 
 	foreach(lc, lstats)
 	{
@@ -416,12 +417,13 @@ statext_is_kind_built(HeapTuple htup, char type)
  * Return a list (of StatExtEntry) of statistics objects for the given relation.
  */
 static List *
-fetch_statentries_for_relation(Relation pg_statext, Oid relid)
+fetch_statentries_for_relation(Relation pg_statext, Relation rel)
 {
 	SysScanDesc scan;
 	ScanKeyData skey;
 	HeapTuple	htup;
 	List	   *result = NIL;
+	Oid			relid = RelationGetRelid(rel);
 
 	/*
 	 * Prepare to scan pg_statistic_ext for entries having stxrelid = this
@@ -491,6 +493,11 @@ fetch_statentries_for_relation(Relation pg_statext, Oid relid)
 
 			pfree(exprsString);
 
+			/*
+			 * Expand virtual generated columns in the expressions.
+			 */
+			exprs = (List *) expand_generated_columns_in_expr((Node *) exprs, rel, 1);
+
 			/*
 			 * Run the expressions through eval_const_expressions. This is not
 			 * just an optimization, but is necessary, because the planner
diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out
index b6431d1ee95..b1c472eb716 100644
--- a/src/test/regress/expected/stats_ext.out
+++ b/src/test/regress/expected/stats_ext.out
@@ -25,7 +25,7 @@ begin
 end;
 $$;
 -- Verify failures
-CREATE TABLE ext_stats_test (x text, y int, z int);
+CREATE TABLE ext_stats_test (x text, y int, z int, w xid);
 CREATE STATISTICS tst;
 ERROR:  syntax error at or near ";"
 LINE 1: CREATE STATISTICS tst;
@@ -84,30 +84,19 @@ CREATE STATISTICS tst ON (x, y) FROM ext_stats_test; -- tuple expression
 ERROR:  syntax error at or near ","
 LINE 1: CREATE STATISTICS tst ON (x, y) FROM ext_stats_test;
                                    ^
-DROP TABLE ext_stats_test;
--- statistics on virtual generated column not allowed
-CREATE TABLE ext_stats_test1 (x int, y int, z int GENERATED ALWAYS AS (x+y) VIRTUAL, w xid);
-CREATE STATISTICS tst on z from ext_stats_test1;
-ERROR:  statistics creation on virtual generated columns is not supported
-CREATE STATISTICS tst on (z) from ext_stats_test1;
-ERROR:  statistics creation on virtual generated columns is not supported
-CREATE STATISTICS tst on (z+1) from ext_stats_test1;
-ERROR:  statistics creation on virtual generated columns is not supported
-CREATE STATISTICS tst (ndistinct) ON z from ext_stats_test1;
-ERROR:  statistics creation on virtual generated columns is not supported
 -- statistics on system column not allowed
-CREATE STATISTICS tst on tableoid from ext_stats_test1;
+CREATE STATISTICS tst on tableoid from ext_stats_test;
 ERROR:  statistics creation on system columns is not supported
-CREATE STATISTICS tst on (tableoid) from ext_stats_test1;
+CREATE STATISTICS tst on (tableoid) from ext_stats_test;
 ERROR:  statistics creation on system columns is not supported
-CREATE STATISTICS tst on (tableoid::int+1) from ext_stats_test1;
+CREATE STATISTICS tst on (tableoid::int+1) from ext_stats_test;
 ERROR:  statistics creation on system columns is not supported
-CREATE STATISTICS tst (ndistinct) ON xmin from ext_stats_test1;
+CREATE STATISTICS tst (ndistinct) ON xmin from ext_stats_test;
 ERROR:  statistics creation on system columns is not supported
 -- statistics without a less-than operator not supported
-CREATE STATISTICS tst (ndistinct) ON w from ext_stats_test1;
+CREATE STATISTICS tst (ndistinct) ON w from ext_stats_test;
 ERROR:  column "w" cannot be used in statistics because its type xid has no default btree operator class
-DROP TABLE ext_stats_test1;
+DROP TABLE ext_stats_test;
 -- Ensure stats are dropped sanely, and test IF NOT EXISTS while at it
 CREATE TABLE ab1 (a INTEGER, b INTEGER, c INTEGER);
 CREATE STATISTICS IF NOT EXISTS ab1_a_b_stats ON a, b FROM ab1;
diff --git a/src/test/regress/sql/stats_ext.sql b/src/test/regress/sql/stats_ext.sql
index 9dcce3440c8..22ffd74a654 100644
--- a/src/test/regress/sql/stats_ext.sql
+++ b/src/test/regress/sql/stats_ext.sql
@@ -28,7 +28,7 @@ end;
 $$;
 
 -- Verify failures
-CREATE TABLE ext_stats_test (x text, y int, z int);
+CREATE TABLE ext_stats_test (x text, y int, z int, w xid);
 CREATE STATISTICS tst;
 CREATE STATISTICS tst ON a, b;
 CREATE STATISTICS tst FROM sometab;
@@ -56,21 +56,14 @@ DROP FUNCTION tftest;
 CREATE STATISTICS tst ON (y) FROM ext_stats_test; -- single column reference
 CREATE STATISTICS tst ON y + z FROM ext_stats_test; -- missing parentheses
 CREATE STATISTICS tst ON (x, y) FROM ext_stats_test; -- tuple expression
-DROP TABLE ext_stats_test;
--- statistics on virtual generated column not allowed
-CREATE TABLE ext_stats_test1 (x int, y int, z int GENERATED ALWAYS AS (x+y) VIRTUAL, w xid);
-CREATE STATISTICS tst on z from ext_stats_test1;
-CREATE STATISTICS tst on (z) from ext_stats_test1;
-CREATE STATISTICS tst on (z+1) from ext_stats_test1;
-CREATE STATISTICS tst (ndistinct) ON z from ext_stats_test1;
 -- statistics on system column not allowed
-CREATE STATISTICS tst on tableoid from ext_stats_test1;
-CREATE STATISTICS tst on (tableoid) from ext_stats_test1;
-CREATE STATISTICS tst on (tableoid::int+1) from ext_stats_test1;
-CREATE STATISTICS tst (ndistinct) ON xmin from ext_stats_test1;
+CREATE STATISTICS tst on tableoid from ext_stats_test;
+CREATE STATISTICS tst on (tableoid) from ext_stats_test;
+CREATE STATISTICS tst on (tableoid::int+1) from ext_stats_test;
+CREATE STATISTICS tst (ndistinct) ON xmin from ext_stats_test;
 -- statistics without a less-than operator not supported
-CREATE STATISTICS tst (ndistinct) ON w from ext_stats_test1;
-DROP TABLE ext_stats_test1;
+CREATE STATISTICS tst (ndistinct) ON w from ext_stats_test;
+DROP TABLE ext_stats_test;
 
 -- Ensure stats are dropped sanely, and test IF NOT EXISTS while at it
 CREATE TABLE ab1 (a INTEGER, b INTEGER, c INTEGER);
-- 
2.43.0

