From 9a50a138d3f62f373b152ce797079051ce648e4f Mon Sep 17 00:00:00 2001
From: Yugo Nagata <nagata@sraoss.co.jp>
Date: Fri, 18 Jul 2025 09:58:56 +0900
Subject: [PATCH] Allow to collect statistics on virtual generated columns

During ANALYZE, generation expressions are expanded, and statistics are
computed using compute_expr_stats(). To support this, both compute_expr_stats()
and AnlExprData are now exported from extended_stats.c.

To enable the optimizer to make use of these statistics, a new field named
virtual_gencols is added to RelOptInfo. This field holds the expressions of
virtual generated columns in the table. In examine_variable(), if an
expression in a WHERE clause matches a virtual generated column, the
corresponding statistics are used for that expression.
---
 src/backend/commands/analyze.c                | 32 ++++++----
 src/backend/optimizer/util/plancat.c          | 60 +++++++++++++++++++
 src/backend/statistics/extended_stats.c       | 11 +---
 src/backend/utils/adt/selfuncs.c              | 44 +++++++++++++-
 src/include/nodes/pathnodes.h                 | 19 ++++++
 .../statistics/extended_stats_internal.h      |  9 +++
 6 files changed, 154 insertions(+), 21 deletions(-)

diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
index 7111d5d5334..553f1bb52db 100644
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -41,6 +41,7 @@
 #include "parser/parse_oper.h"
 #include "parser/parse_relation.h"
 #include "pgstat.h"
+#include "rewrite/rewriteHandler.h"
 #include "statistics/extended_stats_internal.h"
 #include "statistics/statistics.h"
 #include "storage/bufmgr.h"
@@ -559,13 +560,28 @@ do_analyze_rel(Relation onerel, const VacuumParams params,
 		{
 			VacAttrStats *stats = vacattrstats[i];
 			AttributeOpts *aopt;
+			Form_pg_attribute attr = TupleDescAttr(onerel->rd_att, stats->tupattnum - 1);
 
-			stats->rows = rows;
-			stats->tupDesc = onerel->rd_att;
-			stats->compute_stats(stats,
-								 std_fetch_func,
-								 numrows,
-								 totalrows);
+			/*
+			 * For a virtual generated column, compute statistics for the expression value.
+			 */
+			if (attr->attgenerated == ATTRIBUTE_GENERATED_VIRTUAL)
+			{
+				AnlExprData *exprdata = (AnlExprData *) palloc0(sizeof(AnlExprData));
+
+				exprdata->expr = build_generation_expression(onerel, stats->tupattnum);
+				exprdata->vacattrstat = stats;
+				compute_expr_stats(onerel, exprdata, 1, rows, numrows);
+			}
+			else
+			{
+				stats->rows = rows;
+				stats->tupDesc = onerel->rd_att;
+				stats->compute_stats(stats,
+									 std_fetch_func,
+									 numrows,
+									 totalrows);
+			}
 
 			/*
 			 * If the appropriate flavor of the n_distinct option is
@@ -1049,10 +1065,6 @@ examine_attribute(Relation onerel, int attnum, Node *index_expr)
 	if (attr->attisdropped)
 		return NULL;
 
-	/* Don't analyze virtual generated columns */
-	if (attr->attgenerated == ATTRIBUTE_GENERATED_VIRTUAL)
-		return NULL;
-
 	/*
 	 * Get attstattarget value.  Set to -1 if null.  (Analyze functions expect
 	 * -1 to mean use default_statistics_target; see for example
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c
index c6a58afc5e5..68f7349ffc8 100644
--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -42,6 +42,7 @@
 #include "parser/parse_relation.h"
 #include "parser/parsetree.h"
 #include "partitioning/partdesc.h"
+#include "rewrite/rewriteHandler.h"
 #include "rewrite/rewriteManip.h"
 #include "statistics/statistics.h"
 #include "storage/bufmgr.h"
@@ -77,6 +78,7 @@ static List *get_relation_constraints(PlannerInfo *root,
 									  bool include_partition);
 static List *build_index_tlist(PlannerInfo *root, IndexOptInfo *index,
 							   Relation heapRelation);
+static List *get_relation_virtual_gencols(RelOptInfo *rel, Relation relation);
 static List *get_relation_statistics(RelOptInfo *rel, Relation relation);
 static void set_relation_partition_info(PlannerInfo *root, RelOptInfo *rel,
 										Relation relation);
@@ -508,6 +510,9 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
 
 	rel->indexlist = indexinfos;
 
+	/* Make list of virtual generated columns */
+	rel->virtual_gencols = get_relation_virtual_gencols(rel, relation);
+
 	rel->statlist = get_relation_statistics(rel, relation);
 
 	/* Grab foreign-table info using the relcache, while we have it */
@@ -1482,6 +1487,61 @@ get_relation_constraints(PlannerInfo *root,
 	return result;
 }
 
+/*
+ * get_relation_virtual_gencols
+ *		Retrieve virtual generated columns defined on the table.
+ *
+ * Returns a List (possibly empty) of VirtualGeneratedColumnInfoInfo objects
+ * containing the generation expressions. Each one has been processed by
+ * eval_const_expressions(), and its Vars are changed to have the varno
+ * indicated by rel->relid.  This allows the expressions to be easily
+ * compared to expressions taken from WHERE.
+ */
+static List *get_relation_virtual_gencols(RelOptInfo *rel, Relation relation)
+{
+	TupleDesc	tupdesc = RelationGetDescr(relation);
+	Index		varno = rel->relid;
+	List	   *virtual_gencols = NIL;
+
+	if (tupdesc->constr && tupdesc->constr->has_generated_virtual)
+	{
+		for (int i = 0; i < tupdesc->natts; i++)
+		{
+			Form_pg_attribute attr = TupleDescAttr(tupdesc, i);
+
+			if (attr->attgenerated == ATTRIBUTE_GENERATED_VIRTUAL)
+			{
+				VirtualGeneratedColumnInfo *info;
+
+				info = makeNode(VirtualGeneratedColumnInfo);
+				info->attno = attr->attnum;
+				info->expr = build_generation_expression(relation, attr->attnum);
+
+				/*
+				 * Run the expressions through eval_const_expressions. This is
+				 * not just an optimization, but is necessary, because the
+				 * planner will be comparing them to similarly-processed qual
+				 * clauses, and may fail to detect valid matches without this.
+				 * We must not use canonicalize_qual, however, since these
+				 * aren't qual expressions.
+				 */
+				info->expr = eval_const_expressions(NULL, info->expr);
+
+				/* May as well fix opfuncids too */
+				fix_opfuncids(info->expr);
+
+				/* Fix Vars to have the desired varno */
+				if (varno != 1)
+					ChangeVarNodes((Node *) info->expr, 1, varno, 0);
+
+				virtual_gencols = lappend(virtual_gencols, info);
+			}
+		}
+	}
+
+	return virtual_gencols;
+}
+
 /*
  * Try loading data for the statistics object.
  *
diff --git a/src/backend/statistics/extended_stats.c b/src/backend/statistics/extended_stats.c
index a8b63ec0884..e26352cacec 100644
--- a/src/backend/statistics/extended_stats.c
+++ b/src/backend/statistics/extended_stats.c
@@ -82,15 +82,6 @@ static void statext_store(Oid statOid, bool inh,
 static int	statext_compute_stattarget(int stattarget,
 									   int nattrs, VacAttrStats **stats);
 
-/* Information needed to analyze a single simple expression. */
-typedef struct AnlExprData
-{
-	Node	   *expr;			/* expression to analyze */
-	VacAttrStats *vacattrstat;	/* statistics attrs to analyze */
-} AnlExprData;
-
-static void compute_expr_stats(Relation onerel, AnlExprData *exprdata,
-							   int nexprs, HeapTuple *rows, int numrows);
 static Datum serialize_expr_stats(AnlExprData *exprdata, int nexprs);
 static Datum expr_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull);
 static AnlExprData *build_expr_data(List *exprs, int stattarget);
@@ -2103,7 +2094,7 @@ examine_opclause_args(List *args, Node **exprp, Const **cstp,
 /*
  * Compute statistics about expressions of a relation.
  */
-static void
+void
 compute_expr_stats(Relation onerel, AnlExprData *exprdata, int nexprs,
 				   HeapTuple *rows, int numrows)
 {
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 17fbfa9b410..00e3972ff4f 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -5408,6 +5408,7 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid,
 		 */
 		ListCell   *ilist;
 		ListCell   *slist;
+		ListCell   *vlist;
 		Oid			userid;
 
 		/*
@@ -5576,6 +5577,46 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid,
 				break;
 		}
 
+		/*
+		 * Search virtual generated columns for one with a matching expression
+		 * and use the statistics collected for it if we have.
+		 */
+		foreach(vlist, onerel->virtual_gencols)
+		{
+			VirtualGeneratedColumnInfo *info = (VirtualGeneratedColumnInfo *) lfirst(vlist);
+			Node *expr = info->expr;
+
+			/*
+			 * Stop once we've found statistics for the expression (either
+			 * for a virtual generated columns or an index in the preceding
+			 * loop).
+			 */
+			if (vardata->statsTuple)
+				break;
+
+			/* strip RelabelType before comparing it */
+			if (expr && IsA(expr, RelabelType))
+				expr = (Node *) ((RelabelType *) expr)->arg;
+
+			if (equal(node, expr))
+			{
+				Var	*var = makeVar(onerel->relid,
+								   info->attno,
+								   vardata->atttype,
+								   vardata->atttypmod,
+								   exprCollation(node),
+								   0);
+				/*
+				 * There cannot be a unique constraint on a virtual generated column.
+				 * Other fields other than the stats tuple must be already set.
+				 */
+				vardata->isunique = false;
+
+				/* Try to locate some stats */
+				examine_simple_variable(root, var, vardata);
+			}
+		}
+
 		/*
 		 * Search extended statistics for one with a matching expression.
 		 * There might be multiple ones, so just grab the first one. In the
@@ -5591,7 +5632,8 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid,
 
 			/*
 			 * Stop once we've found statistics for the expression (either
-			 * from extended stats, or for an index in the preceding loop).
+			 * from extended stats, or for an index or a virtual generated
+			 * column in the preceding loop).
 			 */
 			if (vardata->statsTuple)
 				break;
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index e5dd15098f6..0c173f307b7 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -970,6 +970,8 @@ typedef struct RelOptInfo
 	List	   *indexlist;
 	/* list of StatisticExtInfo */
 	List	   *statlist;
+	/* list of VirtualGeneratedColumnInfo */
+	List	   *virtual_gencols;
 	/* size estimates derived from pg_class */
 	BlockNumber pages;
 	Cardinality tuples;
@@ -1322,6 +1324,23 @@ typedef struct StatisticExtInfo
 	List	   *exprs;
 } StatisticExtInfo;
 
+/*
+ * VirtualGeneratedColumnInfo
+ *		Information about virtual generated columns for planning/optimization
+ */
+typedef struct VirtualGeneratedColumnInfo
+{
+	pg_node_attr(no_copy_equal, no_read, no_query_jumble)
+
+	NodeTag		type;
+
+	/* attribute number of virtual generated column */
+	AttrNumber	attno;
+
+	/* generation expression */
+	Node	   *expr;
+} VirtualGeneratedColumnInfo;
+
 /*
  * JoinDomains
  *
diff --git a/src/include/statistics/extended_stats_internal.h b/src/include/statistics/extended_stats_internal.h
index efcb7dc3546..8eaea3b7566 100644
--- a/src/include/statistics/extended_stats_internal.h
+++ b/src/include/statistics/extended_stats_internal.h
@@ -68,6 +68,12 @@ typedef struct StatsBuildData
 	bool	  **nulls;
 } StatsBuildData;
 
+/* Information needed to analyze a single simple expression. */
+typedef struct AnlExprData
+{
+	Node	   *expr;			/* expression to analyze */
+	VacAttrStats *vacattrstat;	/* statistics attrs to analyze */
+} AnlExprData;
 
 extern MVNDistinct *statext_ndistinct_build(double totalrows, StatsBuildData *data);
 extern bytea *statext_ndistinct_serialize(MVNDistinct *ndistinct);
@@ -127,4 +133,7 @@ extern Selectivity mcv_clause_selectivity_or(PlannerInfo *root,
 											 Selectivity *overlap_basesel,
 											 Selectivity *totalsel);
 
+extern void
+compute_expr_stats(Relation onerel, AnlExprData *exprdata, int nexprs,
+				   HeapTuple *rows, int numrows);
 #endif							/* EXTENDED_STATS_INTERNAL_H */
-- 
2.43.0

