From 907046c93d8f20e8edcc4fb0334feed86d194b81 Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryzbyj@telsasoft.com>
Date: Sat, 25 Sep 2021 19:42:41 -0500
Subject: [PATCH v3 1/6] Do not use extended statistics on inheritence trees..

Since 859b3003de, inherited ext stats are not built.
However, the non-inherited stats stats were incorrectly used during planning of
queries with inheritence heirarchies.

Since the ext stats do not include child tables, they can lead to worse
estimates.  This is remarkably similar to 427c6b5b9, which affected column
statistics 15 years ago.

choose_best_statistics is handled a bit differently (in the calling function),
because it isn't passed rel nor rel->inh, and it's an exported function, so
avoid changing its signature in back branches.

https://www.postgresql.org/message-id/flat/20210925223152.GA7877@telsasoft.com

Backpatch to v10
---
 src/backend/statistics/dependencies.c   |  5 +++++
 src/backend/statistics/extended_stats.c |  5 +++++
 src/backend/utils/adt/selfuncs.c        |  9 +++++++++
 src/test/regress/expected/stats_ext.out | 23 +++++++++++++++++++++++
 src/test/regress/sql/stats_ext.sql      | 14 ++++++++++++++
 5 files changed, 56 insertions(+)

diff --git a/src/backend/statistics/dependencies.c b/src/backend/statistics/dependencies.c
index 8bf80db8e4..015b9e28f6 100644
--- a/src/backend/statistics/dependencies.c
+++ b/src/backend/statistics/dependencies.c
@@ -1410,11 +1410,16 @@ dependencies_clauselist_selectivity(PlannerInfo *root,
 	int			ndependencies;
 	int			i;
 	AttrNumber	attnum_offset;
+	RangeTblEntry *rte = root->simple_rte_array[rel->relid];
 
 	/* unique expressions */
 	Node	  **unique_exprs;
 	int			unique_exprs_cnt;
 
+	/* If it's an inheritence tree, skip statistics (which do not include child stats) */
+	if (rte->inh)
+		return 1.0;
+
 	/* check if there's any stats that might be useful for us. */
 	if (!has_stats_of_kind(rel->statlist, STATS_EXT_DEPENDENCIES))
 		return 1.0;
diff --git a/src/backend/statistics/extended_stats.c b/src/backend/statistics/extended_stats.c
index 69ca52094f..b9e755f44e 100644
--- a/src/backend/statistics/extended_stats.c
+++ b/src/backend/statistics/extended_stats.c
@@ -1694,6 +1694,11 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli
 	List	  **list_exprs;		/* expressions matched to any statistic */
 	int			listidx;
 	Selectivity sel = (is_or) ? 0.0 : 1.0;
+	RangeTblEntry *rte = root->simple_rte_array[rel->relid];
+
+	/* If it's an inheritence tree, skip statistics (which do not include child stats) */
+	if (rte->inh)
+		return sel;
 
 	/* check if there's any stats that might be useful for us. */
 	if (!has_stats_of_kind(rel->statlist, STATS_EXT_MCV))
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 10895fb287..a0932e39e1 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -3913,6 +3913,11 @@ estimate_multivariate_ndistinct(PlannerInfo *root, RelOptInfo *rel,
 	Oid			statOid = InvalidOid;
 	MVNDistinct *stats;
 	StatisticExtInfo *matched_info = NULL;
+	RangeTblEntry		*rte = root->simple_rte_array[rel->relid];
+
+	/* If it's an inheritence tree, skip statistics (which do not include child stats) */
+	if (rte->inh)
+		return false;
 
 	/* bail out immediately if the table has no extended statistics */
 	if (!rel->statlist)
@@ -5232,6 +5237,10 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid,
 			if (vardata->statsTuple)
 				break;
 
+			/* If it's an inheritence tree, skip statistics (which do not include child stats) */
+			if (planner_rt_fetch(onerel->relid, root)->inh)
+				break;
+
 			/* skip stats without per-expression stats */
 			if (info->kind != STATS_EXT_EXPRESSIONS)
 				continue;
diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out
index c60ba45aba..5c15e44bd6 100644
--- a/src/test/regress/expected/stats_ext.out
+++ b/src/test/regress/expected/stats_ext.out
@@ -176,6 +176,29 @@ CREATE STATISTICS ab1_a_b_stats ON a, b FROM ab1;
 ANALYZE ab1;
 DROP TABLE ab1 CASCADE;
 NOTICE:  drop cascades to table ab1c
+-- Ensure non-inherited stats are not applied to inherited query
+CREATE TABLE stxdinh(i int, j int);
+CREATE TABLE stxdinh1() INHERITS(stxdinh);
+INSERT INTO stxdinh SELECT a, a/10 FROM generate_series(1,9)a;
+INSERT INTO stxdinh1 SELECT a, a FROM generate_series(1,999)a;
+VACUUM ANALYZE stxdinh, stxdinh1;
+-- Without stats object, it looks like this
+SELECT * FROM check_estimated_rows('SELECT * FROM stxdinh* GROUP BY 1,2');
+ estimated | actual 
+-----------+--------
+      1000 |   1008
+(1 row)
+
+CREATE STATISTICS stxdinh ON i,j FROM stxdinh;
+VACUUM ANALYZE stxdinh, stxdinh1;
+-- Since the stats object does not include inherited stats, it should not affect the estimates
+SELECT * FROM check_estimated_rows('SELECT * FROM stxdinh* GROUP BY 1,2');
+ estimated | actual 
+-----------+--------
+      1000 |   1008
+(1 row)
+
+DROP TABLE stxdinh, stxdinh1;
 -- basic test for statistics on expressions
 CREATE TABLE ab1 (a INTEGER, b INTEGER, c TIMESTAMP, d TIMESTAMPTZ);
 -- expression stats may be built on a single expression column
diff --git a/src/test/regress/sql/stats_ext.sql b/src/test/regress/sql/stats_ext.sql
index 6fb37962a7..610f7ed17f 100644
--- a/src/test/regress/sql/stats_ext.sql
+++ b/src/test/regress/sql/stats_ext.sql
@@ -112,6 +112,20 @@ CREATE STATISTICS ab1_a_b_stats ON a, b FROM ab1;
 ANALYZE ab1;
 DROP TABLE ab1 CASCADE;
 
+-- Ensure non-inherited stats are not applied to inherited query
+CREATE TABLE stxdinh(i int, j int);
+CREATE TABLE stxdinh1() INHERITS(stxdinh);
+INSERT INTO stxdinh SELECT a, a/10 FROM generate_series(1,9)a;
+INSERT INTO stxdinh1 SELECT a, a FROM generate_series(1,999)a;
+VACUUM ANALYZE stxdinh, stxdinh1;
+-- Without stats object, it looks like this
+SELECT * FROM check_estimated_rows('SELECT * FROM stxdinh* GROUP BY 1,2');
+CREATE STATISTICS stxdinh ON i,j FROM stxdinh;
+VACUUM ANALYZE stxdinh, stxdinh1;
+-- Since the stats object does not include inherited stats, it should not affect the estimates
+SELECT * FROM check_estimated_rows('SELECT * FROM stxdinh* GROUP BY 1,2');
+DROP TABLE stxdinh, stxdinh1;
+
 -- basic test for statistics on expressions
 CREATE TABLE ab1 (a INTEGER, b INTEGER, c TIMESTAMP, d TIMESTAMPTZ);
 
-- 
2.17.0

