From b8f9e453b6cd05718a7a149f7e472d6c2c28c8a6 Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryzbyj@telsasoft.com>
Date: Sat, 25 Sep 2021 19:42:41 -0500
Subject: [PATCH 1/5] Do not use extended statistics on inheritence trees..

Since 859b3003de, inherited ext stats are not built.
However, the non-inherited stats stats were incorrectly used during planning of
queries with inheritence heirarchies.

Since the ext stats do not include child tables, they can lead to worse
estimates.

choose_best_statistics is handled a bit differently (in the calling function),
because it isn't passed rel nor rel->inh, and it's an exported function, so
avoid changing its signature in back branches.

https://www.postgresql.org/message-id/flat/20210925223152.GA7877@telsasoft.com

Backpatch to v10
---
 src/backend/statistics/dependencies.c   |  5 +++++
 src/backend/statistics/extended_stats.c |  5 +++++
 src/backend/utils/adt/selfuncs.c        |  9 +++++++++
 src/test/regress/expected/stats_ext.out | 23 +++++++++++++++++++++++
 src/test/regress/sql/stats_ext.sql      | 14 ++++++++++++++
 5 files changed, 56 insertions(+)

diff --git a/src/backend/statistics/dependencies.c b/src/backend/statistics/dependencies.c
index 8bf80db8e4..b2e33329c7 100644
--- a/src/backend/statistics/dependencies.c
+++ b/src/backend/statistics/dependencies.c
@@ -1593,6 +1593,11 @@ dependencies_clauselist_selectivity(PlannerInfo *root,
 		int			nexprs;
 		int			k;
 		MVDependencies *deps;
+		RangeTblEntry	*rte = root->simple_rte_array[rel->relid];
+
+		/* If it's an inheritence tree, skip statistics (which do not include child stats) */
+		if (rte->inh)
+			break;
 
 		/* skip statistics that are not of the correct type */
 		if (stat->kind != STATS_EXT_DEPENDENCIES)
diff --git a/src/backend/statistics/extended_stats.c b/src/backend/statistics/extended_stats.c
index 0a7d12d467..eea38a5bc7 100644
--- a/src/backend/statistics/extended_stats.c
+++ b/src/backend/statistics/extended_stats.c
@@ -1744,6 +1744,11 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli
 		StatisticExtInfo *stat;
 		List	   *stat_clauses;
 		Bitmapset  *simple_clauses;
+		RangeTblEntry *rte = root->simple_rte_array[rel->relid];
+
+		/* If it's an inheritence tree, skip statistics (which do not include child stats) */
+		if (rte->inh)
+			break;
 
 		/* find the best suited statistics object for these attnums */
 		stat = choose_best_statistics(rel->statlist, STATS_EXT_MCV,
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index abcb628a39..7533574fdc 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -3913,6 +3913,11 @@ estimate_multivariate_ndistinct(PlannerInfo *root, RelOptInfo *rel,
 	Oid			statOid = InvalidOid;
 	MVNDistinct *stats;
 	StatisticExtInfo *matched_info = NULL;
+	RangeTblEntry		*rte = root->simple_rte_array[rel->relid];
+
+	/* If it's an inheritence tree, skip statistics (which do not include child stats) */
+	if (rte->inh)
+		return false;
 
 	/* bail out immediately if the table has no extended statistics */
 	if (!rel->statlist)
@@ -5232,6 +5237,10 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid,
 			if (vardata->statsTuple)
 				break;
 
+			/* If it's an inheritence tree, skip statistics (which do not include child stats) */
+			if (planner_rt_fetch(onerel->relid, root)->inh)
+				break;
+
 			/* skip stats without per-expression stats */
 			if (info->kind != STATS_EXT_EXPRESSIONS)
 				continue;
diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out
index c60ba45aba..5c15e44bd6 100644
--- a/src/test/regress/expected/stats_ext.out
+++ b/src/test/regress/expected/stats_ext.out
@@ -176,6 +176,29 @@ CREATE STATISTICS ab1_a_b_stats ON a, b FROM ab1;
 ANALYZE ab1;
 DROP TABLE ab1 CASCADE;
 NOTICE:  drop cascades to table ab1c
+-- Ensure non-inherited stats are not applied to inherited query
+CREATE TABLE stxdinh(i int, j int);
+CREATE TABLE stxdinh1() INHERITS(stxdinh);
+INSERT INTO stxdinh SELECT a, a/10 FROM generate_series(1,9)a;
+INSERT INTO stxdinh1 SELECT a, a FROM generate_series(1,999)a;
+VACUUM ANALYZE stxdinh, stxdinh1;
+-- Without stats object, it looks like this
+SELECT * FROM check_estimated_rows('SELECT * FROM stxdinh* GROUP BY 1,2');
+ estimated | actual 
+-----------+--------
+      1000 |   1008
+(1 row)
+
+CREATE STATISTICS stxdinh ON i,j FROM stxdinh;
+VACUUM ANALYZE stxdinh, stxdinh1;
+-- Since the stats object does not include inherited stats, it should not affect the estimates
+SELECT * FROM check_estimated_rows('SELECT * FROM stxdinh* GROUP BY 1,2');
+ estimated | actual 
+-----------+--------
+      1000 |   1008
+(1 row)
+
+DROP TABLE stxdinh, stxdinh1;
 -- basic test for statistics on expressions
 CREATE TABLE ab1 (a INTEGER, b INTEGER, c TIMESTAMP, d TIMESTAMPTZ);
 -- expression stats may be built on a single expression column
diff --git a/src/test/regress/sql/stats_ext.sql b/src/test/regress/sql/stats_ext.sql
index 6fb37962a7..610f7ed17f 100644
--- a/src/test/regress/sql/stats_ext.sql
+++ b/src/test/regress/sql/stats_ext.sql
@@ -112,6 +112,20 @@ CREATE STATISTICS ab1_a_b_stats ON a, b FROM ab1;
 ANALYZE ab1;
 DROP TABLE ab1 CASCADE;
 
+-- Ensure non-inherited stats are not applied to inherited query
+CREATE TABLE stxdinh(i int, j int);
+CREATE TABLE stxdinh1() INHERITS(stxdinh);
+INSERT INTO stxdinh SELECT a, a/10 FROM generate_series(1,9)a;
+INSERT INTO stxdinh1 SELECT a, a FROM generate_series(1,999)a;
+VACUUM ANALYZE stxdinh, stxdinh1;
+-- Without stats object, it looks like this
+SELECT * FROM check_estimated_rows('SELECT * FROM stxdinh* GROUP BY 1,2');
+CREATE STATISTICS stxdinh ON i,j FROM stxdinh;
+VACUUM ANALYZE stxdinh, stxdinh1;
+-- Since the stats object does not include inherited stats, it should not affect the estimates
+SELECT * FROM check_estimated_rows('SELECT * FROM stxdinh* GROUP BY 1,2');
+DROP TABLE stxdinh, stxdinh1;
+
 -- basic test for statistics on expressions
 CREATE TABLE ab1 (a INTEGER, b INTEGER, c TIMESTAMP, d TIMESTAMPTZ);
 
-- 
2.17.0

