From 3bdd48382fdcf0bd8eaaa6a328cde8a47eeee1ac Mon Sep 17 00:00:00 2001
From: Enrique Sanchez Cardoso <enriqueesanchz@gmail.com>
Date: Sun, 24 May 2026 13:32:24 +0200
Subject: [PATCH 2/4] Add support for IN/ANY clauses in multi-column MCV cap

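Extend the MCV-based selectivity cap to handle IN and ANY clauses that use
an equality operator with a non-null constant array, so the cap still
applies when such clauses are used alongside plain equality filters.  When
no MCV item matches, the cap is the least common MCV frequency multiplied
by the product of the array lengths (the number of value combinations the
clauses could match), clamped to a valid probability.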
---
 src/backend/statistics/mcv.c            | 60 +++++++++++++++++++------
 src/test/regress/expected/stats_ext.out |  6 +--
 src/test/regress/sql/stats_ext.sql      |  4 +-
 3 files changed, 52 insertions(+), 18 deletions(-)

diff --git a/src/backend/statistics/mcv.c b/src/backend/statistics/mcv.c
index df70d00cc3d..2e75f19d8bd 100644
--- a/src/backend/statistics/mcv.c
+++ b/src/backend/statistics/mcv.c
@@ -1525,14 +1525,19 @@ pg_mcv_list_send(PG_FUNCTION_ARGS)
 }
 
 /*
- * mcv_is_all_equality_clauses
- *		Check if all clauses are simple equality conditions (OpExpr with eqsel
- *		restriction estimator).  This mirrors the check done by
- *		dependency_is_compatible_clause() in dependencies.c.
+ * mcv_cap_multiplier
+ *		Compute a multiplier for capping combined selectivity to the least
+ *		common MCV frequency when no MCV items matched.
+ *
+ * Returns 0 if the cap should not be applied (unsupported clause type, or
+ * an IN/ANY array with no elements).  Otherwise returns the product of the
+ * per-clause factors -- 1 for each equality clause, N for each IN/ANY clause
+ * with N array elements -- an upper bound on the value combinations matched.
  */
-static bool
-mcv_is_all_equality_clauses(List *clauses)
+static int64
+mcv_cap_multiplier(List *clauses)
 {
+	int64		multiplier = 1;
 	ListCell   *lc;
 
 	foreach(lc, clauses)
@@ -1542,12 +1547,34 @@ mcv_is_all_equality_clauses(List *clauses)
 		if (IsA(clause, RestrictInfo))
 			clause = (Node *) ((RestrictInfo *) clause)->clause;
 
-		if (!is_opclause(clause) ||
-			get_oprrest(((OpExpr *) clause)->opno) != F_EQSEL)
-			return false;
+		if (is_opclause(clause))
+		{
+			/* Only equality operators qualify; each contributes factor 1 */
+			if (get_oprrest(((OpExpr *) clause)->opno) != F_EQSEL)
+				return 0;
+		}
+		else if (IsA(clause, ScalarArrayOpExpr))
+		{
+			ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) clause;
+			Node	   *arg;
+			ArrayType  *arr;
+
+			/* Only ANY/IN with equality operator */
+			if (!saop->useOr || get_oprrest(saop->opno) != F_EQSEL)
+				return 0;
+
+			arg = (Node *) lsecond(saop->args);
+			if (!IsA(arg, Const) || ((Const *) arg)->constisnull)
+				return 0;
+
+			arr = DatumGetArrayTypeP(((Const *) arg)->constvalue);
+			multiplier *= ArrayGetNItems(ARR_NDIM(arr), ARR_DIMS(arr));
+		}
+		else
+			return 0;			/* unsupported clause type */
 	}
 
-	return true;
+	return multiplier;
 }
 
 /*
@@ -2115,9 +2142,16 @@ mcv_clauselist_selectivity(PlannerInfo *root, StatisticExtInfo *stat,
 	 * combination is not among the most common, so it can't be more frequent
 	 * than the least common tracked combination.
 	 */
-	if (s == 0.0 && mcv->ndimensions == list_length(clauses) &&
-		mcv_is_all_equality_clauses(clauses))
-		*cap = mcv->items[mcv->nitems - 1].frequency;
+	if (s == 0.0 && mcv->ndimensions == list_length(clauses))
+	{
+		int64		cap_mult = mcv_cap_multiplier(clauses);
+
+		if (cap_mult > 0)
+		{
+			*cap = cap_mult * mcv->items[mcv->nitems - 1].frequency;
+			CLAMP_PROBABILITY(*cap);
+		}
+	}
 
 	return s;
 }
diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out
index 1ca26669bb1..82ad9edb568 100644
--- a/src/test/regress/expected/stats_ext.out
+++ b/src/test/regress/expected/stats_ext.out
@@ -2965,18 +2965,18 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_cap WHERE a = 0 AND b = 0'
        100 |      0
 (1 row)
 
--- IN/ANY equality clauses are not supported, partial MCV match (a=0, b=99)
+-- partial MCV match (a=0, b=99)
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_cap WHERE a = 0 AND b IN (0, 99)');
  estimated | actual 
 -----------+--------
       5050 |    100
 (1 row)
 
--- IN/ANY equality clauses are not supported, no MCV match
+-- no MCV match
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_cap WHERE a = 0 AND b IN (0, 100)');
  estimated | actual 
 -----------+--------
-      4950 |      0
+       200 |      0
 (1 row)
 
 DROP TABLE mcv_cap;
diff --git a/src/test/regress/sql/stats_ext.sql b/src/test/regress/sql/stats_ext.sql
index 0f67363cd6d..8e0b8c0eb5c 100644
--- a/src/test/regress/sql/stats_ext.sql
+++ b/src/test/regress/sql/stats_ext.sql
@@ -1491,10 +1491,10 @@ ANALYZE mcv_cap;
 -- with MCV statistics: bounded by least MCV frequency
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_cap WHERE a = 0 AND b = 0');
 
--- IN/ANY equality clauses are not supported, partial MCV match (a=0, b=99)
+-- partial MCV match (a=0, b=99)
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_cap WHERE a = 0 AND b IN (0, 99)');
 
--- IN/ANY equality clauses are not supported, no MCV match
+-- no MCV match
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_cap WHERE a = 0 AND b IN (0, 100)');
 
 DROP TABLE mcv_cap;
-- 
2.43.0

