From aef713df81c613a56855a8d46aac198090ab1a5d Mon Sep 17 00:00:00 2001
From: jian he <jian.universality@gmail.com>
Date: Tue, 13 Jan 2026 10:15:13 +0800
Subject: [PATCH v1 1/1] proper handling examine_variable RelabelType

discussion: https://postgr.es/m/
---
 src/backend/utils/adt/selfuncs.c              | 62 +++++++++++++++++--
 .../regress/expected/collate.icu.utf8.out     | 49 +++++++++++++++
 src/test/regress/sql/collate.icu.utf8.sql     | 32 ++++++++++
 3 files changed, 138 insertions(+), 5 deletions(-)

diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 29fec655593..d6a26c48384 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -5661,8 +5661,34 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid,
 	 * RelabelType nodes here, because the prior stripping of PlaceHolderVars
 	 * may have brought separate RelabelTypes into adjacency.
 	 */
-	while (IsA(basenode, RelabelType))
-		basenode = (Node *) ((RelabelType *) basenode)->arg;
+	if (IsA(basenode, RelabelType))
+	{
+		RelabelType *relabel = castNode(RelabelType, basenode);
+		Expr	   *expr = (Expr *) relabel;
+
+		while (expr && IsA(expr, RelabelType))
+			expr = ((RelabelType *) expr)->arg;
+
+		if (exprCollation((Node *) expr) == relabel->resultcollid)
+			basenode = (Node *) expr;
+		else
+		{
+			/*
+			 * A RelabelType is required when the input expression’s
+			 * collation differs from the resulting resultcollid.
+			 */
+			RelabelType *newrelabel = makeNode(RelabelType);
+
+			newrelabel->arg = (Expr *) expr;
+			newrelabel->resulttype = relabel->resulttype;
+			newrelabel->resulttypmod = relabel->resulttypmod;
+			newrelabel->resultcollid = relabel->resultcollid;
+			newrelabel->relabelformat = relabel->relabelformat;
+			newrelabel->location = relabel->location;
+
+			basenode = (Node *) newrelabel;
+		}
+	}
 
 	/* Fast path for a simple Var */
 	if (IsA(basenode, Var) &&
@@ -5909,9 +5935,35 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid,
 
 				Assert(expr);
 
-				/* strip RelabelType before comparing it */
-				if (expr && IsA(expr, RelabelType))
-					expr = (Node *) ((RelabelType *) expr)->arg;
+				if (IsA(expr, RelabelType))
+				{
+					RelabelType *relabel = castNode(RelabelType, expr);
+					Expr	   *rexpr = (Expr *) relabel;
+
+					while (rexpr && IsA(rexpr, RelabelType))
+						rexpr = ((RelabelType *) rexpr)->arg;
+
+					if (exprCollation((Node *) rexpr) == relabel->resultcollid)
+						expr = (Node *) rexpr;
+					else
+					{
+						/*
+						 * A RelabelType is required when the input
+						 * expression’s collation differs from the resulting
+						 * resultcollid.
+						 */
+						RelabelType *newrelabel = makeNode(RelabelType);
+
+						newrelabel->arg = (Expr *) rexpr;
+						newrelabel->resulttype = relabel->resulttype;
+						newrelabel->resulttypmod = relabel->resulttypmod;
+						newrelabel->resultcollid = relabel->resultcollid;
+						newrelabel->relabelformat = relabel->relabelformat;
+						newrelabel->location = relabel->location;
+
+						expr = (Node *) newrelabel;
+					}
+				}
 
 				/* found a match, see if we can extract pg_statistic row */
 				if (equal(node, expr))
diff --git a/src/test/regress/expected/collate.icu.utf8.out b/src/test/regress/expected/collate.icu.utf8.out
index 1325e123877..263e7ce8ae4 100644
--- a/src/test/regress/expected/collate.icu.utf8.out
+++ b/src/test/regress/expected/collate.icu.utf8.out
@@ -11,6 +11,25 @@ SELECT getdatabaseencoding() <> 'UTF8' OR
 SET client_encoding TO UTF8;
 CREATE SCHEMA collate_tests;
 SET search_path = collate_tests;
+create function check_estimated_rows(text) returns table (estimated int, actual int)
+language plpgsql as
+$$
+declare
+    ln text;
+    tmp text[];
+    first_row bool := true;
+begin
+    for ln in
+        execute format('explain analyze %s', $1)
+    loop
+        if first_row then
+            first_row := false;
+            tmp := regexp_match(ln, 'rows=(\d*) .* rows=(\d*)');
+            return query select tmp[1]::int, tmp[2]::int;
+        end if;
+    end loop;
+end;
+$$;
 CREATE TABLE collate_test1 (
     a int,
     b text COLLATE "en-x-icu" NOT NULL
@@ -1461,6 +1480,36 @@ CREATE COLLATION case_sensitive (provider = icu, locale = '');
 NOTICE:  using standard form "und" for ICU locale ""
 CREATE COLLATION case_insensitive (provider = icu, locale = '@colStrength=secondary', deterministic = false);
 NOTICE:  using standard form "und-u-ks-level2" for ICU locale "@colStrength=secondary"
+CREATE TABLE test_stats_ext_coll (a text, b text, c int);
+INSERT INTO test_stats_ext_coll SELECT chr(65 + g % 52) FROM generate_series(1, 500) g;
+CREATE STATISTICS test_stats_ext_coll_stx ON (a COLLATE case_insensitive) FROM test_stats_ext_coll;
+ANALYZE test_stats_ext_coll;
+SELECT * FROM check_estimated_rows ($$ SELECT count(*) FROM test_stats_ext_coll GROUP BY a COLLATE "C" $$);
+ estimated | actual 
+-----------+--------
+        52 |     52
+(1 row)
+
+SELECT * FROM check_estimated_rows ($$SELECT count(*) FROM test_stats_ext_coll GROUP BY a COLLATE "POSIX" $$);
+ estimated | actual 
+-----------+--------
+        52 |     52
+(1 row)
+
+SELECT * FROM check_estimated_rows ($$SELECT count(*) FROM test_stats_ext_coll GROUP BY a COLLATE case_insensitive $$);
+ estimated | actual 
+-----------+--------
+        32 |     32
+(1 row)
+
+SELECT * FROM check_estimated_rows ($$SELECT count(*) FROM test_stats_ext_coll GROUP BY a $$);
+ estimated | actual 
+-----------+--------
+        52 |     52
+(1 row)
+
+DROP FUNCTION check_estimated_rows;
+DROP TABLE test_stats_ext_coll;
 SELECT 'abc' <= 'ABC' COLLATE case_sensitive, 'abc' >= 'ABC' COLLATE case_sensitive;
  ?column? | ?column? 
 ----------+----------
diff --git a/src/test/regress/sql/collate.icu.utf8.sql b/src/test/regress/sql/collate.icu.utf8.sql
index b6c54503d21..b5c22bebfa6 100644
--- a/src/test/regress/sql/collate.icu.utf8.sql
+++ b/src/test/regress/sql/collate.icu.utf8.sql
@@ -16,6 +16,26 @@ CREATE SCHEMA collate_tests;
 SET search_path = collate_tests;
 
 
+create function check_estimated_rows(text) returns table (estimated int, actual int)
+language plpgsql as
+$$
+declare
+    ln text;
+    tmp text[];
+    first_row bool := true;
+begin
+    for ln in
+        execute format('explain analyze %s', $1)
+    loop
+        if first_row then
+            first_row := false;
+            tmp := regexp_match(ln, 'rows=(\d*) .* rows=(\d*)');
+            return query select tmp[1]::int, tmp[2]::int;
+        end if;
+    end loop;
+end;
+$$;
+
 CREATE TABLE collate_test1 (
     a int,
     b text COLLATE "en-x-icu" NOT NULL
@@ -561,6 +581,18 @@ SELECT * FROM test6a WHERE b = ARRAY['äbc'] COLLATE ctest_nondet;
 CREATE COLLATION case_sensitive (provider = icu, locale = '');
 CREATE COLLATION case_insensitive (provider = icu, locale = '@colStrength=secondary', deterministic = false);
 
+CREATE TABLE test_stats_ext_coll (a text, b text, c int);
+INSERT INTO test_stats_ext_coll SELECT chr(65 + g % 52) FROM generate_series(1, 500) g;
+CREATE STATISTICS test_stats_ext_coll_stx ON (a COLLATE case_insensitive) FROM test_stats_ext_coll;
+ANALYZE test_stats_ext_coll;
+
+SELECT * FROM check_estimated_rows ($$ SELECT count(*) FROM test_stats_ext_coll GROUP BY a COLLATE "C" $$);
+SELECT * FROM check_estimated_rows ($$SELECT count(*) FROM test_stats_ext_coll GROUP BY a COLLATE "POSIX" $$);
+SELECT * FROM check_estimated_rows ($$SELECT count(*) FROM test_stats_ext_coll GROUP BY a COLLATE case_insensitive $$);
+SELECT * FROM check_estimated_rows ($$SELECT count(*) FROM test_stats_ext_coll GROUP BY a $$);
+DROP FUNCTION check_estimated_rows;
+DROP TABLE test_stats_ext_coll;
+
 SELECT 'abc' <= 'ABC' COLLATE case_sensitive, 'abc' >= 'ABC' COLLATE case_sensitive;
 SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_insensitive;
 
-- 
2.34.1