| From: | Feng Wu <wufengwufengwufeng(at)gmail(dot)com> |
|---|---|
| To: | pgsql-hackers(at)lists(dot)postgresql(dot)org |
| Subject: | [PATCH] Avoid collation lookup for "char" statistics |
| Date: | 2026-06-27 02:08:59 |
| Message-ID: | CACK3muq6s-O1Wc3w4dRL1Fe8YQ-Fz1zJbezeQwhuLgNxGNEFiA@mail.gmail.com |
| Views: | Whole Thread | Raw Message | Download mbox | Resend email |
| Thread: | |
| Lists: | pgsql-hackers |
From e9acd79085a88981f800e982f65e373167828359 Mon Sep 17 00:00:00 2001
From: Feng Wu <wufengwufengwufeng(at)gmail(dot)com>
Date: Sat, 27 Jun 2026 10:06:37 +0800
Subject: [PATCH] Avoid collation lookup for "char" statistics
mergejoinscansel() can estimate merge join scan fractions from histogram
statistics via scalarineqsel(). When the join key has the internal
"char" type, convert_to_scalar() treats the histogram values as
string-like values. convert_string_datum() then attempted to look up
the input collation.
The internal "char" type is not collatable, and its btree operators
order the byte value directly. Return the one-byte string for CHAROID
without consulting collation state, avoiding a lookup of InvalidOid as
collation 0.
Add a regression test that exercises merge join selectivity estimation
with histogram statistics on "char" columns.
---
src/backend/utils/adt/selfuncs.c | 7 ++++++
src/test/regress/expected/join.out | 38 ++++++++++++++++++++++++++++++
src/test/regress/sql/join.sql | 21 +++++++++++++++++
3 files changed, 66 insertions(+)
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index d6efd070..7318dda8 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -5295,6 +5295,13 @@ convert_string_datum(Datum value, Oid typid,
Oid collid, bool *failure)
return NULL;
}
+ /*
+ * The internal "char" type is not collatable, so the byte value above is
+ * already in the comparison order used by its btree operators.
+ */
+ if (typid == CHAROID)
+ return val;
+
mylocale = pg_newlocale_from_collation(collid);
if (!mylocale->collate_is_c)
diff --git a/src/test/regress/expected/join.out
b/src/test/regress/expected/join.out
index ed946abe..98355dd0 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -10165,3 +10165,41 @@ SELECT COUNT(*) FROM onek t1 LEFT JOIN tenk1 t2
19000
(1 row)
+-- Test merge join selectivity estimation with non-collatable "char" stats.
+BEGIN;
+CREATE TEMP TABLE char_stats_1 (c "char");
+CREATE TEMP TABLE char_stats_2 (c "char");
+INSERT INTO char_stats_1
+SELECT v::"char"
+FROM unnest(array['I','S','c','i','m','p','r','t','v']) AS v,
+ generate_series(1, CASE WHEN v IN ('i','v','r','t') THEN 50 ELSE 1 END);
+INSERT INTO char_stats_2
+SELECT v::"char"
+FROM unnest(array['a','e','i']) AS v,
+ generate_series(1, CASE WHEN v = 'i' THEN 50 ELSE 5 END);
+ANALYZE char_stats_1;
+ANALYZE char_stats_2;
+SET LOCAL enable_hashjoin = off;
+SET LOCAL enable_nestloop = off;
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM char_stats_1 s1 JOIN char_stats_2 s2 ON s1.c = s2.c;
+ QUERY PLAN
+-----------------------------------------------
+ Aggregate
+ -> Merge Join
+ Merge Cond: (s2.c = s1.c)
+ -> Sort
+ Sort Key: s2.c
+ -> Seq Scan on char_stats_2 s2
+ -> Sort
+ Sort Key: s1.c
+ -> Seq Scan on char_stats_1 s1
+(9 rows)
+
+SELECT count(*) FROM char_stats_1 s1 JOIN char_stats_2 s2 ON s1.c = s2.c;
+ count
+-------
+ 2500
+(1 row)
+
+ROLLBACK;
diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql
index 78f7b4f5..c405480b 100644
--- a/src/test/regress/sql/join.sql
+++ b/src/test/regress/sql/join.sql
@@ -3903,3 +3903,24 @@ SELECT COUNT(*) FROM onek t1 LEFT JOIN tenk1 t2
ON (t2.thousand = t1.tenthous OR t2.thousand = t1.thousand);
SELECT COUNT(*) FROM onek t1 LEFT JOIN tenk1 t2
ON (t2.thousand = t1.tenthous OR t2.thousand = t1.thousand);
+
+-- Test merge join selectivity estimation with non-collatable "char" stats.
+BEGIN;
+CREATE TEMP TABLE char_stats_1 (c "char");
+CREATE TEMP TABLE char_stats_2 (c "char");
+INSERT INTO char_stats_1
+SELECT v::"char"
+FROM unnest(array['I','S','c','i','m','p','r','t','v']) AS v,
+ generate_series(1, CASE WHEN v IN ('i','v','r','t') THEN 50 ELSE 1 END);
+INSERT INTO char_stats_2
+SELECT v::"char"
+FROM unnest(array['a','e','i']) AS v,
+ generate_series(1, CASE WHEN v = 'i' THEN 50 ELSE 5 END);
+ANALYZE char_stats_1;
+ANALYZE char_stats_2;
+SET LOCAL enable_hashjoin = off;
+SET LOCAL enable_nestloop = off;
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM char_stats_1 s1 JOIN char_stats_2 s2 ON s1.c = s2.c;
+SELECT count(*) FROM char_stats_1 s1 JOIN char_stats_2 s2 ON s1.c = s2.c;
+ROLLBACK;
--
2.53.0
| From | Date | Subject | |
|---|---|---|---|
| Next Message | Michael Paquier | 2026-06-27 02:09:57 | Re: Handle concurrent drop when doing whole database vacuum |
| Previous Message | Xuneng Zhou | 2026-06-27 01:56:16 | Re: 048_vacuum_horizon_floor.pl hangs due to wakeup lost inside LockBufferForCleanup |