From 327d6bf7e01d98332e562ad6694ad7445023b6ce Mon Sep 17 00:00:00 2001 From: "Andrei V. Lepikhov" Date: Fri, 5 Jun 2026 11:07:21 +0200 Subject: [PATCH v0] Show hashed ScalarArrayOpExpr decision in EXPLAIN When the planner converts a ScalarArrayOpExpr to hash-table evaluation (convert_saop_to_hashed_saop), the resulting node deparses identically to a linear one, so EXPLAIN gives no indication of which strategy the executor will use. That made the hashed path invisible and awkward to test. Mark the hashed case in ruleutils.c by emitting "hashed" before ANY/ALL when the node's hashfuncid is set. Since hashfuncid is filled in only for a finished plan tree, this never appears in deparsed views, rules, or other stored expressions. Now that the choice is visible, add regression tests covering when a hashed ScalarArrayOpExpr is and is not selected. These also backfill the regression coverage omitted by commit 06e94eccfd9, which taught the planner to re-check hashability of container-type equality (record, array, range and multirange) but added no tests. --- src/backend/utils/adt/ruleutils.c | 10 ++++++- src/test/regress/expected/expressions.out | 19 ++++++++++++ src/test/regress/expected/rangetypes.out | 12 ++++++++ src/test/regress/expected/rowtypes.out | 36 +++++++++++++++++++++++ src/test/regress/sql/expressions.sql | 8 +++++ src/test/regress/sql/rangetypes.sql | 7 +++++ src/test/regress/sql/rowtypes.sql | 20 +++++++++++++ 7 files changed, 111 insertions(+), 1 deletion(-) diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c index 88de5c0481c..0567ba0886a 100644 --- a/src/backend/utils/adt/ruleutils.c +++ b/src/backend/utils/adt/ruleutils.c @@ -9989,10 +9989,18 @@ get_rule_expr(Node *node, deparse_context *context, if (!PRETTY_PAREN(context)) appendStringInfoChar(buf, '('); get_rule_expr_paren(arg1, context, true, node); - appendStringInfo(buf, " %s %s (", + + /* + * Surface hashed decision in EXPLAIN. + * hashfuncid is only ever set in a finished plan tree, so this + * never appears in deparsed views, rules, or other stored + * expressions. + */ + appendStringInfo(buf, " %s %s%s (", generate_operator_name(expr->opno, exprType(arg1), get_base_element_type(exprType(arg2))), + OidIsValid(expr->hashfuncid) ? "hashed " : "", expr->useOr ? "ANY" : "ALL"); get_rule_expr_paren(arg2, context, true, node); diff --git a/src/test/regress/expected/expressions.out b/src/test/regress/expected/expressions.out index 730f7bc7eba..32f58fc7268 100644 --- a/src/test/regress/expected/expressions.out +++ b/src/test/regress/expected/expressions.out @@ -327,6 +327,25 @@ select return_text_input('a') not in ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i f (1 row) +-- Check that EXPLAIN marks the hashed decision. +explain (verbose, costs off) +select return_int_input(1) in (10, 9, 2, 8, 3, 7, 4, 6, 5, 1); + QUERY PLAN +------------------------------------------------------------------------------------ + Result + Output: (return_int_input(1) = hashed ANY ('{10,9,2,8,3,7,4,6,5,1}'::integer[])) +(2 rows) + +-- A cross-type "=" is legal and hash-registered (int4 = int8), but the two input +-- types have different hash functions, so it cannot be hashed +explain (verbose, costs off) +select return_int_input(1) = any (array[1, 2, 3, 4, 5, 6, 7, 8, 9]::int8[]); + QUERY PLAN +------------------------------------------------------------------------- + Result + Output: (return_int_input(1) = ANY ('{1,2,3,4,5,6,7,8,9}'::bigint[])) +(2 rows) + rollback; -- Test with non-strict equality function. -- We need to create our own type for this. diff --git a/src/test/regress/expected/rangetypes.out b/src/test/regress/expected/rangetypes.out index e062a4e5c2c..cc4a828bf27 100644 --- a/src/test/regress/expected/rangetypes.out +++ b/src/test/regress/expected/rangetypes.out @@ -677,6 +677,18 @@ select * from numrange_test2 where nr = numrange(1.1, 2.3); ---- (0 rows) +-- numrange's subtype (numeric) is hashable, so NOT IN uses hashed evaluation +explain (costs off) +select * from numrange_test2 +where nr not in (numrange(1,2), numrange(2,3), numrange(3,4), numrange(4,5), + numrange(5,6), numrange(6,7), numrange(7,8), numrange(8,9), + numrange(9,10)); + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------- + Seq Scan on numrange_test2 + Filter: (nr <> hashed ALL ('{"[1,2)","[2,3)","[3,4)","[4,5)","[5,6)","[6,7)","[7,8)","[8,9)","[9,10)"}'::numrange[])) +(2 rows) + set enable_nestloop=t; set enable_hashjoin=f; set enable_mergejoin=f; diff --git a/src/test/regress/expected/rowtypes.out b/src/test/regress/expected/rowtypes.out index 956bc2d02fc..f1af3eb4e27 100644 --- a/src/test/regress/expected/rowtypes.out +++ b/src/test/regress/expected/rowtypes.out @@ -482,6 +482,42 @@ where i8 in (row(123,456)::int8_tbl, '(4567890123456789,123)'); 4567890123456789 | 123 (2 rows) +-- A hashed SAOP is allowed when each column of the record is hashable +explain (verbose, costs off) +select * from int8_tbl i8 +where i8 = any (array[(1,2),(3,4),(5,6),(7,8),(9,10), + (11,12),(13,14),(15,16),(17,18)]::int8_tbl[]); + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------------------- + Seq Scan on public.int8_tbl i8 + Output: q1, q2 + Filter: (i8.* = hashed ANY ('{"(1,2)","(3,4)","(5,6)","(7,8)","(9,10)","(11,12)","(13,14)","(15,16)","(17,18)"}'::int8_tbl[])) +(3 rows) + +-- tsvector is comparable but not hashable, so this SAOP must not be hashed +create temp table hash_ts_row (a int, b tsvector); +explain (costs off) +select * from hash_ts_row t +where t = any (array['(1,w1)','(2,w2)','(3,w3)','(4,w4)','(5,w5)', + '(6,w6)','(7,w7)','(8,w8)','(9,w9)']::hash_ts_row[]); + QUERY PLAN +----------------------------------------------------------------------------------------------------------------------------------------------------------------- + Seq Scan on hash_ts_row t + Filter: (t.* = ANY ('{"(1,''w1'')","(2,''w2'')","(3,''w3'')","(4,''w4'')","(5,''w5'')","(6,''w6'')","(7,''w7'')","(8,''w8'')","(9,''w9'')"}'::hash_ts_row[])) +(2 rows) + +drop table hash_ts_row; +-- An anonymous record's column types can't be checked for hashing at plan time +explain (costs off) +select * from int8_tbl i8 +where (q1, q2) = any (array[(1,2),(3,4),(5,6),(7,8),(9,10), + (11,12),(13,14),(15,16),(17,18)]); + QUERY PLAN +-------------------------------------------------------------------------------------------------------------------------------- + Seq Scan on int8_tbl i8 + Filter: (ROW(q1, q2) = ANY ('{"(1,2)","(3,4)","(5,6)","(7,8)","(9,10)","(11,12)","(13,14)","(15,16)","(17,18)"}'::record[])) +(2 rows) + -- Check ability to select columns from an anonymous rowtype select (row(1, 2.0)).f1; f1 diff --git a/src/test/regress/sql/expressions.sql b/src/test/regress/sql/expressions.sql index 3b3048f9731..1b0730754f0 100644 --- a/src/test/regress/sql/expressions.sql +++ b/src/test/regress/sql/expressions.sql @@ -131,6 +131,14 @@ select return_int_input(1) not in (null, null, null, null, null, null, null, nul select return_int_input(null::int) not in (10, 9, 2, 8, 3, 7, 4, 6, 5, 1); select return_int_input(null::int) not in (10, 9, 2, 8, 3, 7, 4, 6, 5, null); select return_text_input('a') not in ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j'); +-- Check that EXPLAIN marks the hashed decision. +explain (verbose, costs off) +select return_int_input(1) in (10, 9, 2, 8, 3, 7, 4, 6, 5, 1); + +-- A cross-type "=" is legal and hash-registered (int4 = int8), but the two input +-- types have different hash functions, so it cannot be hashed +explain (verbose, costs off) +select return_int_input(1) = any (array[1, 2, 3, 4, 5, 6, 7, 8, 9]::int8[]); rollback; diff --git a/src/test/regress/sql/rangetypes.sql b/src/test/regress/sql/rangetypes.sql index 5c4b0337b7a..8d3c89defa4 100644 --- a/src/test/regress/sql/rangetypes.sql +++ b/src/test/regress/sql/rangetypes.sql @@ -158,6 +158,13 @@ select * from numrange_test2 where nr = 'empty'::numrange; select * from numrange_test2 where nr = numrange(1.1, 2.2); select * from numrange_test2 where nr = numrange(1.1, 2.3); +-- numrange's subtype (numeric) is hashable, so NOT IN uses hashed evaluation +explain (costs off) +select * from numrange_test2 +where nr not in (numrange(1,2), numrange(2,3), numrange(3,4), numrange(4,5), + numrange(5,6), numrange(6,7), numrange(7,8), numrange(8,9), + numrange(9,10)); + set enable_nestloop=t; set enable_hashjoin=f; set enable_mergejoin=f; diff --git a/src/test/regress/sql/rowtypes.sql b/src/test/regress/sql/rowtypes.sql index 174b062144a..d4263b11457 100644 --- a/src/test/regress/sql/rowtypes.sql +++ b/src/test/regress/sql/rowtypes.sql @@ -187,6 +187,26 @@ where i8 in (row(123,456)::int8_tbl, '(4567890123456789,123)'); select * from int8_tbl i8 where i8 in (row(123,456)::int8_tbl, '(4567890123456789,123)'); +-- A hashed SAOP is allowed when each column of the record is hashable +explain (verbose, costs off) +select * from int8_tbl i8 +where i8 = any (array[(1,2),(3,4),(5,6),(7,8),(9,10), + (11,12),(13,14),(15,16),(17,18)]::int8_tbl[]); + +-- tsvector is comparable but not hashable, so this SAOP must not be hashed +create temp table hash_ts_row (a int, b tsvector); +explain (costs off) +select * from hash_ts_row t +where t = any (array['(1,w1)','(2,w2)','(3,w3)','(4,w4)','(5,w5)', + '(6,w6)','(7,w7)','(8,w8)','(9,w9)']::hash_ts_row[]); +drop table hash_ts_row; + +-- An anonymous record's column types can't be checked for hashing at plan time +explain (costs off) +select * from int8_tbl i8 +where (q1, q2) = any (array[(1,2),(3,4),(5,6),(7,8),(9,10), + (11,12),(13,14),(15,16),(17,18)]); + -- Check ability to select columns from an anonymous rowtype select (row(1, 2.0)).f1; select (row(1, 2.0)).f2; -- 2.54.0