From d81d95a0fc08df30a71a33bcc992066d8f95a1ca Mon Sep 17 00:00:00 2001 From: Andrei Lepikhov Date: Fri, 5 Jun 2026 07:11:54 +0000 Subject: [PATCH v0 1/3] Don't hash a record/array SAOP whose input type isn't hashable convert_saop_to_hashed_saop_walker() decided to evaluate a ScalarArrayOpExpr with a hash table whenever get_op_hash_functions() reported the operator as hashable. That function only performs a pg_amop membership test, and since 01e658fa74c added hash/record_ops, it returns true for record_eq unconditionally. But record_eq (like array_eq) is only actually hashable when every column/element type has a hash function. For a composite type with a non-hashable column type (e.g. tsvector) and an array of at least MIN_ARRAY_SIZE_FOR_HASHED_SAOP constant elements, the planner therefore enabled hashing and the executor failed at runtime. This is the same trap that hash_ok_operator() fell into and that 17da9d4c282 fixed for subplan hashing. Give the SAOP planner gate the same treatment. --- src/backend/optimizer/util/clauses.c | 26 +++++++++++++++++++++-- src/backend/utils/cache/lsyscache.c | 6 +++--- src/test/regress/expected/expressions.out | 25 ++++++++++++++++++++++ src/test/regress/sql/expressions.sql | 20 +++++++++++++++++ 4 files changed, 72 insertions(+), 5 deletions(-) diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c index cd86311bb0b..d44f674dc76 100644 --- a/src/backend/optimizer/util/clauses.c +++ b/src/backend/optimizer/util/clauses.c @@ -2535,6 +2535,24 @@ convert_saop_to_hashed_saop(Node *node) (void) convert_saop_to_hashed_saop_walker(node, NULL); } +/* + * saop_hashable_for_type + * Can a hashed ScalarArrayOpExpr safely use equality operator 'eqop' + * for left-hand input type 'lefttype'? + * + * get_op_hash_functions() reports record_eq and array_eq as hashable + * unconditionally. But hashability actually depends on the specific input + * type: every column/element type must itself be hashable. Re-check such + * operators through op_hashjoinable(). + */ +static bool +saop_hashable_for_type(Oid eqop, Oid lefttype) +{ + if (eqop == RECORD_EQ_OP || eqop == ARRAY_EQ_OP) + return op_hashjoinable(eqop, lefttype); + return true; +} + static bool convert_saop_to_hashed_saop_walker(Node *node, void *context) { @@ -2554,7 +2572,9 @@ convert_saop_to_hashed_saop_walker(Node *node, void *context) if (saop->useOr) { if (get_op_hash_functions(saop->opno, &lefthashfunc, &righthashfunc) && - lefthashfunc == righthashfunc) + lefthashfunc == righthashfunc && + saop_hashable_for_type(saop->opno, + exprType(linitial(saop->args)))) { Datum arrdatum = ((Const *) arrayarg)->constvalue; ArrayType *arr = (ArrayType *) DatumGetPointer(arrdatum); @@ -2586,7 +2606,9 @@ convert_saop_to_hashed_saop_walker(Node *node, void *context) */ if (OidIsValid(negator) && get_op_hash_functions(negator, &lefthashfunc, &righthashfunc) && - lefthashfunc == righthashfunc) + lefthashfunc == righthashfunc && + saop_hashable_for_type(negator, + exprType(linitial(saop->args)))) { Datum arrdatum = ((Const *) arrayarg)->constvalue; ArrayType *arr = (ArrayType *) DatumGetPointer(arrdatum); diff --git a/src/backend/utils/cache/lsyscache.c b/src/backend/utils/cache/lsyscache.c index 036086057d7..3c7fecf526d 100644 --- a/src/backend/utils/cache/lsyscache.c +++ b/src/backend/utils/cache/lsyscache.c @@ -1659,9 +1659,9 @@ op_mergejoinable(Oid opno, Oid inputtype) * Returns true if the operator is hashjoinable. (There must be a suitable * hash opfamily entry for this operator if it is so marked.) * - * In some cases (currently only array_eq), hashjoinability depends on the - * specific input data type the operator is invoked for, so that must be - * passed as well. We currently assume that only one input's type is needed + * In some cases (currently array_eq and record_eq), hashjoinability depends + * on the specific input data type the operator is invoked for, so that must + * be passed as well. We currently assume that only one input's type is needed * to check this --- by convention, pass the left input's data type. */ bool diff --git a/src/test/regress/expected/expressions.out b/src/test/regress/expected/expressions.out index 730f7bc7eba..c35583cb2ea 100644 --- a/src/test/regress/expected/expressions.out +++ b/src/test/regress/expected/expressions.out @@ -561,3 +561,28 @@ from inttest; (3 rows) rollback; +-- +-- Hashed SAOP must not be selected for a composite type that is not actually +-- hashable. +-- +CREATE TABLE hashed_saop_tbl (a int, b tsvector); +INSERT INTO hashed_saop_tbl + SELECT g, ('w' || g)::tsvector FROM generate_series(1, 1000) g; +ANALYZE hashed_saop_tbl; +-- Throws an ERROR if the hashed strategy has been chosen +EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) +SELECT count(*) FROM hashed_saop_tbl +WHERE (a,b) = ANY (ARRAY[ + (1, 'w1'::tsvector), (2, 'w2'::tsvector), (3, 'w3'::tsvector), + (4, 'w4'::tsvector), (5, 'w5'::tsvector), (6, 'w6'::tsvector), + (7, 'w7'::tsvector), (8, 'w8'::tsvector), (9, 'w9'::tsvector) + ]); + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------------------ + Aggregate (actual rows=1.00 loops=1) + -> Seq Scan on hashed_saop_tbl (actual rows=9.00 loops=1) + Filter: (ROW(a, b) = ANY ('{"(1,''w1'')","(2,''w2'')","(3,''w3'')","(4,''w4'')","(5,''w5'')","(6,''w6'')","(7,''w7'')","(8,''w8'')","(9,''w9'')"}'::record[])) + Rows Removed by Filter: 991 +(4 rows) + +DROP TABLE hashed_saop_tbl; diff --git a/src/test/regress/sql/expressions.sql b/src/test/regress/sql/expressions.sql index 3b3048f9731..08691877902 100644 --- a/src/test/regress/sql/expressions.sql +++ b/src/test/regress/sql/expressions.sql @@ -301,3 +301,23 @@ select from inttest; rollback; + +-- +-- Hashed SAOP must not be selected for a composite type that is not actually +-- hashable. +-- + +CREATE TABLE hashed_saop_tbl (a int, b tsvector); +INSERT INTO hashed_saop_tbl + SELECT g, ('w' || g)::tsvector FROM generate_series(1, 1000) g; +ANALYZE hashed_saop_tbl; + +-- Throws an ERROR if the hashed strategy has been chosen +EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) +SELECT count(*) FROM hashed_saop_tbl +WHERE (a,b) = ANY (ARRAY[ + (1, 'w1'::tsvector), (2, 'w2'::tsvector), (3, 'w3'::tsvector), + (4, 'w4'::tsvector), (5, 'w5'::tsvector), (6, 'w6'::tsvector), + (7, 'w7'::tsvector), (8, 'w8'::tsvector), (9, 'w9'::tsvector) + ]); +DROP TABLE hashed_saop_tbl; -- 2.54.0