From 3d67d0b2114b0c0aa271606767ad6e8b33c48d10 Mon Sep 17 00:00:00 2001 From: "Andrei V. Lepikhov" Date: Fri, 5 Jun 2026 14:14:15 +0000 Subject: [PATCH v0 3/3] Recover hashed SAOP for anonymous records with hashable columns The previous commit routed record_eq through op_hashjoinable(), which fixed the runtime failure on composites with non-hashable columns but also disabled hashing for all anonymous-RECORD SAOPs: the typcache refuses to vouch for bare RECORD, so op_hashjoinable() returns false even when every column is hashable. The common "(a, b) = ANY (...)" idiom thus regressed to a linear search. A hashed SAOP always has the array as a Const, so unlike the sub-SELECT case in hash_ok_operator() we can examine the actual data: resolve the concrete rowtype of every array element and re-enable hashing when all their column types are hashable. The rowtype is looked up with the no-error variant so an unregistered typmod degrades to a linear search rather than an ERROR. --- src/backend/optimizer/util/clauses.c | 128 +++++++++++++++++++++- src/test/regress/expected/expressions.out | 10 ++ src/test/regress/sql/expressions.sql | 5 + 3 files changed, 138 insertions(+), 5 deletions(-) diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c index d44f674dc76..bbb8b7c3b59 100644 --- a/src/backend/optimizer/util/clauses.c +++ b/src/backend/optimizer/util/clauses.c @@ -52,6 +52,7 @@ #include "rewrite/rewriteManip.h" #include "tcop/tcopprot.h" #include "utils/acl.h" +#include "utils/array.h" #include "utils/builtins.h" #include "utils/datum.h" #include "utils/fmgroids.h" @@ -2535,21 +2536,136 @@ convert_saop_to_hashed_saop(Node *node) (void) convert_saop_to_hashed_saop_walker(node, NULL); } +/* + * record_const_array_is_hashable + * Are all column types of every element of a constant record[] array + * hashable? + * + * Used to recover hashing for an anonymous-RECORD ScalarArrayOpExpr. 
The + * typcache deliberately refuses to report bare RECORD as field-hashable + * because it cannot know the columns of an arbitrary anonymous record (see + * cache_record_field_properties()). But a hashed SAOP always has the array + * as a Const, so here we do have the concrete records in hand: resolve each + * element's actual rowtype and check that every (non-dropped) column type has + * a hash function. If so, hash_record() cannot fail on this array. + * + * We examine every element, not just the first: an array of RECORD is uniform + * only in element type, and individual elements may carry different blessed + * rowtypes (different typmods). A later element with a non-hashable column + * would otherwise trip the very failure this guards against. + * + * The left-hand input need not be examined separately. record_eq() compares + * by physical columns and errors out on dissimilar column types, so at runtime + * the LHS record either shares the array elements' column layout (hence is + * equally hashable) or the comparison errors regardless of hashing. 
+ */
+static bool
+record_const_array_is_hashable(Const *arrayConst)
+{
+	ArrayType  *arr;
+	int16		elmlen;
+	bool		elmbyval;
+	char		elmalign;
+	Datum	   *elems;
+	bool	   *nulls;
+	int			nelems;
+	bool		result = true;
+	int32		lastTypmod = -1;
+	Oid			lastType = InvalidOid;
+
+	Assert(arrayConst != NULL && !arrayConst->constisnull);
+
+	arr = DatumGetArrayTypeP(arrayConst->constvalue);
+	if (ARR_ELEMTYPE(arr) != RECORDOID)
+		return false;
+
+	get_typlenbyvalalign(RECORDOID, &elmlen, &elmbyval, &elmalign);
+	deconstruct_array(arr, RECORDOID, elmlen, elmbyval, elmalign,
+					  &elems, &nulls, &nelems);
+
+	for (int i = 0; i < nelems && result; i++)
+	{
+		HeapTupleHeader rec;
+		Oid			tupType;
+		int32		tupTypmod;
+		TupleDesc	tupdesc;
+
+		if (nulls[i])
+			continue;
+
+		rec = DatumGetHeapTupleHeader(elems[i]);
+		tupType = HeapTupleHeaderGetTypeId(rec);
+		tupTypmod = HeapTupleHeaderGetTypMod(rec);
+
+		/* Skip the rowtype lookup when this element matches the previous one */
+		if (tupType == lastType && tupTypmod == lastTypmod)
+			continue;
+		lastType = tupType;
+		lastTypmod = tupTypmod;
+
+		/*
+		 * Use the no-error variant: an unregistered blessed typmod must never
+		 * turn this planner-time optimization decision into an ERROR. Treat a
+		 * missing tupdesc as "not hashable" and fall back to a linear search.
+		 */
+		tupdesc = lookup_rowtype_tupdesc_noerror(tupType, tupTypmod, true);
+		if (tupdesc == NULL)
+		{
+			result = false;
+			break;
+		}
+		for (int j = 0; j < tupdesc->natts && result; j++)
+		{
+			Form_pg_attribute att = TupleDescAttr(tupdesc, j);
+			TypeCacheEntry *fieldentry;
+
+			if (att->attisdropped)
+				continue;
+			fieldentry = lookup_type_cache(att->atttypid, TYPECACHE_HASH_PROC);
+			result = OidIsValid(fieldentry->hash_proc);
+		}
+		ReleaseTupleDesc(tupdesc);
+	}
+	pfree(elems);
+	pfree(nulls);
+
+	/*
+	 * With no non-null element (lastType still unset), nothing ties the LHS to
+	 * a checked rowtype: hash_record() could fail where linear search cannot.
+	 */
+	return result && OidIsValid(lastType);
+}
+
 /*
  * saop_hashable_for_type
  *		Can a hashed ScalarArrayOpExpr safely use equality operator 'eqop'
- *		for left-hand input type 'lefttype'?
+ *		for left-hand input type 'lefttype' over constant array 'arrayConst'?
  *
  * get_op_hash_functions() reports record_eq and array_eq as hashable
  * unconditionally. But hashability actually depends on the specific input
  * type: every column/element type must itself be hashable. Re-check such
  * operators through op_hashjoinable().
+ *
+ * op_hashjoinable() conservatively returns false for anonymous RECORD, since
+ * the typcache cannot inspect an arbitrary record's columns. In that one case
+ * we have more information than the typcache -- the constant array itself --
+ * so we examine the actual element rowtypes and allow hashing when they are
+ * all hashable.
  */
 static bool
-saop_hashable_for_type(Oid eqop, Oid lefttype)
+saop_hashable_for_type(Oid eqop, Oid lefttype, Const *arrayConst)
 {
-	if (eqop == RECORD_EQ_OP || eqop == ARRAY_EQ_OP)
+	if (eqop == ARRAY_EQ_OP)
 		return op_hashjoinable(eqop, lefttype);
+	if (eqop == RECORD_EQ_OP)
+	{
+		if (op_hashjoinable(eqop, lefttype))
+			return true;
+		/* Bare RECORD: typcache can't vouch, so inspect the array itself. */
+		if (lefttype == RECORDOID)
+			return record_const_array_is_hashable(arrayConst);
+		return false;			/* op_hashjoinable() rejected this rowtype */
+	}
 	return true;
 }
 
@@ -2574,7 +2690,8 @@ convert_saop_to_hashed_saop_walker(Node *node, void *context)
 			if (get_op_hash_functions(saop->opno, &lefthashfunc, &righthashfunc) &&
 				lefthashfunc == righthashfunc &&
 				saop_hashable_for_type(saop->opno,
-									   exprType(linitial(saop->args))))
+									   exprType(linitial(saop->args)),
+									   (Const *) arrayarg))
 			{
 				Datum		arrdatum = ((Const *) arrayarg)->constvalue;
 				ArrayType  *arr = (ArrayType *) DatumGetPointer(arrdatum);
@@ -2608,7 +2725,8 @@ convert_saop_to_hashed_saop_walker(Node *node, void *context)
 					get_op_hash_functions(negator, &lefthashfunc, &righthashfunc) &&
 					lefthashfunc == righthashfunc &&
 					saop_hashable_for_type(negator,
-										   exprType(linitial(saop->args))))
+										   exprType(linitial(saop->args)),
+										   (Const *) arrayarg))
 				{
 					Datum		arrdatum = ((Const *) arrayarg)->constvalue;
 					ArrayType  *arr = (ArrayType *) DatumGetPointer(arrdatum);
diff --git a/src/test/regress/expected/expressions.out b/src/test/regress/expected/expressions.out
index 9d4db9c3b2a..958d92e0956 100644
--- a/src/test/regress/expected/expressions.out
+++ b/src/test/regress/expected/expressions.out
@@ -335,6 +335,16 @@ select return_int_input(1) in (10, 9, 2, 8, 3, 7, 4, 6, 5, 1);
  Result
    Output: (return_int_input(1) = hashed ANY ('{10,9,2,8,3,7,4,6,5,1}'::integer[]))
 (2 rows)
+-- An anonymous-record SAOP whose columns are all hashable also hashes: the
+-- planner cannot rely on the typcache for bare RECORD, so it inspects the
+-- constant array's actual rowtype and recovers the hashed plan.
+explain (verbose, costs off) +select (return_int_input(1), return_int_input(2)) = any (array[(1,2),(3,4),(5,6),(7,8),(9,10),(11,12),(13,14),(15,16),(17,18)]); + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Result + Output: (ROW(return_int_input(1), return_int_input(2)) = hashed ANY ('{"(1,2)","(3,4)","(5,6)","(7,8)","(9,10)","(11,12)","(13,14)","(15,16)","(17,18)"}'::record[])) +(2 rows) rollback; -- Test with non-strict equality function. diff --git a/src/test/regress/sql/expressions.sql b/src/test/regress/sql/expressions.sql index fe9c330361b..a8daf14ffc4 100644 --- a/src/test/regress/sql/expressions.sql +++ b/src/test/regress/sql/expressions.sql @@ -134,6 +134,11 @@ select return_text_input('a') not in ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i -- Check tha explain marks the hashed decision. explain (verbose, costs off) select return_int_input(1) in (10, 9, 2, 8, 3, 7, 4, 6, 5, 1); +-- An anonymous-record SAOP whose columns are all hashable also hashes: the +-- planner cannot rely on the typcache for bare RECORD, so it inspects the +-- constant array's actual rowtype and recovers the hashed plan. +explain (verbose, costs off) +select (return_int_input(1), return_int_input(2)) = any (array[(1,2),(3,4),(5,6),(7,8),(9,10),(11,12),(13,14),(15,16),(17,18)]); rollback; -- 2.54.0