From b8b5ad6cab42762890e48df85fac819397fae128 Mon Sep 17 00:00:00 2001
From: Maxime Schoemans
Date: Thu, 4 Jun 2026 20:23:16 +0200
Subject: [PATCH v2 6/6] Support multi-entry opclasses in the GiST sorted build

Previously the sorted build was refused for a multi-entry key column,
since it formed a single tuple per heap tuple and never called
extractValue.

Lift that restriction: gistSortedBuildCallback now decomposes the
multi-entry column with extractValue and sorts one tuple per sub-entry,
mirroring the regular build path. Each tuple is marked with the
multi-entry reserved bit (via tuplesort_putindextuplevalues' new
argument) when the value produced more than one entry, so scans
deduplicate the heap TID; the bit rides through the sort and any tape
spill unchanged. NULL and empty extractValue results produce a single
NULL entry, as on the insert path.

No in-core opclass provides both sortsupport and extractValue, so this
path is not reached by the regression tests. It was verified by hand by
temporarily attaching range_sortsupport to multirange_me_ops, building
the index (confirmed to take the sorted path), and checking that scans
match a sequential scan, including deduplication of multi-component
multiranges.

This is offered as a follow-up to the multi-entry GiST patch for
discussion; it could equally be left as a later addition.
---
 src/backend/access/gist/gistbuild.c | 72 ++++++++++++++++++++++-------
 1 file changed, 56 insertions(+), 16 deletions(-)

diff --git a/src/backend/access/gist/gistbuild.c b/src/backend/access/gist/gistbuild.c
index 6334bc6cdc8..be95aeb7525 100644
--- a/src/backend/access/gist/gistbuild.c
+++ b/src/backend/access/gist/gistbuild.c
@@ -242,18 +242,6 @@ gistbuild(Relation heap, Relation index, IndexInfo *indexInfo)
 				hasallsortsupports = false;
 				break;
 			}
-
-			/*
-			 * The sorted build path forms a single index tuple per heap tuple
-			 * straight from the tuplesort and never calls extractValue, so it
-			 * cannot be used for a multi-entry key column.
-			 */
-			if (OidIsValid(index_getprocid(index, i + 1,
-										   GIST_EXTRACTVALUE_PROC)))
-			{
-				hasallsortsupports = false;
-				break;
-			}
 		}
 		if (hasallsortsupports)
 			buildstate.buildMode = GIST_SORTED_BUILD;
@@ -383,13 +371,65 @@ gistSortedBuildCallback(Relation index,
 						void *state)
 {
 	GISTBuildState *buildstate = (GISTBuildState *) state;
+	GISTSTATE  *giststate = buildstate->giststate;
+	int			mecol = giststate->multiEntryColumn;
 	MemoryContext oldCtx;
 	Datum		compressed_values[INDEX_MAX_KEYS];
 
-	oldCtx = MemoryContextSwitchTo(buildstate->giststate->tempCxt);
+	oldCtx = MemoryContextSwitchTo(giststate->tempCxt);
+
+	/*
+	 * For a multi-entry key column, decompose the value with extractValue and
+	 * sort one tuple per sub-entry, mirroring gistExtractEntries. Each tuple
+	 * gets the multi-entry reserved bit when the value produced more than one
+	 * entry, so scans deduplicate its heap TID. A NULL or empty result falls
+	 * through to the single-entry path below, producing one NULL entry.
+	 */
+	if (mecol >= 0 && !isnull[mecol])
+	{
+		Datum	   *entries;
+		bool	   *nullFlags = NULL;
+		int32		nentries;
+
+		entries = (Datum *)
+			DatumGetPointer(FunctionCall3Coll(&giststate->extractValueFn[mecol],
+											  giststate->supportCollation[mecol],
+											  values[mecol],
+											  PointerGetDatum(&nentries),
+											  PointerGetDatum(&nullFlags)));
+
+		if (entries != NULL && nentries > 0)
+		{
+			if (nullFlags == NULL)
+				nullFlags = palloc0_array(bool, nentries);
+
+			for (int i = 0; i < nentries; i++)
+			{
+				values[mecol] = entries[i];
+				isnull[mecol] = nullFlags[i];
+				gistCompressValues(giststate, index, values, isnull,
+								   true, compressed_values);
+				tuplesort_putindextuplevalues(buildstate->sortstate,
+											  buildstate->indexrel,
+											  tid, compressed_values, isnull,
+											  nentries > 1);
+			}
 
-	/* Form an index tuple and point it at the heap tuple */
-	gistCompressValues(buildstate->giststate, index,
+			MemoryContextSwitchTo(oldCtx);
+			MemoryContextReset(giststate->tempCxt);
+
+			/* Update tuple count: one per sub-entry sorted. */
+			buildstate->indtuples += nentries;
+			return;
+		}
+
+		/* extractValue produced nothing: store a single NULL entry */
+		values[mecol] = (Datum) 0;
+		isnull[mecol] = true;
+	}
+
+	/* Form a single index tuple and point it at the heap tuple */
+	gistCompressValues(giststate, index,
 					   values, isnull,
 					   true, compressed_values);
 
@@ -399,7 +439,7 @@ gistSortedBuildCallback(Relation index,
 								  compressed_values, isnull, false);
 
 	MemoryContextSwitchTo(oldCtx);
-	MemoryContextReset(buildstate->giststate->tempCxt);
+	MemoryContextReset(giststate->tempCxt);
 
 	/* Update tuple count. */
 	buildstate->indtuples += 1;
-- 
2.50.1 (Apple Git-155)