From f1377d894d5cf9c1feb56ad2e375fb041f6e2e0f Mon Sep 17 00:00:00 2001 From: Maxime Schoemans Date: Fri, 3 Apr 2026 18:43:29 +0200 Subject: [PATCH v1 3/4] Add multi-entry support to SP-GiST Extend SP-GiST to support an optional extractValue support function (SPGIST_EXTRACTVALUE_PROC, number 8) that decomposes a single indexed value into multiple sub-entries, each stored as a separate index entry. This mirrors the multi-entry support previously added to GiST. Infrastructure changes: spgist.h: Define SPGIST_EXTRACTVALUE_PROC (8), bump SPGISTNProc to 8. spgist_private.h: Add tidHash field to SpGistScanOpaqueData for TID deduplication during multi-entry scans. spginsert.c: Add spgExtractEntries() helper. Modify spgistBuildCallback and spginsert to call extractValue and loop over the returned entries. spgscan.c: Implement TID deduplication via simplehash to ensure each heap TID is returned only once when multiple index entries match. Handles both ordered and non-ordered scans. Bitmap scans rely on the bitmap's inherent deduplication. Disable index-only scans for the key column of multi-entry indexes since the stored entries don't represent the original datum. spgvalidate.c: Register extractValue as an optional support function. Allow compress to be absent when extractValue is present (extractValue handles the type conversion). spgutils.c: Accept extractValue as an alternative to compress when the leaf type differs from the input type. --- src/backend/access/spgist/spginsert.c | 97 +++++++++++++--- src/backend/access/spgist/spgscan.c | 146 +++++++++++++++++++++--- src/backend/access/spgist/spgutils.c | 61 +++++++++- src/backend/access/spgist/spgvalidate.c | 12 +- src/include/access/spgist.h | 3 +- src/include/access/spgist_private.h | 9 ++ 6 files changed, 295 insertions(+), 33 deletions(-) diff --git a/src/backend/access/spgist/spginsert.c b/src/backend/access/spgist/spginsert.c index 780ef646a54..0e9e4e845c8 100644 --- a/src/backend/access/spgist/spginsert.c +++ b/src/backend/access/spgist/spginsert.c @@ -44,23 +44,60 @@ spgistBuildCallback(Relation index, ItemPointer tid, Datum *values, SpGistBuildState *buildstate = (SpGistBuildState *) state; MemoryContext oldCtx; - /* Work in temp context, and reset it after each tuple */ - oldCtx = MemoryContextSwitchTo(buildstate->tmpCtx); - /* * Even though no concurrent insertions can be happening, we still might * get a buffer-locking failure due to bgwriter or checkpointer taking a * lock on some buffer. So we need to be willing to retry. We can flush * any temp data when retrying. + * + * If the opclass provides an extractValue function, extract multiple + * entries and insert each one. Otherwise, insert a single entry. + * + * We extract entries in the caller's memory context so that the entries + * array survives MemoryContextReset(tmpCtx) in the retry loop. */ - while (!spgdoinsert(index, &buildstate->spgstate, tid, - values, isnull)) + if (OidIsValid(index_getprocid(index, 1, SPGIST_EXTRACTVALUE_PROC))) { - MemoryContextReset(buildstate->tmpCtx); + Datum *entries; + bool *nullFlags; + int32 nentries; + int i; + + entries = spgExtractEntries(index, values[spgKeyColumn], + isnull[spgKeyColumn], + &nentries, &nullFlags); + + oldCtx = MemoryContextSwitchTo(buildstate->tmpCtx); + + for (i = 0; i < nentries; i++) + { + values[spgKeyColumn] = entries[i]; + isnull[spgKeyColumn] = nullFlags[i]; + + while (!spgdoinsert(index, &buildstate->spgstate, tid, + values, isnull)) + { + MemoryContextReset(buildstate->tmpCtx); + } + } + + /* Update total tuple count */ + buildstate->indtuples += nentries; } + else + { + /* Work in temp context, and reset it after each tuple */ + oldCtx = MemoryContextSwitchTo(buildstate->tmpCtx); + + while (!spgdoinsert(index, &buildstate->spgstate, tid, + values, isnull)) + { + MemoryContextReset(buildstate->tmpCtx); + } - /* Update total tuple count */ - buildstate->indtuples += 1; + /* Update total tuple count */ + buildstate->indtuples += 1; + } MemoryContextSwitchTo(oldCtx); MemoryContextReset(buildstate->tmpCtx); @@ -193,20 +230,54 @@ spginsert(Relation index, Datum *values, bool *isnull, insertCtx = AllocSetContextCreate(CurrentMemoryContext, "SP-GiST insert temporary context", ALLOCSET_DEFAULT_SIZES); - oldCtx = MemoryContextSwitchTo(insertCtx); - - initSpGistState(&spgstate, index); /* * We might have to repeat spgdoinsert() multiple times, if conflicts * occur with concurrent insertions. If so, reset the insertCtx each time * to avoid cumulative memory consumption. That means we also have to * redo initSpGistState(), but it's cheap enough not to matter. + * + * If the opclass provides an extractValue function, extract multiple + * entries and insert each one separately. We extract entries in the + * caller's memory context so that the entries array survives + * MemoryContextReset(insertCtx) in the retry loop. */ - while (!spgdoinsert(index, &spgstate, ht_ctid, values, isnull)) + if (OidIsValid(index_getprocid(index, 1, SPGIST_EXTRACTVALUE_PROC))) + { + Datum *entries; + bool *nullFlags; + int32 nentries; + int i; + + entries = spgExtractEntries(index, values[spgKeyColumn], + isnull[spgKeyColumn], + &nentries, &nullFlags); + + oldCtx = MemoryContextSwitchTo(insertCtx); + initSpGistState(&spgstate, index); + + for (i = 0; i < nentries; i++) + { + values[spgKeyColumn] = entries[i]; + isnull[spgKeyColumn] = nullFlags[i]; + + while (!spgdoinsert(index, &spgstate, ht_ctid, values, isnull)) + { + MemoryContextReset(insertCtx); + initSpGistState(&spgstate, index); + } + } + } + else { - MemoryContextReset(insertCtx); + oldCtx = MemoryContextSwitchTo(insertCtx); initSpGistState(&spgstate, index); + + while (!spgdoinsert(index, &spgstate, ht_ctid, values, isnull)) + { + MemoryContextReset(insertCtx); + initSpGistState(&spgstate, index); + } } SpGistUpdateMetaPage(index); diff --git a/src/backend/access/spgist/spgscan.c b/src/backend/access/spgist/spgscan.c index 2cc5f06f5d7..398a93a7dd9 100644 --- a/src/backend/access/spgist/spgscan.c +++ b/src/backend/access/spgist/spgscan.c @@ -18,6 +18,7 @@ #include "access/genam.h" #include "access/relscan.h" #include "access/spgist_private.h" +#include "common/hashfn.h" #include "executor/instrument_node.h" #include "miscadmin.h" #include "pgstat.h" @@ -28,6 +29,49 @@ #include "utils/memutils.h" #include "utils/rel.h" +/* + * Simplehash implementation for TID deduplication in multi-entry scans. + * + * When an opclass provides an extractValue function, each heap tuple produces + * multiple index entries. During scans, we must deduplicate results so that + * each heap TID is returned only once. + */ + +/* Hash table entry for basic TID dedup */ +typedef struct SPGTIDHashEntry +{ + ItemPointerData tid; /* TID (hashtable key) */ + uint32 hash; /* hash value (cached) */ + char status; /* hash status */ +} SPGTIDHashEntry; + +static inline uint32 +spg_tid_hash_fn(ItemPointerData tid) +{ + uint32 h = murmurhash32(ItemPointerGetBlockNumber(&tid)); + + return murmurhash32(h + ItemPointerGetOffsetNumber(&tid)); +} + +static inline bool +spg_tid_match_fn(ItemPointerData a, ItemPointerData b) +{ + return ItemPointerEquals(&a, &b); +} + +/* --- spgtid hash table (declare + define) --- */ +#define SH_PREFIX spgtid +#define SH_ELEMENT_TYPE SPGTIDHashEntry +#define SH_KEY_TYPE ItemPointerData +#define SH_KEY tid +#define SH_HASH_KEY(tb, key) spg_tid_hash_fn(key) +#define SH_EQUAL(tb, a, b) spg_tid_match_fn(a, b) +#define SH_SCOPE static inline +#define SH_DECLARE +#define SH_DEFINE +#include "lib/simplehash.h" + + typedef void (*storeRes_func) (SpGistScanOpaque so, ItemPointer heapPtr, Datum leafValue, bool isNull, SpGistLeafTuple leafTuple, bool recheck, @@ -158,6 +202,14 @@ resetSpGistScanOpaque(SpGistScanOpaque so) MemoryContextReset(so->traversalCxt); + /* + * For multi-entry indexes, set up TID deduplication hash table. The hash + * table lives in traversalCxt and is destroyed/recreated on each rescan. + */ + if (OidIsValid(index_getprocid(so->state.index, 1, + SPGIST_EXTRACTVALUE_PROC))) + so->tidHash = spgtid_create(so->traversalCxt, 256, NULL); + oldCtx = MemoryContextSwitchTo(so->traversalCxt); /* initialize queue only for distance-ordered scans */ @@ -364,6 +416,9 @@ spgbeginscan(Relation rel, int keysz, int orderbysz) index_getprocinfo(rel, 1, SPGIST_LEAF_CONSISTENT_PROC), CurrentMemoryContext); + /* tidHash will be set up by resetSpGistScanOpaque if needed */ + so->tidHash = NULL; + so->indexCollation = rel->rd_indcollation[0]; scan->opaque = so; @@ -571,25 +626,53 @@ spgLeafTest(SpGistScanOpaque so, SpGistSearchItem *item, { /* the scan is ordered -> add the item to the queue */ MemoryContext oldCxt = MemoryContextSwitchTo(so->traversalCxt); - SpGistSearchItem *heapItem = spgNewHeapItem(so, item->level, - leafTuple, - leafValue, - recheck, - recheckDistances, - isnull, - distances); - - spgAddSearchItemToQueue(so, heapItem); - MemoryContextSwitchTo(oldCxt); + /* + * For multi-entry ordered scans, skip leaf entries whose TIDs + * have already been returned by the ordered dequeue path. We + * use lookup (not insert) here: a TID must remain enqueueable + * until it is actually dequeued, so that the pairing heap can + * pick the copy with the smallest distance. + */ + if (so->tidHash && + spgtid_lookup(so->tidHash, leafTuple->heapPtr)) + { + MemoryContextSwitchTo(oldCxt); + } + else + { + SpGistSearchItem *heapItem = spgNewHeapItem(so, item->level, + leafTuple, + leafValue, + recheck, + recheckDistances, + isnull, + distances); + + spgAddSearchItemToQueue(so, heapItem); + MemoryContextSwitchTo(oldCxt); + } } else { - /* non-ordered scan, so report the item right away */ - Assert(!recheckDistances); - storeRes(so, &leafTuple->heapPtr, leafValue, isnull, - leafTuple, recheck, false, NULL); - *reportedSome = true; + /* + * Non-ordered scan, so report the item right away. + * + * For multi-entry indexes, check the TID hash table to avoid + * returning duplicate heap TIDs. + */ + bool found = false; + + if (so->tidHash) + spgtid_insert(so->tidHash, leafTuple->heapPtr, &found); + + if (!found) + { + Assert(!recheckDistances); + storeRes(so, &leafTuple->heapPtr, leafValue, isnull, + leafTuple, recheck, false, NULL); + *reportedSome = true; + } } } @@ -830,6 +913,25 @@ redirect: { /* We store heap items in the queue only in case of ordered search */ Assert(so->numberOfNonNullOrderBys > 0); + + /* + * For multi-entry ordered scans, deduplicate using tidHash. + * The pairing heap ensures we see the smallest distance first; + * tidHash skips subsequent duplicates for the same TID. + */ + if (so->tidHash) + { + bool found; + + spgtid_insert(so->tidHash, item->heapPtr, &found); + if (found) + { + spgFreeSearchItem(so, item); + MemoryContextReset(so->tempCxt); + continue; /* already returned this TID */ + } + } + storeRes(so, &item->heapPtr, item->value, item->isNull, item->leafTuple, item->recheck, item->recheckDistances, item->distances); @@ -921,7 +1023,11 @@ redirect: } -/* storeRes subroutine for getbitmap case */ +/* + * storeRes subroutine for getbitmap case. + * The bitmap itself handles deduplication, so no extra work needed for + * multi-entry. + */ static void storeBitmap(SpGistScanOpaque so, ItemPointer heapPtr, Datum leafValue, bool isnull, @@ -1083,6 +1189,14 @@ spgcanreturn(Relation index, int attno) if (attno > 1) return true; + /* + * Multi-entry indexes store decomposed sub-entries in the key column, + * not the original datum, so the key column cannot be returned in an + * index-only scan. + */ + if (OidIsValid(index_getprocid(index, 1, SPGIST_EXTRACTVALUE_PROC))) + return false; + /* We can do it if the opclass config function says so */ cache = spgGetCache(index); diff --git a/src/backend/access/spgist/spgutils.c b/src/backend/access/spgist/spgutils.c index f2ee333f60d..ce30d76e8f5 100644 --- a/src/backend/access/spgist/spgutils.c +++ b/src/backend/access/spgist/spgutils.c @@ -246,10 +246,11 @@ spgGetCache(Relation index) if (cache->config.leafType != atttype) { - if (!OidIsValid(index_getprocid(index, 1, SPGIST_COMPRESS_PROC))) + if (!OidIsValid(index_getprocid(index, 1, SPGIST_COMPRESS_PROC)) && + !OidIsValid(index_getprocid(index, 1, SPGIST_EXTRACTVALUE_PROC))) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("compress method must be defined when leaf type is different from input type"))); + errmsg("compress or extractValue method must be defined when leaf type is different from input type"))); fillTypeDesc(&cache->attLeafType, cache->config.leafType); } @@ -1365,3 +1366,59 @@ spgproperty(Oid index_oid, int attno, return true; } + +/* + * spgExtractEntries -- extract multiple index entries from one heap tuple. + * + * Calls the opclass's extractValue function to decompose the indexed datum + * into multiple sub-entries. Returns an array of Datum values and sets + * *nentries to the count. *nullFlags is set to a boolean array indicating + * which entries are NULL (or NULL if none are). + * + * If the datum is NULL or extractValue returns no entries, a single NULL + * entry is produced. + */ +Datum * +spgExtractEntries(Relation index, Datum value, bool isnull, + int32 *nentries, bool **nullFlags) +{ + Datum *entries; + + /* NULL datum produces a single NULL entry */ + if (isnull) + { + *nentries = 1; + entries = palloc(sizeof(Datum)); + entries[0] = (Datum) 0; + *nullFlags = palloc(sizeof(bool)); + (*nullFlags)[0] = true; + return entries; + } + + /* Call the opclass's extractValue function */ + *nullFlags = NULL; + entries = (Datum *) + DatumGetPointer(FunctionCall3Coll(index_getprocinfo(index, 1, + SPGIST_EXTRACTVALUE_PROC), + index->rd_indcollation[0], + value, + PointerGetDatum(nentries), + PointerGetDatum(nullFlags))); + + /* Handle empty or NULL result: produce a single NULL entry */ + if (entries == NULL || *nentries <= 0) + { + *nentries = 1; + entries = palloc(sizeof(Datum)); + entries[0] = (Datum) 0; + *nullFlags = palloc(sizeof(bool)); + (*nullFlags)[0] = true; + return entries; + } + + /* Create nullFlags array if the function didn't */ + if (*nullFlags == NULL) + *nullFlags = palloc0_array(bool, *nentries); + + return entries; +} diff --git a/src/backend/access/spgist/spgvalidate.c b/src/backend/access/spgist/spgvalidate.c index 27c855921e6..34111058f24 100644 --- a/src/backend/access/spgist/spgvalidate.c +++ b/src/backend/access/spgist/spgvalidate.c @@ -175,6 +175,11 @@ spgvalidate(Oid opclassoid) case SPGIST_OPTIONS_PROC: ok = check_amoptsproc_signature(procform->amproc); break; + case SPGIST_EXTRACTVALUE_PROC: + ok = check_amproc_signature(procform->amproc, INTERNALOID, true, + 3, 3, INTERNALOID, INTERNALOID, + INTERNALOID); + break; default: ereport(INFO, (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), @@ -287,8 +292,12 @@ spgvalidate(Oid opclassoid) { if ((thisgroup->functionset & (((uint64) 1) << i)) != 0) continue; /* got it */ - if (i == SPGIST_OPTIONS_PROC) + if (i == SPGIST_OPTIONS_PROC || + i == SPGIST_EXTRACTVALUE_PROC) continue; /* optional method */ + if (i == SPGIST_COMPRESS_PROC && + (thisgroup->functionset & (((uint64) 1) << SPGIST_EXTRACTVALUE_PROC)) != 0) + continue; /* extractValue handles type conversion */ ereport(INFO, (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), errmsg("operator family \"%s\" of access method %s is missing support function %d for type %s", @@ -367,6 +376,7 @@ spgadjustmembers(Oid opfamilyoid, break; case SPGIST_COMPRESS_PROC: case SPGIST_OPTIONS_PROC: + case SPGIST_EXTRACTVALUE_PROC: /* Optional, so force it to be a soft family dependency */ op->ref_is_hard = false; op->ref_is_family = true; diff --git a/src/include/access/spgist.h b/src/include/access/spgist.h index 083d93f8ffd..efa7aa2d63b 100644 --- a/src/include/access/spgist.h +++ b/src/include/access/spgist.h @@ -27,8 +27,9 @@ #define SPGIST_LEAF_CONSISTENT_PROC 5 #define SPGIST_COMPRESS_PROC 6 #define SPGIST_OPTIONS_PROC 7 +#define SPGIST_EXTRACTVALUE_PROC 8 #define SPGISTNRequiredProc 5 -#define SPGISTNProc 7 +#define SPGISTNProc 8 /* * Argument structs for spg_config method diff --git a/src/include/access/spgist_private.h b/src/include/access/spgist_private.h index ec6d6f5f74d..f6e29acea8a 100644 --- a/src/include/access/spgist_private.h +++ b/src/include/access/spgist_private.h @@ -193,6 +193,13 @@ typedef struct SpGistScanOpaqueData MemoryContext tempCxt; /* short-lived memory context */ MemoryContext traversalCxt; /* single scan lifetime memory context */ + /* + * For multi-entry indexes: hash table for TID deduplication. Each heap + * tuple produces multiple index entries, so we track which TIDs have been + * returned. NULL for standard (non-multi-entry) indexes. + */ + struct spgtid_hash *tidHash; + /* Control flags showing whether to search nulls and/or non-nulls */ bool searchNulls; /* scan matches (all) null entries */ bool searchNonNulls; /* scan matches (some) non-null entries */ @@ -532,6 +539,8 @@ extern OffsetNumber SpGistPageAddNewItem(SpGistState *state, Page page, extern bool spgproperty(Oid index_oid, int attno, IndexAMProperty prop, const char *propname, bool *res, bool *isnull); +extern Datum *spgExtractEntries(Relation index, Datum value, bool isnull, + int32 *nentries, bool **nullFlags); /* spgdoinsert.c */ extern void spgUpdateNodeLink(SpGistInnerTuple tup, int nodeN, -- 2.50.1 (Apple Git-155)