From 7388571d02681075ed7e9b00507fd02b3103e44c Mon Sep 17 00:00:00 2001
From: Maxime Schoemans
Date: Thu, 2 Apr 2026 18:11:44 +0200
Subject: [PATCH v1 1/4] Add multi-entry support to GiST

Add infrastructure for GiST indexes to store multiple index entries per
heap tuple, similar to how GIN decomposes values via extractValue but
within GiST's R-tree framework.

A new optional support function, extractValue (support procedure 13), is
added. When an opclass provides it, the function is called during insert
and index build to decompose each datum into multiple sub-entries, each
stored as a separate index tuple pointing to the same heap TID.

On the scan side, a simplehash-based TID deduplication hash table ensures
each heap tuple is returned only once despite having multiple index
entries. Three scan modes are handled:

- Bitmap scans: the TIDBitmap handles deduplication inherently.
- Non-ordered scans: the hash table filters out duplicate TIDs before
  they are stored in pageData.
- Ordered (KNN) scans: leaf items whose TID has already been returned are
  skipped when enqueuing, and each TID is recorded in the hash table when
  it is dequeued from the pairing heap. Because the heap yields the
  smallest distance first, the first (nearest) entry for each TID wins.

Other changes:

- gistcanreturn() disables index-only scans on key columns that use
  extractValue, since the original datum cannot be reconstructed from a
  single component.
- Multi-entry is restricted to single-key-column indexes (INCLUDE columns
  are allowed). Multi-column support is left for future work.
- gistvalidate.c marks extractValue as optional and validates its
  signature (internal, internal, internal) -> internal.
--- src/backend/access/gist/gist.c | 58 ++++++++++- src/backend/access/gist/gistbuild.c | 72 +++++++++----- src/backend/access/gist/gistget.c | 130 ++++++++++++++++++++++++- src/backend/access/gist/gistscan.c | 4 + src/backend/access/gist/gistutil.c | 68 +++++++++++++ src/backend/access/gist/gistvalidate.c | 9 +- src/include/access/gist.h | 3 +- src/include/access/gist_private.h | 14 +++ 8 files changed, 327 insertions(+), 31 deletions(-) diff --git a/src/backend/access/gist/gist.c b/src/backend/access/gist/gist.c index 8565e225be7..bb280e6ea5a 100644 --- a/src/backend/access/gist/gist.c +++ b/src/backend/access/gist/gist.c @@ -161,6 +161,10 @@ gistbuildempty(Relation index) * * This is the public interface routine for tuple insertion in GiSTs. * It doesn't do any work; just locks the relation and passes the buck. + * + * If the opclass provides an extractValue function (multi-entry mode), + * a single heap tuple may produce multiple index entries. Each entry + * is inserted separately, all pointing to the same heap TID. */ bool gistinsert(Relation r, Datum *values, bool *isnull, @@ -170,7 +174,6 @@ gistinsert(Relation r, Datum *values, bool *isnull, IndexInfo *indexInfo) { GISTSTATE *giststate = (GISTSTATE *) indexInfo->ii_AmCache; - IndexTuple itup; MemoryContext oldCxt; /* Initialize GISTSTATE cache if first call in this statement */ @@ -185,10 +188,31 @@ gistinsert(Relation r, Datum *values, bool *isnull, oldCxt = MemoryContextSwitchTo(giststate->tempCxt); - itup = gistFormTuple(giststate, r, values, isnull, true); - itup->t_tid = *ht_ctid; + /* + * If the opclass provides an extractValue function, extract multiple + * entries and insert each one separately. 
+ */ + if (OidIsValid(giststate->extractValueFn[0].fn_oid)) + { + IndexTuple *itups; + int32 nitups; + int i; - gistdoinsert(r, itup, 0, giststate, heapRel, false); + itups = gistExtractEntries(giststate, r, values, isnull, &nitups); + for (i = 0; i < nitups; i++) + { + itups[i]->t_tid = *ht_ctid; + gistdoinsert(r, itups[i], 0, giststate, heapRel, false); + } + } + else + { + IndexTuple itup; + + itup = gistFormTuple(giststate, r, values, isnull, true); + itup->t_tid = *ht_ctid; + gistdoinsert(r, itup, 0, giststate, heapRel, false); + } /* cleanup */ MemoryContextSwitchTo(oldCxt); @@ -1623,6 +1647,14 @@ initGISTstate(Relation index) else giststate->fetchFn[i].fn_oid = InvalidOid; + /* opclasses are not required to provide an ExtractValue method */ + if (OidIsValid(index_getprocid(index, i + 1, GIST_EXTRACTVALUE_PROC))) + fmgr_info_copy(&(giststate->extractValueFn[i]), + index_getprocinfo(index, i + 1, GIST_EXTRACTVALUE_PROC), + scanCxt); + else + giststate->extractValueFn[i].fn_oid = InvalidOid; + /* * If the index column has a specified collation, we should honor that * while doing comparisons. However, we may have a collatable storage @@ -1640,6 +1672,23 @@ initGISTstate(Relation index) giststate->supportCollation[i] = DEFAULT_COLLATION_OID; } + /* + * Multi-entry indexes (those with extractValue) are currently only + * supported for single-column indexes. The semantics of decomposing + * multiple columns simultaneously are unclear (cross product? parallel + * arrays?), so we disallow it for now. 
+ */ + if (IndexRelationGetNumberOfKeyAttributes(index) > 1) + { + for (i = 0; i < IndexRelationGetNumberOfKeyAttributes(index); i++) + { + if (OidIsValid(giststate->extractValueFn[i].fn_oid)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("multi-entry GiST indexes do not support multiple key columns"))); + } + } + /* No opclass information for INCLUDE attributes */ for (; i < index->rd_att->natts; i++) { @@ -1652,6 +1701,7 @@ initGISTstate(Relation index) giststate->equalFn[i].fn_oid = InvalidOid; giststate->distanceFn[i].fn_oid = InvalidOid; giststate->fetchFn[i].fn_oid = InvalidOid; + giststate->extractValueFn[i].fn_oid = InvalidOid; giststate->supportCollation[i] = InvalidOid; } diff --git a/src/backend/access/gist/gistbuild.c b/src/backend/access/gist/gistbuild.c index 7f57c787f4c..ef786682b84 100644 --- a/src/backend/access/gist/gistbuild.c +++ b/src/backend/access/gist/gistbuild.c @@ -827,21 +827,8 @@ gistBuildCallback(Relation index, void *state) { GISTBuildState *buildstate = (GISTBuildState *) state; - IndexTuple itup; MemoryContext oldCtx; - oldCtx = MemoryContextSwitchTo(buildstate->giststate->tempCxt); - - /* form an index tuple and point it at the heap tuple */ - itup = gistFormTuple(buildstate->giststate, index, - values, isnull, - true); - itup->t_tid = *tid; - - /* Update tuple count and total size. */ - buildstate->indtuples += 1; - buildstate->indtuplesSize += IndexTupleSize(itup); - /* * XXX In buffering builds, the tempCxt is also reset down inside * gistProcessEmptyingQueue(). This is not great because it risks @@ -850,20 +837,61 @@ gistBuildCallback(Relation index, * better that a memory context be "owned" by only one function. However, * currently this isn't causing issues so it doesn't seem worth the amount * of refactoring that would be needed to avoid it. + * + * If the opclass provides an extractValue function, extract multiple + * entries and insert each one. Otherwise, form a single index tuple. 
+ * + * We extract entries in the caller's memory context so that the itups + * array survives MemoryContextReset(tempCxt) inside + * gistProcessEmptyingQueue during buffering builds. */ - if (buildstate->buildMode == GIST_BUFFERING_ACTIVE) + if (OidIsValid(buildstate->giststate->extractValueFn[0].fn_oid)) { - /* We have buffers, so use them. */ - gistBufferingBuildInsert(buildstate, itup); + IndexTuple *itups; + int32 nitups; + int i; + + itups = gistExtractEntries(buildstate->giststate, index, + values, isnull, &nitups); + + oldCtx = MemoryContextSwitchTo(buildstate->giststate->tempCxt); + + for (i = 0; i < nitups; i++) + { + itups[i]->t_tid = *tid; + + /* Update tuple count and total size */ + buildstate->indtuples += 1; + buildstate->indtuplesSize += IndexTupleSize(itups[i]); + + if (buildstate->buildMode == GIST_BUFFERING_ACTIVE) + gistBufferingBuildInsert(buildstate, itups[i]); + else + gistdoinsert(index, itups[i], buildstate->freespace, + buildstate->giststate, buildstate->heaprel, true); + } } else { - /* - * There's no buffers (yet). Since we already have the index relation - * locked, we call gistdoinsert directly. - */ - gistdoinsert(index, itup, buildstate->freespace, - buildstate->giststate, buildstate->heaprel, true); + IndexTuple itup; + + oldCtx = MemoryContextSwitchTo(buildstate->giststate->tempCxt); + + /* form an index tuple and point it at the heap tuple */ + itup = gistFormTuple(buildstate->giststate, index, + values, isnull, + true); + itup->t_tid = *tid; + + /* Update tuple count and total size. 
*/ + buildstate->indtuples += 1; + buildstate->indtuplesSize += IndexTupleSize(itup); + + if (buildstate->buildMode == GIST_BUFFERING_ACTIVE) + gistBufferingBuildInsert(buildstate, itup); + else + gistdoinsert(index, itup, buildstate->freespace, + buildstate->giststate, buildstate->heaprel, true); } MemoryContextSwitchTo(oldCtx); diff --git a/src/backend/access/gist/gistget.c b/src/backend/access/gist/gistget.c index 4d7c100d737..a039cfd4575 100644 --- a/src/backend/access/gist/gistget.c +++ b/src/backend/access/gist/gistget.c @@ -17,6 +17,7 @@ #include "access/genam.h" #include "access/gist_private.h" #include "access/relscan.h" +#include "common/hashfn.h" #include "executor/instrument_node.h" #include "lib/pairingheap.h" #include "miscadmin.h" @@ -26,6 +27,49 @@ #include "utils/memutils.h" #include "utils/rel.h" +/* + * Simplehash implementation for TID deduplication in multi-entry scans. + * + * When an opclass provides an extractValue function, each heap tuple produces + * multiple index entries. During scans, we must deduplicate results so that + * each heap TID is returned only once. 
+ */
+
+/* TID dedup hash entry; the hash is cheap to recompute, so it isn't cached */
+typedef struct GISTTIDHashEntry
+{
+	ItemPointerData tid;		/* TID (hashtable key) */
+	char		status;			/* hash status */
+} GISTTIDHashEntry;
+
+/* Hash a heap TID by mixing its block number and offset number */
+static inline uint32
+gist_tid_hash_fn(ItemPointerData tid)
+{
+	uint32		h = murmurhash32(ItemPointerGetBlockNumber(&tid));
+
+	return murmurhash32(h + ItemPointerGetOffsetNumber(&tid));
+}
+
+static inline bool
+gist_tid_match_fn(ItemPointerData a, ItemPointerData b)
+{
+	return ItemPointerEquals(&a, &b);
+}
+
+/* --- gisttid hash table (declare + define) --- */
+#define SH_PREFIX gisttid
+#define SH_ELEMENT_TYPE GISTTIDHashEntry
+#define SH_KEY_TYPE ItemPointerData
+#define SH_KEY tid
+#define SH_HASH_KEY(tb, key) gist_tid_hash_fn(key)
+#define SH_EQUAL(tb, a, b) gist_tid_match_fn(a, b)
+#define SH_SCOPE static inline
+#define SH_DECLARE
+#define SH_DEFINE
+#include "lib/simplehash.h"
+
+
 /*
  * gistkillitems() -- set LP_DEAD state for items an indexscan caller has
  * told us were killed.
@@ -456,7 +500,8 @@ gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem,
 		{
 			/*
 			 * getbitmap scan, so just push heap tuple TIDs into the bitmap
-			 * without worrying about ordering
+			 * without worrying about ordering. The bitmap itself handles
+			 * deduplication, so no extra work needed for multi-entry.
 			 */
 			tbm_add_tuples(tbm, &it->t_tid, 1, recheck);
 			(*ntids)++;
@@ -464,8 +509,20 @@ gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem,
 		else if (scan->numberOfOrderBys == 0 && GistPageIsLeaf(page))
 		{
 			/*
-			 * Non-ordered scan, so report tuples in so->pageData[]
+			 * Non-ordered scan, so report tuples in so->pageData[].
+			 *
+			 * For multi-entry indexes, check the TID hash table to avoid
+			 * returning duplicate heap TIDs.
*/ + if (so->tidHash) + { + bool found; + + gisttid_insert(so->tidHash, it->t_tid, &found); + if (found) + continue; /* already seen this TID */ + } + so->pageData[so->nPageData].heapPtr = it->t_tid; so->pageData[so->nPageData].recheck = recheck; so->pageData[so->nPageData].offnum = i; @@ -495,6 +552,20 @@ gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem, oldcxt = MemoryContextSwitchTo(so->queueCxt); + /* + * For multi-entry ordered scans, skip heap tuples whose TIDs + * were already returned by getNextNearest. We use lookup + * (not insert) here: a TID must remain enqueueable until it + * is actually dequeued, so that the pairing heap can pick the + * copy with the smallest distance. + */ + if (GistPageIsLeaf(page) && + so->tidHash && gisttid_lookup(so->tidHash, it->t_tid)) + { + MemoryContextSwitchTo(oldcxt); + continue; + } + /* Create new GISTSearchItem for this item */ item = palloc(SizeOfGISTSearchItem(scan->numberOfOrderBys)); @@ -587,7 +658,27 @@ getNextNearest(IndexScanDesc scan) if (GISTSearchItemIsHeap(*item)) { - /* found a heap item at currently minimal distance */ + /* + * Found a heap item at currently minimal distance. + * + * For multi-entry ordered scans, deduplicate using tidHash to + * ensure each TID is returned only once. Duplicate entries + * for the same TID may exist in the queue with different + * distances; the pairing heap ensures we see the smallest + * distance first, and tidHash skips subsequent duplicates. + */ + if (so->tidHash) + { + bool found; + + gisttid_insert(so->tidHash, item->data.heap.heapPtr, &found); + if (found) + { + pfree(item); + continue; /* already returned this TID */ + } + } + scan->xs_heaptid = item->data.heap.heapPtr; scan->xs_recheck = item->data.heap.recheck; @@ -643,6 +734,30 @@ gistgettuple(IndexScanDesc scan, ScanDirection dir) if (so->pageDataCxt) MemoryContextReset(so->pageDataCxt); + /* + * For multi-entry indexes, set up TID deduplication hash tables. 
+ * We check column 0 for extractValueFn as a proxy for multi-entry. + */ + if (OidIsValid(so->giststate->extractValueFn[0].fn_oid)) + { + MemoryContext oldHashCxt; + + /* + * Create a dedicated context for the hash tables so they can + * be reset independently. + */ + if (so->tidHashCxt == so->giststate->scanCxt) + so->tidHashCxt = AllocSetContextCreate(so->giststate->scanCxt, + "GiST TID hash context", + ALLOCSET_DEFAULT_SIZES); + else + MemoryContextReset(so->tidHashCxt); + + oldHashCxt = MemoryContextSwitchTo(so->tidHashCxt); + so->tidHash = gisttid_create(so->tidHashCxt, 256, NULL); + MemoryContextSwitchTo(oldHashCxt); + } + fakeItem.blkno = GIST_ROOT_BLKNO; memset(&fakeItem.data.parentlsn, 0, sizeof(GistNSN)); gistScanPage(scan, &fakeItem, NULL, NULL, NULL); @@ -805,6 +920,15 @@ gistgetbitmap(IndexScanDesc scan, TIDBitmap *tbm) bool gistcanreturn(Relation index, int attno) { + /* + * Multi-entry indexes store decomposed sub-entries in key columns, not the + * original datum, so key columns cannot be returned in an index-only scan. + * INCLUDE columns are still returnable. 
+ */ + if (attno <= IndexRelationGetNumberOfKeyAttributes(index) && + OidIsValid(index_getprocid(index, attno, GIST_EXTRACTVALUE_PROC))) + return false; + if (attno > IndexRelationGetNumberOfKeyAttributes(index) || OidIsValid(index_getprocid(index, attno, GIST_FETCH_PROC)) || !OidIsValid(index_getprocid(index, attno, GIST_COMPRESS_PROC))) diff --git a/src/backend/access/gist/gistscan.c b/src/backend/access/gist/gistscan.c index c65f93abdae..4d9a4a148cd 100644 --- a/src/backend/access/gist/gistscan.c +++ b/src/backend/access/gist/gistscan.c @@ -96,6 +96,10 @@ gistbeginscan(Relation r, int nkeys, int norderbys) so->queue = NULL; so->queueCxt = giststate->scanCxt; /* see gistrescan */ + /* Initialize multi-entry TID dedup fields (NULL if not multi-entry) */ + so->tidHash = NULL; + so->tidHashCxt = giststate->scanCxt; + /* workspaces with size dependent on numberOfOrderBys: */ so->distances = palloc(sizeof(so->distances[0]) * scan->numberOfOrderBys); so->qual_ok = true; /* in case there are zero keys */ diff --git a/src/backend/access/gist/gistutil.c b/src/backend/access/gist/gistutil.c index 0f58f61879f..42ab801f7bf 100644 --- a/src/backend/access/gist/gistutil.c +++ b/src/backend/access/gist/gistutil.c @@ -1007,6 +1007,74 @@ gistproperty(Oid index_oid, int attno, return true; } +/* + * gistExtractEntries -- extract multiple index entries from one heap tuple. + * + * Calls the opclass's extractValue function to decompose the indexed datum + * into multiple sub-entries. Returns an array of IndexTuples, one per + * sub-entry. + * + * Currently only single-key-column indexes are supported (enforced by + * initGISTstate). INCLUDE columns are preserved on every entry. + * If the datum is NULL or extractValue returns no entries, a single NULL + * index entry is produced. 
+ */
+IndexTuple *
+gistExtractEntries(GISTSTATE *giststate, Relation index,
+				   Datum *values, bool *isnull, int32 *nentries)
+{
+	Datum		vals[INDEX_MAX_KEYS];
+	bool		nulls[INDEX_MAX_KEYS];
+	Datum	   *entries = NULL;
+	bool	   *nullFlags = NULL;
+	IndexTuple *result;
+	int			i;
+
+	Assert(IndexRelationGetNumberOfKeyAttributes(index) == 1);
+
+	/*
+	 * Work on private copies: the caller may still need values[]/isnull[]
+	 * later (e.g. for an exclusion-constraint check), so don't modify them.
+	 */
+	memcpy(vals, values, index->rd_att->natts * sizeof(Datum));
+	memcpy(nulls, isnull, index->rd_att->natts * sizeof(bool));
+
+	/* Decompose a non-NULL datum with the opclass's extractValue function */
+	if (!nulls[0])
+		entries = (Datum *)
+			DatumGetPointer(FunctionCall3Coll(&giststate->extractValueFn[0],
+											  giststate->supportCollation[0],
+											  vals[0],
+											  PointerGetDatum(nentries),
+											  PointerGetDatum(&nullFlags)));
+
+	/* NULL datum or nothing extracted: produce a single NULL index entry */
+	if (entries == NULL || *nentries <= 0)
+	{
+		*nentries = 1;
+		vals[0] = (Datum) 0;
+		nulls[0] = true;
+		result = palloc(sizeof(IndexTuple));
+		result[0] = gistFormTuple(giststate, index, vals, nulls, true);
+		return result;
+	}
+
+	/* Create nullFlags array if the function didn't */
+	if (nullFlags == NULL)
+		nullFlags = palloc0_array(bool, *nentries);
+
+	/* Form one index tuple per extracted entry */
+	result = palloc_array(IndexTuple, *nentries);
+	for (i = 0; i < *nentries; i++)
+	{
+		vals[0] = entries[i];
+		nulls[0] = nullFlags[i];
+		result[i] = gistFormTuple(giststate, index, vals, nulls, true);
+	}
+
+	return result;
+}
+
 /*
  * This is a stratnum translation support function for GiST opclasses that use
  * the RT*StrategyNumber constants.
diff --git a/src/backend/access/gist/gistvalidate.c b/src/backend/access/gist/gistvalidate.c index 56feb8d8400..bffb048b1a9 100644 --- a/src/backend/access/gist/gistvalidate.c +++ b/src/backend/access/gist/gistvalidate.c @@ -144,6 +144,11 @@ gistvalidate(Oid opclassoid) procform->amproclefttype == ANYOID && procform->amprocrighttype == ANYOID; break; + case GIST_EXTRACTVALUE_PROC: + ok = check_amproc_signature(procform->amproc, INTERNALOID, true, + 3, 3, INTERNALOID, INTERNALOID, + INTERNALOID); + break; default: ereport(INFO, (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), @@ -265,7 +270,8 @@ gistvalidate(Oid opclassoid) if (i == GIST_DISTANCE_PROC || i == GIST_FETCH_PROC || i == GIST_COMPRESS_PROC || i == GIST_DECOMPRESS_PROC || i == GIST_OPTIONS_PROC || i == GIST_SORTSUPPORT_PROC || - i == GIST_TRANSLATE_CMPTYPE_PROC) + i == GIST_TRANSLATE_CMPTYPE_PROC || + i == GIST_EXTRACTVALUE_PROC) continue; /* optional methods */ ereport(INFO, (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), @@ -337,6 +343,7 @@ gistadjustmembers(Oid opfamilyoid, case GIST_OPTIONS_PROC: case GIST_SORTSUPPORT_PROC: case GIST_TRANSLATE_CMPTYPE_PROC: + case GIST_EXTRACTVALUE_PROC: /* Optional, so force it to be a soft family dependency */ op->ref_is_hard = false; op->ref_is_family = true; diff --git a/src/include/access/gist.h b/src/include/access/gist.h index 9b385b13a88..68f8eca550e 100644 --- a/src/include/access/gist.h +++ b/src/include/access/gist.h @@ -41,7 +41,8 @@ #define GIST_OPTIONS_PROC 10 #define GIST_SORTSUPPORT_PROC 11 #define GIST_TRANSLATE_CMPTYPE_PROC 12 -#define GISTNProcs 12 +#define GIST_EXTRACTVALUE_PROC 13 +#define GISTNProcs 13 /* * Page opaque data in a GiST index page. 
diff --git a/src/include/access/gist_private.h b/src/include/access/gist_private.h index 44514f1cb8d..5caf7a1956e 100644 --- a/src/include/access/gist_private.h +++ b/src/include/access/gist_private.h @@ -92,6 +92,7 @@ typedef struct GISTSTATE FmgrInfo equalFn[INDEX_MAX_KEYS]; FmgrInfo distanceFn[INDEX_MAX_KEYS]; FmgrInfo fetchFn[INDEX_MAX_KEYS]; + FmgrInfo extractValueFn[INDEX_MAX_KEYS]; /* Collations to pass to the support functions */ Oid supportCollation[INDEX_MAX_KEYS]; @@ -156,6 +157,14 @@ typedef struct GISTScanOpaqueData GISTSTATE *giststate; /* index information, see above */ Oid *orderByTypes; /* datatypes of ORDER BY expressions */ + /* + * For multi-entry indexes: hash table for TID deduplication. Each heap + * tuple produces multiple index entries, so we track which TIDs have been + * returned. NULL for standard (non-multi-entry) indexes. + */ + struct gisttid_hash *tidHash; + MemoryContext tidHashCxt; /* context holding the hash table */ + pairingheap *queue; /* queue of unvisited items */ MemoryContext queueCxt; /* context holding the queue */ bool qual_ok; /* false if qual can never be satisfied */ @@ -547,6 +556,11 @@ extern void gistSplitByKey(Relation r, Page page, IndexTuple *itup, extern IndexBuildResult *gistbuild(Relation heap, Relation index, struct IndexInfo *indexInfo); +/* gistutil.c */ +extern IndexTuple *gistExtractEntries(GISTSTATE *giststate, Relation index, + Datum *values, bool *isnull, + int32 *nentries); + /* gistbuildbuffers.c */ extern GISTBuildBuffers *gistInitBuildBuffers(int pagesPerBuffer, int levelStep, int maxLevel); -- 2.50.1 (Apple Git-155)