From 0fcccd4785b8015326d24ef8b7205b44704da9b3 Mon Sep 17 00:00:00 2001 From: Mikhail Nikalayeu Date: Sat, 21 Dec 2024 18:36:10 +0100 Subject: [PATCH v26 3/8] Add STIR access method and flags related to auxiliary indexes This patch provides infrastructure for subsequent enhancements to concurrent index builds: - ii_Auxiliary in IndexInfo: indicates that an index is an auxiliary index used during concurrent index build - validate_index in IndexVacuumInfo: set if index_bulk_delete is called during the validation phase of concurrent index build - STIR (Short-Term Index Replacement) access method is introduced, intended solely for short-lived, auxiliary usage STIR is designed as an ephemeral helper during concurrent index builds, temporarily storing TIDs without providing the full features of a typical access method. As such, it raises warnings or errors when accessed outside its specialized usage path. It is planned to be used in the following commits. --- contrib/pgstattuple/pgstattuple.c | 3 + src/backend/access/Makefile | 2 +- src/backend/access/heap/vacuumlazy.c | 2 + src/backend/access/meson.build | 1 + src/backend/access/stir/Makefile | 18 + src/backend/access/stir/meson.build | 5 + src/backend/access/stir/stir.c | 581 +++++++++++++++++++++++ src/backend/catalog/index.c | 1 + src/backend/catalog/toasting.c | 1 + src/backend/commands/analyze.c | 1 + src/backend/commands/vacuumparallel.c | 1 + src/backend/nodes/makefuncs.c | 1 + src/include/access/genam.h | 1 + src/include/access/reloptions.h | 3 +- src/include/access/stir.h | 117 +++++ src/include/catalog/pg_am.dat | 3 + src/include/catalog/pg_opclass.dat | 4 + src/include/catalog/pg_opfamily.dat | 2 + src/include/catalog/pg_proc.dat | 4 + src/include/nodes/execnodes.h | 7 +- src/include/utils/index_selfuncs.h | 8 + src/test/regress/expected/amutils.out | 8 +- src/test/regress/expected/opr_sanity.out | 7 +- src/test/regress/expected/psql.out | 24 +- 24 files changed, 786 insertions(+), 19 deletions(-) create mode 
100644 src/backend/access/stir/Makefile create mode 100644 src/backend/access/stir/meson.build create mode 100644 src/backend/access/stir/stir.c create mode 100644 src/include/access/stir.h diff --git a/contrib/pgstattuple/pgstattuple.c b/contrib/pgstattuple/pgstattuple.c index 6a7f8cb4a7c..5b5984e3aa2 100644 --- a/contrib/pgstattuple/pgstattuple.c +++ b/contrib/pgstattuple/pgstattuple.c @@ -285,6 +285,9 @@ pgstat_relation(Relation rel, FunctionCallInfo fcinfo) case SPGIST_AM_OID: err = "spgist index"; break; + case STIR_AM_OID: + err = "stir index"; + break; case BRIN_AM_OID: err = "brin index"; break; diff --git a/src/backend/access/Makefile b/src/backend/access/Makefile index 1932d11d154..cd6524a54ab 100644 --- a/src/backend/access/Makefile +++ b/src/backend/access/Makefile @@ -9,6 +9,6 @@ top_builddir = ../../.. include $(top_builddir)/src/Makefile.global SUBDIRS = brin common gin gist hash heap index nbtree rmgrdesc spgist \ - sequence table tablesample transam + stir sequence table tablesample transam include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c index deb9a3dc0d1..0b6ffd6ec6e 100644 --- a/src/backend/access/heap/vacuumlazy.c +++ b/src/backend/access/heap/vacuumlazy.c @@ -3121,6 +3121,7 @@ lazy_vacuum_one_index(Relation indrel, IndexBulkDeleteResult *istat, ivinfo.message_level = DEBUG2; ivinfo.num_heap_tuples = reltuples; ivinfo.strategy = vacrel->bstrategy; + ivinfo.validate_index = false; /* * Update error traceback information. @@ -3172,6 +3173,7 @@ lazy_cleanup_one_index(Relation indrel, IndexBulkDeleteResult *istat, ivinfo.num_heap_tuples = reltuples; ivinfo.strategy = vacrel->bstrategy; + ivinfo.validate_index = false; /* * Update error traceback information. 
diff --git a/src/backend/access/meson.build b/src/backend/access/meson.build index 7a2d0ddb689..a156cddff35 100644 --- a/src/backend/access/meson.build +++ b/src/backend/access/meson.build @@ -11,6 +11,7 @@ subdir('nbtree') subdir('rmgrdesc') subdir('sequence') subdir('spgist') +subdir('stir') subdir('table') subdir('tablesample') subdir('transam') diff --git a/src/backend/access/stir/Makefile b/src/backend/access/stir/Makefile new file mode 100644 index 00000000000..fae5898b8d7 --- /dev/null +++ b/src/backend/access/stir/Makefile @@ -0,0 +1,18 @@ +#------------------------------------------------------------------------- +# +# Makefile-- +# Makefile for access/stir +# +# IDENTIFICATION +# src/backend/access/stir/Makefile +# +#------------------------------------------------------------------------- + +subdir = src/backend/access/stir +top_builddir = ../../../.. +include $(top_builddir)/src/Makefile.global + +OBJS = \ + stir.o + +include $(top_srcdir)/src/backend/common.mk \ No newline at end of file diff --git a/src/backend/access/stir/meson.build b/src/backend/access/stir/meson.build new file mode 100644 index 00000000000..39c6eca848d --- /dev/null +++ b/src/backend/access/stir/meson.build @@ -0,0 +1,5 @@ +# Copyright (c) 2025, PostgreSQL Global Development Group + +backend_sources += files( + 'stir.c', +) \ No newline at end of file diff --git a/src/backend/access/stir/stir.c b/src/backend/access/stir/stir.c new file mode 100644 index 00000000000..2e083d952d8 --- /dev/null +++ b/src/backend/access/stir/stir.c @@ -0,0 +1,581 @@ +/*------------------------------------------------------------------------- + * + * stir.c + * Implementation of Short-Term Index Replacement. + * + * STIR is a specialized access method type designed for temporary storage + * of TID values during concurrent index build operations. + * + * The typical lifecycle of a STIR index is: + * 1. created as an auxiliary index for CIC/RIC + * 2. accepts inserts for a period + * 3. 
stirbulkdelete called during index validation phase + * 4. gets dropped + * + * Portions Copyright (c) 2025, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/backend/access/stir/stir.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/stir.h" +#include "miscadmin.h" +#include "access/amvalidate.h" +#include "access/htup_details.h" +#include "access/tableam.h" +#include "catalog/index.h" +#include "catalog/pg_amop.h" +#include "catalog/pg_opclass.h" +#include "catalog/pg_opfamily.h" +#include "commands/vacuum.h" +#include "storage/bufmgr.h" +#include "utils/catcache.h" +#include "utils/fmgrprotos.h" +#include "utils/index_selfuncs.h" +#include "utils/memutils.h" +#include "utils/regproc.h" +#include "utils/syscache.h" + +/* + * Stir handler function: return IndexAmRoutine with access method parameters + * and callbacks. + */ +Datum +stirhandler(PG_FUNCTION_ARGS) +{ + IndexAmRoutine *amroutine = makeNode(IndexAmRoutine); + + /* Set STIR-specific strategy and procedure numbers */ + amroutine->amstrategies = STIR_NSTRATEGIES; + amroutine->amsupport = STIR_NPROC; + amroutine->amoptsprocnum = STIR_OPTIONS_PROC; + + /* STIR doesn't support most index operations */ + amroutine->amcanorder = false; + amroutine->amcanorderbyop = false; + amroutine->amcanbackward = false; + amroutine->amcanunique = false; + amroutine->amcanmulticol = true; + amroutine->amoptionalkey = true; + amroutine->amsearcharray = false; + amroutine->amsearchnulls = false; + amroutine->amstorage = false; + amroutine->amclusterable = false; + amroutine->ampredlocks = false; + amroutine->amcanparallel = false; + amroutine->amcanbuildparallel = false; + amroutine->amcaninclude = true; + amroutine->amusemaintenanceworkmem = false; + amroutine->amparallelvacuumoptions = + VACUUM_OPTION_PARALLEL_BULKDEL | VACUUM_OPTION_PARALLEL_CLEANUP; + amroutine->amkeytype = InvalidOid; + + /* Set up function callbacks */ + 
amroutine->ambuild = stirbuild; + amroutine->ambuildempty = stirbuildempty; + amroutine->aminsert = stirinsert; + amroutine->aminsertcleanup = NULL; + amroutine->ambulkdelete = stirbulkdelete; + amroutine->amvacuumcleanup = stirvacuumcleanup; + amroutine->amcanreturn = NULL; + amroutine->amcostestimate = stircostestimate; + amroutine->amoptions = stiroptions; + amroutine->amproperty = NULL; + amroutine->ambuildphasename = NULL; + amroutine->amvalidate = stirvalidate; + amroutine->amadjustmembers = NULL; + amroutine->ambeginscan = stirbeginscan; + amroutine->amrescan = stirrescan; + amroutine->amgettuple = NULL; + amroutine->amgetbitmap = NULL; + amroutine->amendscan = stirendscan; + amroutine->ammarkpos = NULL; + amroutine->amrestrpos = NULL; + amroutine->amestimateparallelscan = NULL; + amroutine->aminitparallelscan = NULL; + amroutine->amparallelrescan = NULL; + + PG_RETURN_POINTER(amroutine); +} + +/* + * Validates operator class for STIR index. + * + * STIR is not a real index, so validation may be skipped. + * But we do it just for consistency. 
+ */ +bool +stirvalidate(Oid opclassoid) +{ + bool result = true; + HeapTuple classtup; + Form_pg_opclass classform; + Oid opfamilyoid; + HeapTuple familytup; + Form_pg_opfamily familyform; + char *opfamilyname; + CatCList *proclist, + *oprlist; + int i; + + /* Fetch opclass information */ + classtup = SearchSysCache1(CLAOID, ObjectIdGetDatum(opclassoid)); + if (!HeapTupleIsValid(classtup)) + elog(ERROR, "cache lookup failed for operator class %u", opclassoid); + classform = (Form_pg_opclass) GETSTRUCT(classtup); + + opfamilyoid = classform->opcfamily; + + + /* Fetch opfamily information */ + familytup = SearchSysCache1(OPFAMILYOID, ObjectIdGetDatum(opfamilyoid)); + if (!HeapTupleIsValid(familytup)) + elog(ERROR, "cache lookup failed for operator family %u", opfamilyoid); + familyform = (Form_pg_opfamily) GETSTRUCT(familytup); + + opfamilyname = NameStr(familyform->opfname); + + /* Fetch all operators and support functions of the opfamily */ + oprlist = SearchSysCacheList1(AMOPSTRATEGY, ObjectIdGetDatum(opfamilyoid)); + proclist = SearchSysCacheList1(AMPROCNUM, ObjectIdGetDatum(opfamilyoid)); + + /* Check individual operators */ + for (i = 0; i < oprlist->n_members; i++) + { + HeapTuple oprtup = &oprlist->members[i]->tuple; + Form_pg_amop oprform = (Form_pg_amop) GETSTRUCT(oprtup); + + /* Check it's allowed strategy for stir */ + if (oprform->amopstrategy < 1 || + oprform->amopstrategy > STIR_NSTRATEGIES) + { + ereport(INFO, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("stir opfamily %s contains operator %s with invalid strategy number %d", + opfamilyname, + format_operator(oprform->amopopr), + oprform->amopstrategy))); + result = false; + } + + /* stir doesn't support ORDER BY operators */ + if (oprform->amoppurpose != AMOP_SEARCH || + OidIsValid(oprform->amopsortfamily)) + { + ereport(INFO, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("stir opfamily %s contains invalid ORDER BY specification for operator %s", + opfamilyname, + 
format_operator(oprform->amopopr)))); + result = false; + } + + /* Check operator signature --- same for all stir strategies */ + if (!check_amop_signature(oprform->amopopr, BOOLOID, + oprform->amoplefttype, + oprform->amoprighttype)) + { + ereport(INFO, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("stir opfamily %s contains operator %s with wrong signature", + opfamilyname, + format_operator(oprform->amopopr)))); + result = false; + } + } + + + ReleaseCatCacheList(proclist); + ReleaseCatCacheList(oprlist); + ReleaseSysCache(familytup); + ReleaseSysCache(classtup); + + return result; +} + + +/* + * Initialize metapage of a STIR index. + * The skipInserts flag determines if new inserts will be accepted or skipped. + */ +void +StirFillMetapage(Relation index, Page metaPage, bool skipInserts) +{ + StirMetaPageData *metadata; + + StirInitPage(metaPage, STIR_META); + metadata = StirPageGetMeta(metaPage); + memset(metadata, 0, sizeof(StirMetaPageData)); + metadata->magickNumber = STIR_MAGICK_NUMBER; + metadata->skipInserts = skipInserts; + ((PageHeader) metaPage)->pd_lower += sizeof(StirMetaPageData); +} + +/* + * Create and initialize the metapage for a STIR index. + * This is called during index creation. + */ +void +StirInitMetapage(Relation index, ForkNumber forknum) +{ + Buffer metaBuffer; + Page metaPage; + + Assert(!RelationNeedsWAL(index)); + /* + * Make a new page; since it is first page it should be associated with + * block number 0 (STIR_METAPAGE_BLKNO). No need to hold the extension + * lock because there cannot be concurrent inserters yet. 
+ */ + metaBuffer = ReadBufferExtended(index, forknum, P_NEW, RBM_NORMAL, NULL); + START_CRIT_SECTION(); + LockBuffer(metaBuffer, BUFFER_LOCK_EXCLUSIVE); + Assert(BufferGetBlockNumber(metaBuffer) == STIR_METAPAGE_BLKNO); + + metaPage = BufferGetPage(metaBuffer); + StirFillMetapage(index, metaPage, forknum == INIT_FORKNUM); + + MarkBufferDirty(metaBuffer); + END_CRIT_SECTION(); + UnlockReleaseBuffer(metaBuffer); +} + +/* + * Initialize any page of a stir index. + */ +void +StirInitPage(Page page, uint16 flags) +{ + StirPageOpaque opaque; + + PageInit(page, BLCKSZ, sizeof(StirPageOpaqueData)); + + opaque = StirPageGetOpaque(page); + opaque->flags = flags; + opaque->stir_page_id = STIR_PAGE_ID; +} + +/* + * Add a tuple to a STIR page. Returns false if tuple doesn't fit. + * The tuple is added to the end of the page. + */ +static bool +StirPageAddItem(Page page, StirTuple *tuple) +{ + StirTuple *itup; + StirPageOpaque opaque; + Pointer ptr; + + /* We shouldn't be pointed to an invalid page */ + Assert(!PageIsNew(page)); + + /* Does new tuple fit on the page? */ + if (StirPageGetFreeSpace(state, page) < sizeof(StirTuple)) + return false; + + /* Copy new tuple to the end of page */ + opaque = StirPageGetOpaque(page); + itup = StirPageGetTuple(page, opaque->maxoff + 1); + memcpy((Pointer) itup, (Pointer) tuple, sizeof(StirTuple)); + + /* Adjust maxoff and pd_lower */ + opaque->maxoff++; + ptr = (Pointer) StirPageGetTuple(page, opaque->maxoff + 1); + ((PageHeader) page)->pd_lower = ptr - page; + + /* Assert we didn't overrun available space */ + Assert(((PageHeader) page)->pd_lower <= ((PageHeader) page)->pd_upper); + return true; +} + +/* + * Insert a new tuple into a STIR index. 
+ */ +bool +stirinsert(Relation index, Datum *values, bool *isnull, + ItemPointer ht_ctid, Relation heapRel, + IndexUniqueCheck checkUnique, + bool indexUnchanged, + struct IndexInfo *indexInfo) +{ + StirTuple *itup; + MemoryContext oldCtx; + MemoryContext insertCtx; + StirMetaPageData *metaData; + Buffer buffer, + metaBuffer; + Page page; + BlockNumber blkNo; + + /* Create temporary context for insert operation */ + insertCtx = AllocSetContextCreate(CurrentMemoryContext, + "Stir insert temporary context", + ALLOCSET_DEFAULT_SIZES); + + oldCtx = MemoryContextSwitchTo(insertCtx); + + /* Create new tuple with heap pointer */ + itup = (StirTuple *) palloc0(sizeof(StirTuple)); + itup->heapPtr = *ht_ctid; + + Assert(!RelationNeedsWAL(index)); + metaBuffer = ReadBuffer(index, STIR_METAPAGE_BLKNO); + + for (;;) + { + LockBuffer(metaBuffer, BUFFER_LOCK_SHARE); + metaData = StirPageGetMeta(BufferGetPage(metaBuffer)); + /* Inserts disabled? Bail out, restoring the caller's memory context first */ + if (metaData->skipInserts) + { + UnlockReleaseBuffer(metaBuffer); + MemoryContextSwitchTo(oldCtx); + MemoryContextDelete(insertCtx); + return false; + } + blkNo = metaData->lastBlkNo; + /* Don't hold metabuffer lock while doing insert */ + LockBuffer(metaBuffer, BUFFER_LOCK_UNLOCK); + + if (blkNo > 0) + { + buffer = ReadBuffer(index, blkNo); + LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); + START_CRIT_SECTION(); + + page = BufferGetPage(buffer); + + Assert(!PageIsNew(page)); + + /* Try to add tuple to existing page */ + if (StirPageAddItem(page, itup)) + { + /* Success! 
Apply the change, clean up, and exit */ + MarkBufferDirty(buffer); + END_CRIT_SECTION(); + + UnlockReleaseBuffer(buffer); + ReleaseBuffer(metaBuffer); + MemoryContextSwitchTo(oldCtx); + MemoryContextDelete(insertCtx); + return false; + } + + END_CRIT_SECTION(); + UnlockReleaseBuffer(buffer); + } + + /* Need to add new page - get exclusive lock on meta page */ + LockBuffer(metaBuffer, BUFFER_LOCK_EXCLUSIVE); + + metaData = StirPageGetMeta(BufferGetPage(metaBuffer)); + /* Check if another backend already extended the index */ + if (blkNo != metaData->lastBlkNo) + { + Assert(blkNo < metaData->lastBlkNo); + /* Someone else inserted the new page into the index, lets try again */ + LockBuffer(metaBuffer, BUFFER_LOCK_UNLOCK); + continue; + } + else + { + /* Must extend the file */ + buffer = ExtendBufferedRel(BMR_REL(index), MAIN_FORKNUM, NULL, + EB_LOCK_FIRST); + page = BufferGetPage(buffer); + START_CRIT_SECTION(); + + StirInitPage(page, 0); + + if (!StirPageAddItem(page, itup)) + { + /* We shouldn't be here since we're inserting to an empty page */ + elog(ERROR, "could not add new stir tuple to empty page"); + } + + /* Update meta page with new last block number */ + metaData->lastBlkNo = BufferGetBlockNumber(buffer); + + MarkBufferDirty(metaBuffer); + MarkBufferDirty(buffer); + + END_CRIT_SECTION(); + + UnlockReleaseBuffer(buffer); + UnlockReleaseBuffer(metaBuffer); + + MemoryContextSwitchTo(oldCtx); + MemoryContextDelete(insertCtx); + return false; + } + } +} + +/* + * STIR doesn't support scans - these functions all error out + */ +IndexScanDesc +stirbeginscan(Relation r, int nkeys, int norderbys) +{ + ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("\"%s\" is not a not implemented", __func__))); +} + +void +stirrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys, + ScanKey orderbys, int norderbys) +{ + ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("\"%s\" is not a not implemented", __func__))); +} + +void 
stirendscan(IndexScanDesc scan) +{ + ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("\"%s\" is not a not implemented", __func__))); +} + +/* + * Build a STIR index - only allowed for auxiliary indexes. + * Just initializes the meta page without any heap scans. + */ +IndexBuildResult * +stirbuild(Relation heap, Relation index, + struct IndexInfo *indexInfo) +{ + IndexBuildResult *result; + + if (!indexInfo->ii_Auxiliary) + ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("STIR indexes are not supported to be built"))); + + StirInitMetapage(index, MAIN_FORKNUM); + + result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult)); + result->heap_tuples = 0; + result->index_tuples = 0; + return result; +} + +void stirbuildempty(Relation index) +{ + StirInitMetapage(index, INIT_FORKNUM); +} + +IndexBulkDeleteResult * +stirbulkdelete(IndexVacuumInfo *info, + IndexBulkDeleteResult *stats, + IndexBulkDeleteCallback callback, + void *callback_state) +{ + Relation index = info->index; + BlockNumber blkno, npages; + Buffer buffer; + Page page; + + /* For normal VACUUM, mark to skip inserts and warn about index drop needed */ + if (!info->validate_index) + { + StirMarkAsSkipInserts(index); + + ereport(WARNING, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("\"%s\" is not a not implemented, seems like this index need to be dropped", __func__))); + return NULL; + } + + if (stats == NULL) + stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult)); + + /* + * Iterate over the pages. We don't care about concurrently added pages, + * because the index is marked as not-ready at that moment and is not + * used for inserts. 
+ */ + npages = RelationGetNumberOfBlocks(index); + for (blkno = STIR_HEAD_BLKNO; blkno < npages; blkno++) + { + StirTuple *itup, *itupEnd; + + vacuum_delay_point(false); + + buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno, + RBM_NORMAL, info->strategy); + + LockBuffer(buffer, BUFFER_LOCK_SHARE); + page = BufferGetPage(buffer); + + if (PageIsNew(page)) + { + UnlockReleaseBuffer(buffer); + continue; + } + + itup = StirPageGetTuple(page, FirstOffsetNumber); + itupEnd = StirPageGetTuple(page, OffsetNumberNext(StirPageGetMaxOffset(page))); + while (itup < itupEnd) + { + /* Do we have to delete this tuple? */ + if (callback(&itup->heapPtr, callback_state)) + { + ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("we never delete in stir"))); + } + + itup = StirPageGetNextTuple(itup); + } + + UnlockReleaseBuffer(buffer); + } + + return stats; +} + +/* + * Mark a STIR index to skip future inserts + */ +void +StirMarkAsSkipInserts(Relation index) +{ + StirMetaPageData *metaData; + Buffer metaBuffer; + Page metaPage; + + Assert(!RelationNeedsWAL(index)); + metaBuffer = ReadBuffer(index, STIR_METAPAGE_BLKNO); + LockBuffer(metaBuffer, BUFFER_LOCK_EXCLUSIVE); + START_CRIT_SECTION(); + + metaPage = BufferGetPage(metaBuffer); + metaData = StirPageGetMeta(metaPage); + + if (!metaData->skipInserts) + { + metaData->skipInserts = true; + MarkBufferDirty(metaBuffer); + } + END_CRIT_SECTION(); + UnlockReleaseBuffer(metaBuffer); +} + +IndexBulkDeleteResult * +stirvacuumcleanup(IndexVacuumInfo *info, + IndexBulkDeleteResult *stats) +{ + StirMarkAsSkipInserts(info->index); + ereport(WARNING, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("\"%s\" is not a not implemented, seems like this index need to be dropped", __func__))); + return NULL; +} + +bytea * +stiroptions(Datum reloptions, bool validate) +{ + return NULL; +} + +void +stircostestimate(PlannerInfo *root, IndexPath *path, + double loop_count, Cost *indexStartupCost, + Cost *indexTotalCost, Selectivity 
*indexSelectivity, + double *indexCorrelation, double *indexPages) +{ + ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("\"%s\" is not a not implemented", __func__))); +} diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index 5d9db167e59..8e509a51c11 100644 --- a/src/backend/catalog/index.c +++ b/src/backend/catalog/index.c @@ -3411,6 +3411,7 @@ validate_index(Oid heapId, Oid indexId, Snapshot snapshot) ivinfo.message_level = DEBUG2; ivinfo.num_heap_tuples = heapRelation->rd_rel->reltuples; ivinfo.strategy = NULL; + ivinfo.validate_index = true; /* * Encode TIDs as int8 values for the sort, rather than directly sorting diff --git a/src/backend/catalog/toasting.c b/src/backend/catalog/toasting.c index 874a8fc89ad..9cc4f06da9f 100644 --- a/src/backend/catalog/toasting.c +++ b/src/backend/catalog/toasting.c @@ -307,6 +307,7 @@ create_toast_table(Relation rel, Oid toastOid, Oid toastIndexOid, indexInfo->ii_ParallelWorkers = 0; indexInfo->ii_Am = BTREE_AM_OID; indexInfo->ii_AmCache = NULL; + indexInfo->ii_Auxiliary = false; indexInfo->ii_Context = CurrentMemoryContext; collationIds[0] = InvalidOid; diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c index 25089fae3e0..89721607f1f 100644 --- a/src/backend/commands/analyze.c +++ b/src/backend/commands/analyze.c @@ -719,6 +719,7 @@ do_analyze_rel(Relation onerel, const VacuumParams params, ivinfo.message_level = elevel; ivinfo.num_heap_tuples = onerel->rd_rel->reltuples; ivinfo.strategy = vac_strategy; + ivinfo.validate_index = false; stats = index_vacuum_cleanup(&ivinfo, NULL); diff --git a/src/backend/commands/vacuumparallel.c b/src/backend/commands/vacuumparallel.c index 0feea1d30ec..582db77ddc0 100644 --- a/src/backend/commands/vacuumparallel.c +++ b/src/backend/commands/vacuumparallel.c @@ -884,6 +884,7 @@ parallel_vacuum_process_one_index(ParallelVacuumState *pvs, Relation indrel, ivinfo.estimated_count = pvs->shared->estimated_count; 
ivinfo.num_heap_tuples = pvs->shared->reltuples; ivinfo.strategy = pvs->bstrategy; + ivinfo.validate_index = false; /* Update error traceback information */ pvs->indname = pstrdup(RelationGetRelationName(indrel)); diff --git a/src/backend/nodes/makefuncs.c b/src/backend/nodes/makefuncs.c index e2d9e9be41a..e97e0943f5b 100644 --- a/src/backend/nodes/makefuncs.c +++ b/src/backend/nodes/makefuncs.c @@ -875,6 +875,7 @@ makeIndexInfo(int numattrs, int numkeyattrs, Oid amoid, List *expressions, /* initialize index-build state to default */ n->ii_BrokenHotChain = false; n->ii_ParallelWorkers = 0; + n->ii_Auxiliary = false; /* set up for possible use by index AM */ n->ii_Am = amoid; diff --git a/src/include/access/genam.h b/src/include/access/genam.h index 9200a22bd9f..431a2fae4ad 100644 --- a/src/include/access/genam.h +++ b/src/include/access/genam.h @@ -77,6 +77,7 @@ typedef struct IndexVacuumInfo bool estimated_count; /* num_heap_tuples is an estimate */ int message_level; /* ereport level for progress messages */ double num_heap_tuples; /* tuples remaining in heap */ + bool validate_index; /* validating concurrently built index? 
*/ BufferAccessStrategy strategy; /* access strategy for reads */ } IndexVacuumInfo; diff --git a/src/include/access/reloptions.h b/src/include/access/reloptions.h index a604a4702c3..3127731f9c6 100644 --- a/src/include/access/reloptions.h +++ b/src/include/access/reloptions.h @@ -51,8 +51,9 @@ typedef enum relopt_kind RELOPT_KIND_VIEW = (1 << 9), RELOPT_KIND_BRIN = (1 << 10), RELOPT_KIND_PARTITIONED = (1 << 11), + RELOPT_KIND_STIR = (1 << 12), /* if you add a new kind, make sure you update "last_default" too */ - RELOPT_KIND_LAST_DEFAULT = RELOPT_KIND_PARTITIONED, + RELOPT_KIND_LAST_DEFAULT = RELOPT_KIND_STIR, /* some compilers treat enums as signed ints, so we can't use 1 << 31 */ RELOPT_KIND_MAX = (1 << 30) } relopt_kind; diff --git a/src/include/access/stir.h b/src/include/access/stir.h new file mode 100644 index 00000000000..9943c42a97e --- /dev/null +++ b/src/include/access/stir.h @@ -0,0 +1,117 @@ +/*------------------------------------------------------------------------- + * + * stir.h + * header file for postgres stir access method implementation. 
+ * + * + * Portions Copyright (c) 2025, PostgreSQL Global Development Group + * + * src/include/access/stir.h + * + *------------------------------------------------------------------------- + */ +#ifndef _STIR_H_ +#define _STIR_H_ + +#include "amapi.h" +#include "xlog.h" +#include "generic_xlog.h" +#include "itup.h" +#include "fmgr.h" +#include "nodes/pathnodes.h" + +/* Support procedures numbers */ +#define STIR_NPROC 0 + +/* Scan strategies */ +#define STIR_NSTRATEGIES 1 + +#define STIR_OPTIONS_PROC 0 + +/* Macros for accessing stir page structures */ +#define StirPageGetOpaque(page) ((StirPageOpaque) PageGetSpecialPointer(page)) +#define StirPageGetMaxOffset(page) (StirPageGetOpaque(page)->maxoff) +#define StirPageIsMeta(page) \ + ((StirPageGetOpaque(page)->flags & STIR_META) != 0) +#define StirPageGetData(page) ((StirTuple *)PageGetContents(page)) +#define StirPageGetTuple(page, offset) \ + ((StirTuple *)(PageGetContents(page) \ + + sizeof(StirTuple) * ((offset) - 1))) +#define StirPageGetNextTuple(tuple) \ + ((StirTuple *)((Pointer)(tuple) + sizeof(StirTuple))) + + + +/* Preserved page numbers */ +#define STIR_METAPAGE_BLKNO (0) +#define STIR_HEAD_BLKNO (1) /* first data page */ + + +/* Opaque for stir pages */ +typedef struct StirPageOpaqueData +{ + OffsetNumber maxoff; /* number of index tuples on page */ + uint16 flags; /* see bit definitions below */ + uint16 unused; /* placeholder to force maxaligning of size of + * StirPageOpaqueData and to place + * stir_page_id exactly at the end of page */ + uint16 stir_page_id; /* for identification of STIR indexes */ +} StirPageOpaqueData; + +/* Stir page flags */ +#define STIR_META (1<<0) + +typedef StirPageOpaqueData *StirPageOpaque; + +#define STIR_PAGE_ID 0xFF84 + +/* Metadata of stir index */ +typedef struct StirMetaPageData +{ + uint32 magickNumber; + uint16 lastBlkNo; + bool skipInserts; /* should we just exit without any inserts */ +} StirMetaPageData; + +/* Magic number to distinguish stir pages from 
others */ +#define STIR_MAGICK_NUMBER (0xDBAC0DEF) + +#define StirPageGetMeta(page) ((StirMetaPageData *) PageGetContents(page)) + +typedef struct StirTuple +{ + ItemPointerData heapPtr; +} StirTuple; + +#define StirPageGetFreeSpace(state, page) \ + (BLCKSZ - MAXALIGN(SizeOfPageHeaderData) \ + - StirPageGetMaxOffset(page) * (sizeof(StirTuple)) \ + - MAXALIGN(sizeof(StirPageOpaqueData))) + +extern void StirFillMetapage(Relation index, Page metaPage, bool skipInserts); +extern void StirInitMetapage(Relation index, ForkNumber forknum); +extern void StirInitPage(Page page, uint16 flags); +extern void StirMarkAsSkipInserts(Relation index); + +/* index access method interface functions */ +extern bool stirvalidate(Oid opclassoid); +extern bool stirinsert(Relation index, Datum *values, bool *isnull, + ItemPointer ht_ctid, Relation heapRel, + IndexUniqueCheck checkUnique, + bool indexUnchanged, + struct IndexInfo *indexInfo); +extern IndexScanDesc stirbeginscan(Relation r, int nkeys, int norderbys); +extern void stirrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys, + ScanKey orderbys, int norderbys); +extern void stirendscan(IndexScanDesc scan); +extern IndexBuildResult *stirbuild(Relation heap, Relation index, + struct IndexInfo *indexInfo); +extern void stirbuildempty(Relation index); +extern IndexBulkDeleteResult *stirbulkdelete(IndexVacuumInfo *info, + IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, + void *callback_state); +extern IndexBulkDeleteResult *stirvacuumcleanup(IndexVacuumInfo *info, + IndexBulkDeleteResult *stats); +extern bytea *stiroptions(Datum reloptions, bool validate); + +#endif \ No newline at end of file diff --git a/src/include/catalog/pg_am.dat b/src/include/catalog/pg_am.dat index 26d15928a15..a5ecf9208ad 100644 --- a/src/include/catalog/pg_am.dat +++ b/src/include/catalog/pg_am.dat @@ -33,5 +33,8 @@ { oid => '3580', oid_symbol => 'BRIN_AM_OID', descr => 'block range index (BRIN) access method', amname => 'brin', 
amhandler => 'brinhandler', amtype => 'i' }, +{ oid => '5555', oid_symbol => 'STIR_AM_OID', + descr => 'short term index replacement access method', + amname => 'stir', amhandler => 'stirhandler', amtype => 'i' }, ] diff --git a/src/include/catalog/pg_opclass.dat b/src/include/catalog/pg_opclass.dat index 4a9624802aa..6227c5658fc 100644 --- a/src/include/catalog/pg_opclass.dat +++ b/src/include/catalog/pg_opclass.dat @@ -488,4 +488,8 @@ # no brin opclass for the geometric types except box +# allow any types for STIR +{ opcmethod => 'stir', oid_symbol => 'ANY_STIR_OPS_OID', opcname => 'stir_ops', + opcfamily => 'stir/any_ops', opcintype => 'any'}, + ] diff --git a/src/include/catalog/pg_opfamily.dat b/src/include/catalog/pg_opfamily.dat index f7dcb96b43c..838ad32c932 100644 --- a/src/include/catalog/pg_opfamily.dat +++ b/src/include/catalog/pg_opfamily.dat @@ -304,5 +304,7 @@ opfmethod => 'hash', opfname => 'multirange_ops' }, { oid => '6158', opfmethod => 'gist', opfname => 'multirange_ops' }, +{ oid => '5558', + opfmethod => 'stir', opfname => 'any_ops' }, ] diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index 5cf9e12fcb9..feb75e0dc50 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -935,6 +935,10 @@ proname => 'brinhandler', provolatile => 'v', prorettype => 'index_am_handler', proargtypes => 'internal', prosrc => 'brinhandler' }, +{ oid => '5556', descr => 'short term index replacement access method handler', + proname => 'stirhandler', provolatile => 'v', + prorettype => 'index_am_handler', proargtypes => 'internal', + prosrc => 'stirhandler' }, { oid => '3952', descr => 'brin: standalone scan new table pages', proname => 'brin_summarize_new_values', provolatile => 'v', proparallel => 'u', prorettype => 'int4', proargtypes => 'regclass', diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 18ae8f0d4bb..84b32319fb3 100644 --- a/src/include/nodes/execnodes.h +++ 
b/src/include/nodes/execnodes.h @@ -155,8 +155,8 @@ typedef struct ExprState * entries for a particular index. Used for both index_build and * retail creation of index entries. * - * ii_Concurrent, ii_BrokenHotChain, and ii_ParallelWorkers are used only - * during index build; they're conventionally zeroed otherwise. + * ii_Concurrent, ii_BrokenHotChain, ii_Auxiliary and ii_ParallelWorkers + * are used only during index build; they're conventionally zeroed otherwise * ---------------- */ typedef struct IndexInfo @@ -216,7 +216,8 @@ typedef struct IndexInfo bool ii_WithoutOverlaps; /* # of workers requested (excludes leader) */ int ii_ParallelWorkers; - + /* is auxiliary for concurrent index build? */ + bool ii_Auxiliary; /* Oid of index AM */ Oid ii_Am; /* private cache area for index AM */ diff --git a/src/include/utils/index_selfuncs.h b/src/include/utils/index_selfuncs.h index 6c64db6d456..e0d939d6857 100644 --- a/src/include/utils/index_selfuncs.h +++ b/src/include/utils/index_selfuncs.h @@ -62,6 +62,14 @@ extern void spgcostestimate(struct PlannerInfo *root, Selectivity *indexSelectivity, double *indexCorrelation, double *indexPages); +extern void stircostestimate(struct PlannerInfo *root, + struct IndexPath *path, + double loop_count, + Cost *indexStartupCost, + Cost *indexTotalCost, + Selectivity *indexSelectivity, + double *indexCorrelation, + double *indexPages); extern void gincostestimate(struct PlannerInfo *root, struct IndexPath *path, double loop_count, diff --git a/src/test/regress/expected/amutils.out b/src/test/regress/expected/amutils.out index 7ab6113c619..92c033a2010 100644 --- a/src/test/regress/expected/amutils.out +++ b/src/test/regress/expected/amutils.out @@ -173,7 +173,13 @@ select amname, prop, pg_indexam_has_property(a.oid, prop) as p spgist | can_exclude | t spgist | can_include | t spgist | bogus | -(36 rows) + stir | can_order | f + stir | can_unique | f + stir | can_multi_col | t + stir | can_exclude | f + stir | can_include | t + 
stir | bogus | +(42 rows) -- -- additional checks for pg_index_column_has_property diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out index a357e1d0c0e..c5595e788a4 100644 --- a/src/test/regress/expected/opr_sanity.out +++ b/src/test/regress/expected/opr_sanity.out @@ -2122,9 +2122,10 @@ FROM pg_opclass AS c1 WHERE NOT EXISTS(SELECT 1 FROM pg_amop AS a1 WHERE a1.amopfamily = c1.opcfamily AND binary_coercible(c1.opcintype, a1.amoplefttype)); - opcname | opcfamily ----------+----------- -(0 rows) + opcname | opcfamily +----------+----------- + stir_ops | 5558 +(1 row) -- Check that each operator listed in pg_amop has an associated opclass, -- that is one whose opcintype matches oprleft (possibly by coercion). diff --git a/src/test/regress/expected/psql.out b/src/test/regress/expected/psql.out index c8f3932edf0..ecc2c2a6049 100644 --- a/src/test/regress/expected/psql.out +++ b/src/test/regress/expected/psql.out @@ -5171,7 +5171,8 @@ List of access methods heap | Table heap2 | Table spgist | Index -(8 rows) + stir | Index +(9 rows) \dA * List of access methods @@ -5185,7 +5186,8 @@ List of access methods heap | Table heap2 | Table spgist | Index -(8 rows) + stir | Index +(9 rows) \dA h* List of access methods @@ -5210,9 +5212,9 @@ List of access methods \dA: extra argument "bar" ignored \dA+ - List of access methods - Name | Type | Handler | Description ---------+-------+----------------------+---------------------------------------- + List of access methods + Name | Type | Handler | Description +--------+-------+----------------------+-------------------------------------------- brin | Index | brinhandler | block range index (BRIN) access method btree | Index | bthandler | b-tree index access method gin | Index | ginhandler | GIN index access method @@ -5221,12 +5223,13 @@ List of access methods heap | Table | heap_tableam_handler | heap table access method heap2 | Table | heap_tableam_handler | spgist | Index | spghandler 
| SP-GiST index access method -(8 rows) + stir | Index | stirhandler | short term index replacement access method +(9 rows) \dA+ * - List of access methods - Name | Type | Handler | Description ---------+-------+----------------------+---------------------------------------- + List of access methods + Name | Type | Handler | Description +--------+-------+----------------------+-------------------------------------------- brin | Index | brinhandler | block range index (BRIN) access method btree | Index | bthandler | b-tree index access method gin | Index | ginhandler | GIN index access method @@ -5235,7 +5238,8 @@ List of access methods heap | Table | heap_tableam_handler | heap table access method heap2 | Table | heap_tableam_handler | spgist | Index | spghandler | SP-GiST index access method -(8 rows) + stir | Index | stirhandler | short term index replacement access method +(9 rows) \dA+ h* List of access methods -- 2.43.0