From dec0eeb84b7780c4de74176ba25c305134e7b439 Mon Sep 17 00:00:00 2001 From: Matthias van de Meent Date: Fri, 25 Apr 2025 16:14:26 +0200 Subject: [PATCH v02 2/4] GIST: Fix visibility issues in IOS Previously, GIST IOS could buffer tuples from pages while VACUUM came along and cleaned up an ALL_DEAD tuple, marking the tuple's page ALL_VISIBLE again and making IOS mistakenly believe the tuple is indeed visible. With this commit, buffer pins now conflict with GIST vacuum, and we now do visibility checks on heap items returned from index pages while we hold the pin, so that the IOS infrastructure knows to recheck the heap page even if that heap page has become ALL_VISIBLE after we released the index page. Idea from Heikki Linnakangas Backpatch-through: 14 --- src/backend/access/gist/gistget.c | 36 ++++++++++++++++++++++++---- src/backend/access/gist/gistscan.c | 14 +++++++++++ src/backend/access/gist/gistvacuum.c | 6 ++--- src/include/access/gist_private.h | 5 ++++ 4 files changed, 53 insertions(+), 8 deletions(-) diff --git a/src/backend/access/gist/gistget.c b/src/backend/access/gist/gistget.c index 4d7c100d737..20a9cc22409 100644 --- a/src/backend/access/gist/gistget.c +++ b/src/backend/access/gist/gistget.c @@ -471,8 +471,9 @@ gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem, so->pageData[so->nPageData].offnum = i; /* - * In an index-only scan, also fetch the data from the tuple. The - * reconstructed tuples are stored in pageDataCxt. + * In an index-only scan, also fetch the data from the tuple and + * its visibility state. The reconstructed tuples are stored in + * pageDataCxt. */ if (scan->xs_want_itup) { @@ -480,6 +481,10 @@ gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem, so->pageData[so->nPageData].recontup = gistFetchTuple(giststate, r, it); MemoryContextSwitchTo(oldcxt); + + so->pageData[so->nPageData].visrecheck = + table_index_vischeck_tuple(scan->heapRelation, + &so->vmbuf, &it->t_tid); } so->nPageData++; } @@ -507,10 +512,16 @@ gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem, item->data.heap.recheckDistances = recheck_distances; /* - * In an index-only scan, also fetch the data from the tuple. + * In an index-only scan, also fetch the data from the tuple + * and its visibility state. */ if (scan->xs_want_itup) + { item->data.heap.recontup = gistFetchTuple(giststate, r, it); + item->data.heap.visrecheck = + table_index_vischeck_tuple(scan->heapRelation, + &so->vmbuf, &it->t_tid); + } } else { @@ -595,9 +606,16 @@ getNextNearest(IndexScanDesc scan) item->distances, item->data.heap.recheckDistances); - /* in an index-only scan, also return the reconstructed tuple. */ + /* + * In an index-only scan, also return the reconstructed tuple and + * the visibility check we did when we still had a pin on the + * index page. + */ if (scan->xs_want_itup) + { scan->xs_hitup = item->data.heap.recontup; + scan->xs_visrecheck = item->data.heap.visrecheck; + } res = true; } else @@ -682,9 +700,17 @@ gistgettuple(IndexScanDesc scan, ScanDirection dir) scan->xs_heaptid = so->pageData[so->curPageData].heapPtr; scan->xs_recheck = so->pageData[so->curPageData].recheck; - /* in an index-only scan, also return the reconstructed tuple */ + /* + * In an index-only scan, also return the reconstructed tuple + * and the visibility check we did when we still had a pin on + * the index page. + */ if (scan->xs_want_itup) + { scan->xs_hitup = so->pageData[so->curPageData].recontup; + scan->xs_visrecheck = + so->pageData[so->curPageData].visrecheck; + } so->curPageData++; diff --git a/src/backend/access/gist/gistscan.c b/src/backend/access/gist/gistscan.c index c65f93abdae..70b593f2898 100644 --- a/src/backend/access/gist/gistscan.c +++ b/src/backend/access/gist/gistscan.c @@ -340,6 +340,13 @@ gistrescan(IndexScanDesc scan, ScanKey key, int nkeys, pfree(fn_extras); } + /* release resources used in index-only scans */ + if (BufferIsValid(so->vmbuf)) + { + ReleaseBuffer(so->vmbuf); + so->vmbuf = InvalidBuffer; + } + /* any previous xs_hitup will have been pfree'd in context resets above */ scan->xs_hitup = NULL; } @@ -349,6 +356,13 @@ gistendscan(IndexScanDesc scan) { GISTScanOpaque so = (GISTScanOpaque) scan->opaque; + /* release resources used in index-only scans */ + if (BufferIsValid(so->vmbuf)) + { + ReleaseBuffer(so->vmbuf); + so->vmbuf = InvalidBuffer; + } + /* * freeGISTstate is enough to clean up everything made by gistbeginscan, * as well as the queueCxt if there is a separate context for it. diff --git a/src/backend/access/gist/gistvacuum.c b/src/backend/access/gist/gistvacuum.c index 686a0418054..1b9acec5654 100644 --- a/src/backend/access/gist/gistvacuum.c +++ b/src/backend/access/gist/gistvacuum.c @@ -326,10 +326,10 @@ restart: recurse_to = InvalidBlockNumber; /* - * We are not going to stay here for a long time, aggressively grab an - * exclusive lock. + * We are not going to stay here for a long time, aggressively grab a + * cleanup lock. */ - LockBuffer(buffer, GIST_EXCLUSIVE); + LockBufferForCleanup(buffer); page = BufferGetPage(buffer); if (gistPageRecyclable(page)) diff --git a/src/include/access/gist_private.h b/src/include/access/gist_private.h index 44514f1cb8d..b9002f0f0bf 100644 --- a/src/include/access/gist_private.h +++ b/src/include/access/gist_private.h @@ -22,6 +22,7 @@ #include "storage/buffile.h" #include "utils/hsearch.h" #include "access/genam.h" +#include "tableam.h" /* * Maximum number of "halves" a page can be split into in one operation. @@ -124,6 +125,8 @@ typedef struct GISTSearchHeapItem * index-only scans */ OffsetNumber offnum; /* track offset in page to mark tuple as * LP_DEAD */ + uint8 visrecheck; /* Cached visibility check result for this + * heap pointer. */ } GISTSearchHeapItem; /* Unvisited item, either index page or heap tuple */ @@ -176,6 +179,8 @@ typedef struct GISTScanOpaqueData OffsetNumber curPageData; /* next item to return */ MemoryContext pageDataCxt; /* context holding the fetched tuples, for * index-only scans */ + /* info used by Index-Only Scans */ + Buffer vmbuf; /* reusable buffer for IOS' vm lookups */ } GISTScanOpaqueData; typedef GISTScanOpaqueData *GISTScanOpaque; -- 2.50.1 (Apple Git-155)