From a8ec9732ff892dff8146a1d0e637dd30de2dcf53 Mon Sep 17 00:00:00 2001
From: Melanie Plageman <melanieplageman@gmail.com>
Date: Fri, 27 Feb 2026 16:33:40 -0500
Subject: [PATCH v43 09/10] Allow on-access pruning to set pages all-visible

Many queries do not modify the underlying relation. For such queries, if
on-access pruning occurs during the scan, we can check whether the page
has become all-visible and update the visibility map accordingly.
Previously, only vacuum and COPY FREEZE marked pages as all-visible or
all-frozen.

This commit implements on-access VM setting for sequential scans as well
as for the underlying heap relation in index scans and bitmap heap
scans.

Setting the visibility map on-access can avoid write amplification
caused by vacuum later needing to set the page all-visible, triggering a
write and potentially a full-page image (FPI). It also allows more
frequent index-only scans, since they require pages to be marked
all-visible in the VM.

Author: Melanie Plageman <melanieplageman@gmail.com>
Reviewed-by: Andres Freund <andres@anarazel.de>
Reviewed-by: Kirill Reshke <reshkekirill@gmail.com>
Reviewed-by: Chao Li <li.evan.chao@gmail.com>
Discussion: https://postgr.es/m/flat/CAAKRu_ZMw6Npd_qm2KM%2BFwQ3cMOMx1Dh3VMhp8-V7SOLxdK9-g%40mail.gmail.com
---
 src/backend/access/heap/heapam.c         |  3 +-
 src/backend/access/heap/heapam_handler.c |  6 ++--
 src/backend/access/heap/pruneheap.c      | 46 +++++++++++++++++-------
 src/backend/access/heap/vacuumlazy.c     |  2 +-
 src/include/access/heapam.h              | 16 +++++++--
 5 files changed, 54 insertions(+), 19 deletions(-)

diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 044f385e477..dbdf6521c42 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -633,7 +633,8 @@ heap_prepare_pagescan(TableScanDesc sscan)
 	/*
 	 * Prune and repair fragmentation for the whole page, if possible.
 	 */
-	heap_page_prune_opt(scan->rs_base.rs_rd, buffer, &scan->rs_vmbuffer);
+	heap_page_prune_opt(scan->rs_base.rs_rd, buffer, &scan->rs_vmbuffer,
+						(sscan->rs_flags & SO_HINT_REL_READ_ONLY));
 
 	/*
 	 * We must hold share lock on the buffer content while examining tuple
diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c
index 66726b22de6..651efa0127a 100644
--- a/src/backend/access/heap/heapam_handler.c
+++ b/src/backend/access/heap/heapam_handler.c
@@ -148,7 +148,8 @@ heapam_index_fetch_tuple(struct IndexFetchTableData *scan,
 		 */
 		if (prev_buf != hscan->xs_cbuf)
 			heap_page_prune_opt(hscan->xs_base.rel, hscan->xs_cbuf,
-								&hscan->xs_vmbuffer);
+								&hscan->xs_vmbuffer,
+								(hscan->xs_base.flags & SO_HINT_REL_READ_ONLY));
 	}
 
 	/* Obtain share-lock on the buffer so we can examine visibility */
@@ -2545,7 +2546,8 @@ BitmapHeapScanNextBlock(TableScanDesc scan,
 	/*
 	 * Prune and repair fragmentation for the whole page, if possible.
 	 */
-	heap_page_prune_opt(scan->rs_rd, buffer, &hscan->rs_vmbuffer);
+	heap_page_prune_opt(scan->rs_rd, buffer, &hscan->rs_vmbuffer,
+						scan->rs_flags & SO_HINT_REL_READ_ONLY);
 
 	/*
 	 * We must hold share lock on the buffer content while examining tuple
diff --git a/src/backend/access/heap/pruneheap.c b/src/backend/access/heap/pruneheap.c
index 41bfb6711c1..235d21c1a41 100644
--- a/src/backend/access/heap/pruneheap.c
+++ b/src/backend/access/heap/pruneheap.c
@@ -44,6 +44,8 @@ typedef struct
 	bool		mark_unused_now;
 	/* whether to attempt freezing tuples */
 	bool		attempt_freeze;
+	/* whether to attempt setting the VM */
+	bool		attempt_set_vm;
 	struct VacuumCutoffs *cutoffs;
 	Relation	relation;
 
@@ -232,7 +234,8 @@ static void page_verify_redirects(Page page);
 
 static bool heap_page_will_freeze(bool did_tuple_hint_fpi, bool do_prune, bool do_hint_prune,
 								  PruneState *prstate);
-static bool heap_page_will_set_vm(PruneState *prstate, PruneReason reason);
+static bool heap_page_will_set_vm(PruneState *prstate, PruneReason reason,
+								  bool do_prune, bool do_freeze);
 
 
 /*
@@ -253,7 +256,8 @@ static bool heap_page_will_set_vm(PruneState *prstate, PruneReason reason);
  * unpinning *vmbuffer.
  */
 void
-heap_page_prune_opt(Relation relation, Buffer buffer, Buffer *vmbuffer)
+heap_page_prune_opt(Relation relation, Buffer buffer, Buffer *vmbuffer,
+					bool rel_read_only)
 {
 	Page		page = BufferGetPage(buffer);
 	TransactionId prune_xid;
@@ -336,6 +340,8 @@ heap_page_prune_opt(Relation relation, Buffer buffer, Buffer *vmbuffer)
 			 * current implementation.
 			 */
 			params.options = HEAP_PAGE_PRUNE_ALLOW_FAST_PATH;
+			if (rel_read_only)
+				params.options |= HEAP_PAGE_PRUNE_SET_VM;
 
 			heap_page_prune_and_freeze(&params, &presult, &dummy_off_loc,
 									   NULL, NULL);
@@ -392,6 +398,7 @@ prune_freeze_setup(PruneFreezeParams *params,
 	/* cutoffs must be provided if we will attempt freezing */
 	Assert(!(params->options & HEAP_PAGE_PRUNE_FREEZE) || params->cutoffs);
 	prstate->attempt_freeze = (params->options & HEAP_PAGE_PRUNE_FREEZE) != 0;
+	prstate->attempt_set_vm = (params->options & HEAP_PAGE_PRUNE_SET_VM) != 0;
 	prstate->cutoffs = params->cutoffs;
 	prstate->relation = params->relation;
 	prstate->block = BufferGetBlockNumber(params->buffer);
@@ -461,9 +468,8 @@ prune_freeze_setup(PruneFreezeParams *params,
 	 * We track whether the page will be all-visible/all-frozen at the end of
 	 * pruning and freezing. While examining tuple visibility, we'll set
 	 * set_all_visible to false if there are tuples on the page not visible to
-	 * all running and future transactions. set_all_visible is always
-	 * maintained but only VACUUM will set the VM if the page ends up being
-	 * all-visible.
+	 * all running and future transactions. If enabled for this scan, we will
+	 * set the VM if the page ends up being all-visible.
 	 *
 	 * We also keep track of the newest live XID, which is used to calculate
 	 * the snapshot conflict horizon for a WAL record setting the VM.
@@ -919,21 +925,37 @@ heap_page_fix_vm_corruption(PruneState *prstate, OffsetNumber offnum,
  * This function does not actually set the VM bits or page-level visibility
  * hint, PD_ALL_VISIBLE.
  *
+ * This should be called only after do_freeze has been decided (and do_prune
+ * has been set), as these factor into our heuristic-based decision.
+ *
  * Returns true if one or both VM bits should be set and false otherwise.
  */
 static bool
-heap_page_will_set_vm(PruneState *prstate, PruneReason reason)
+heap_page_will_set_vm(PruneState *prstate, PruneReason reason,
+					  bool do_prune, bool do_freeze)
 {
-	/*
-	 * Though on-access pruning maintains prstate->set_all_visible, we don't
-	 * set the VM for now.
-	 */
-	if (reason == PRUNE_ON_ACCESS)
+	if (!prstate->attempt_set_vm)
 		return false;
 
 	if (!prstate->set_all_visible)
 		return false;
 
+	/*
+	 * If this is an on-access call and we're not actually pruning, avoid
+	 * setting the visibility map if it would newly dirty the heap page or, if
+	 * the page is already dirty, if doing so would require including a
+	 * full-page image (FPI) of the heap page in the WAL. This situation
+	 * should be rare, as on-access pruning is only attempted when
+	 * pd_prune_xid is valid.
+	 */
+	if (reason == PRUNE_ON_ACCESS && !do_prune && !do_freeze &&
+		(!BufferIsDirty(prstate->buffer) || XLogCheckBufferNeedsBackup(prstate->buffer)))
+	{
+		prstate->set_all_visible = false;
+		prstate->set_all_frozen = false;
+		return false;
+	}
+
 	prstate->new_vmbits = VISIBILITYMAP_ALL_VISIBLE;
 
 	if (prstate->set_all_frozen)
@@ -1165,7 +1187,7 @@ heap_page_prune_and_freeze(PruneFreezeParams *params,
 	Assert(!prstate.set_all_frozen || prstate.set_all_visible);
 	Assert(!prstate.set_all_visible || (prstate.lpdead_items == 0));
 
-	do_set_vm = heap_page_will_set_vm(&prstate, params->reason);
+	do_set_vm = heap_page_will_set_vm(&prstate, params->reason, do_prune, do_freeze);
 
 	/*
 	 * new_vmbits should be 0 regardless of whether or not the page is
diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c
index f698c2d899b..24001b27387 100644
--- a/src/backend/access/heap/vacuumlazy.c
+++ b/src/backend/access/heap/vacuumlazy.c
@@ -2021,7 +2021,7 @@ lazy_scan_prune(LVRelState *vacrel,
 		.buffer = buf,
 		.vmbuffer = vmbuffer,
 		.reason = PRUNE_VACUUM_SCAN,
-		.options = HEAP_PAGE_PRUNE_FREEZE,
+		.options = HEAP_PAGE_PRUNE_FREEZE | HEAP_PAGE_PRUNE_SET_VM,
 		.vistest = vacrel->vistest,
 		.cutoffs = &vacrel->cutoffs,
 	};
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index c6aec63a505..90ca5a2cfa8 100644
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -43,6 +43,7 @@
 #define HEAP_PAGE_PRUNE_MARK_UNUSED_NOW		(1 << 0)
 #define HEAP_PAGE_PRUNE_FREEZE				(1 << 1)
 #define HEAP_PAGE_PRUNE_ALLOW_FAST_PATH		(1 << 2)
+#define HEAP_PAGE_PRUNE_SET_VM				(1 << 3)
 
 typedef struct BulkInsertStateData *BulkInsertState;
 typedef struct GlobalVisState GlobalVisState;
@@ -95,7 +96,12 @@ typedef struct HeapScanDescData
 	 */
 	ParallelBlockTableScanWorkerData *rs_parallelworkerdata;
 
-	/* Current heap block's corresponding page in the visibility map */
+	/*
+	 * Used by sequential scans, bitmap heap scans, TID range scans, and
+	 * sample scans: the current heap block's corresponding page in the
+	 * visibility map. If the relation is not modified by the query,
+	 * on-access pruning may set the VM.
+	 */
 	Buffer		rs_vmbuffer;
 
 	/* these fields only used in page-at-a-time mode and for bitmap scans */
@@ -126,7 +132,11 @@ typedef struct IndexFetchHeapData
 	 */
 	Buffer		xs_cbuf;
 
-	/* Current heap block's corresponding page in the visibility map */
+	/*
+	 * Current heap block's corresponding page in the visibility map. For
+	 * index scans that do not modify the underlying heap table, on-access
+	 * pruning may set the VM.
+	 */
 	Buffer		xs_vmbuffer;
 } IndexFetchHeapData;
 
@@ -431,7 +441,7 @@ extern TransactionId heap_index_delete_tuples(Relation rel,
 
 /* in heap/pruneheap.c */
 extern void heap_page_prune_opt(Relation relation, Buffer buffer,
-								Buffer *vmbuffer);
+								Buffer *vmbuffer, bool rel_read_only);
 extern void heap_page_prune_and_freeze(PruneFreezeParams *params,
 									   PruneFreezeResult *presult,
 									   OffsetNumber *off_loc,
-- 
2.43.0

