From 5c94a9cea77820235f62719b9e760adb6fbbc615 Mon Sep 17 00:00:00 2001
From: Melanie Plageman <melanieplageman@gmail.com>
Date: Tue, 17 Jun 2025 17:22:10 -0400
Subject: [PATCH v17 01/15] Eliminate COPY FREEZE use of XLOG_HEAP2_VISIBLE

Instead of emitting a separate WAL XLOG_HEAP2_VISIBLE record for setting
bits in the VM, specify the VM block changes in the
XLOG_HEAP2_MULTI_INSERT record.

This halves the number of WAL records emitted by COPY FREEZE.

Author: Melanie Plageman <melanieplageman@gmail.com>
Reviewed-by: Andres Freund <andres@anarazel.de>
Reviewed-by: Robert Haas <robertmhaas@gmail.com>
Reviewed-by: Kirill Reshke <reshkekirill@gmail.com>
Discussion: https://postgr.es/m/flat/CAAKRu_ZMw6Npd_qm2KM%2BFwQ3cMOMx1Dh3VMhp8-V7SOLxdK9-g%40mail.gmail.com
---
 src/backend/access/heap/heapam.c        | 44 ++++++++++------
 src/backend/access/heap/heapam_xlog.c   | 52 ++++++++++++++++++-
 src/backend/access/heap/visibilitymap.c | 68 ++++++++++++++++++++++++-
 src/backend/access/rmgrdesc/heapdesc.c  |  5 ++
 src/include/access/visibilitymap.h      |  3 ++
 5 files changed, 154 insertions(+), 18 deletions(-)

diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index ed0c0c2dc9f..7f354caec31 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -2466,7 +2466,11 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
 		starting_with_empty_page = PageGetMaxOffsetNumber(page) == 0;
 
 		if (starting_with_empty_page && (options & HEAP_INSERT_FROZEN))
+		{
 			all_frozen_set = true;
+			/* Lock the vmbuffer before entering the critical section */
+			LockBuffer(vmbuffer, BUFFER_LOCK_EXCLUSIVE);
+		}
 
 		/* NO EREPORT(ERROR) from here till changes are logged */
 		START_CRIT_SECTION();
@@ -2506,7 +2510,8 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
 		 * going to add further frozen rows to it.
 		 *
 		 * If we're only adding already frozen rows to a previously empty
-		 * page, mark it as all-visible.
+		 * page, mark it as all-frozen and update the visibility map. We're
+		 * already holding a pin and exclusive lock on the vmbuffer.
 		 */
 		if (PageIsAllVisible(page) && !(options & HEAP_INSERT_FROZEN))
 		{
@@ -2517,7 +2522,14 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
 								vmbuffer, VISIBILITYMAP_VALID_BITS);
 		}
 		else if (all_frozen_set)
+		{
 			PageSetAllVisible(page);
+			visibilitymap_set_vmbits(BufferGetBlockNumber(buffer),
+									 vmbuffer,
+									 VISIBILITYMAP_ALL_VISIBLE |
+									 VISIBILITYMAP_ALL_FROZEN,
+									 RelationGetRelationName(relation));
+		}
 
 		/*
 		 * XXX Should we set PageSetPrunable on this page ? See heap_insert()
@@ -2565,6 +2577,12 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
 			xlrec->flags = 0;
 			if (all_visible_cleared)
 				xlrec->flags = XLH_INSERT_ALL_VISIBLE_CLEARED;
+
+			/*
+			 * We don't have to worry about including a conflict xid in the
+			 * WAL record as HEAP_INSERT_FROZEN intentionally violates
+			 * visibility rules.
+			 */
 			if (all_frozen_set)
 				xlrec->flags = XLH_INSERT_ALL_FROZEN_SET;
 
@@ -2627,7 +2645,10 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
 
 			XLogBeginInsert();
 			XLogRegisterData(xlrec, tupledata - scratch.data);
+
 			XLogRegisterBuffer(0, buffer, REGBUF_STANDARD | bufflags);
+			if (all_frozen_set)
+				XLogRegisterBuffer(1, vmbuffer, 0);
 
 			XLogRegisterBufData(0, tupledata, totaldatalen);
 
@@ -2637,26 +2658,17 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
 			recptr = XLogInsert(RM_HEAP2_ID, info);
 
 			PageSetLSN(page, recptr);
+			if (all_frozen_set)
+			{
+				Assert(BufferIsDirty(vmbuffer));
+				PageSetLSN(BufferGetPage(vmbuffer), recptr);
+			}
 		}
 
 		END_CRIT_SECTION();
 
-		/*
-		 * If we've frozen everything on the page, update the visibilitymap.
-		 * We're already holding pin on the vmbuffer.
-		 */
 		if (all_frozen_set)
-		{
-			/*
-			 * It's fine to use InvalidTransactionId here - this is only used
-			 * when HEAP_INSERT_FROZEN is specified, which intentionally
-			 * violates visibility rules.
-			 */
-			visibilitymap_set(relation, BufferGetBlockNumber(buffer), buffer,
-							  InvalidXLogRecPtr, vmbuffer,
-							  InvalidTransactionId,
-							  VISIBILITYMAP_ALL_VISIBLE | VISIBILITYMAP_ALL_FROZEN);
-		}
+			LockBuffer(vmbuffer, BUFFER_LOCK_UNLOCK);
 
 		UnlockReleaseBuffer(buffer);
 		ndone += nthispage;
diff --git a/src/backend/access/heap/heapam_xlog.c b/src/backend/access/heap/heapam_xlog.c
index cf843277938..c2c7e6ab086 100644
--- a/src/backend/access/heap/heapam_xlog.c
+++ b/src/backend/access/heap/heapam_xlog.c
@@ -551,6 +551,7 @@ heap_xlog_multi_insert(XLogReaderState *record)
 	int			i;
 	bool		isinit = (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE) != 0;
 	XLogRedoAction action;
+	Buffer		vmbuffer = InvalidBuffer;
 
 	/*
 	 * Insertion doesn't overwrite MVCC data, so no conflict processing is
@@ -571,11 +572,11 @@ heap_xlog_multi_insert(XLogReaderState *record)
 	if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
 	{
 		Relation	reln = CreateFakeRelcacheEntry(rlocator);
-		Buffer		vmbuffer = InvalidBuffer;
 
 		visibilitymap_pin(reln, blkno, &vmbuffer);
 		visibilitymap_clear(reln, blkno, vmbuffer, VISIBILITYMAP_VALID_BITS);
 		ReleaseBuffer(vmbuffer);
+		vmbuffer = InvalidBuffer;
 		FreeFakeRelcacheEntry(reln);
 	}
 
@@ -662,6 +663,55 @@ heap_xlog_multi_insert(XLogReaderState *record)
 	if (BufferIsValid(buffer))
 		UnlockReleaseBuffer(buffer);
 
+	buffer = InvalidBuffer;
+
+	/*
+	 * Read and update the visibility map (VM) block.
+	 *
+	 * We must always redo VM changes, even if the corresponding heap page
+	 * update was skipped due to the LSN interlock. Each VM block covers
+	 * multiple heap pages, so later WAL records may update other bits in the
+	 * same block. If this record includes a full-page image (FPI), subsequent
+	 * WAL records may depend on it to guard against torn pages.
+	 *
+	 * Heap page changes are replayed first to preserve the invariant:
+	 * PD_ALL_VISIBLE must be set on the heap page if the VM bit is set.
+	 *
+	 * Note that we released the heap page lock above. Under normal operation,
+	 * this would be unsafe -- a concurrent modification could clear
+	 * PD_ALL_VISIBLE while the VM bit remained set, violating the invariant.
+	 *
+	 * During recovery, however, no concurrent writers exist. Therefore,
+	 * updating the VM without holding the heap page lock is safe enough. This
+	 * same approach is taken when replaying xl_heap_visible records (see
+	 * heap_xlog_visible()).
+	 */
+	if ((xlrec->flags & XLH_INSERT_ALL_FROZEN_SET) &&
+		XLogReadBufferForRedoExtended(record, 1, RBM_ZERO_ON_ERROR, false,
+									  &vmbuffer) == BLK_NEEDS_REDO)
+	{
+		Page		vmpage = BufferGetPage(vmbuffer);
+		char	   *relname;
+
+		/* initialize the page if it was read as zeros */
+		if (PageIsNew(vmpage))
+			PageInit(vmpage, BLCKSZ, 0);
+
+		/* We don't have the relation name during recovery, so use relfilenumber */
+		relname = psprintf("%u", rlocator.relNumber);
+		visibilitymap_set_vmbits(blkno,
+								 vmbuffer,
+								 VISIBILITYMAP_ALL_VISIBLE |
+								 VISIBILITYMAP_ALL_FROZEN,
+								 relname);
+
+		PageSetLSN(BufferGetPage(vmbuffer), lsn);
+		pfree(relname);
+	}
+
+	if (BufferIsValid(vmbuffer))
+		UnlockReleaseBuffer(vmbuffer);
+
 	/*
 	 * If the page is running low on free space, update the FSM as well.
 	 * Arbitrarily, our definition of "low" is less than 20%. We can't do much
diff --git a/src/backend/access/heap/visibilitymap.c b/src/backend/access/heap/visibilitymap.c
index 7306c16f05c..2d43147ffb7 100644
--- a/src/backend/access/heap/visibilitymap.c
+++ b/src/backend/access/heap/visibilitymap.c
@@ -14,7 +14,8 @@
  *		visibilitymap_clear  - clear bits for one page in the visibility map
  *		visibilitymap_pin	 - pin a map page for setting a bit
  *		visibilitymap_pin_ok - check whether correct map page is already pinned
- *		visibilitymap_set	 - set a bit in a previously pinned page
+ *		visibilitymap_set	 - set bit(s) in a previously pinned page and log
+ *		visibilitymap_set_vmbits - set bit(s) in a pinned page
  *		visibilitymap_get_status - get status of bits
  *		visibilitymap_count  - count number of bits set in visibility map
  *		visibilitymap_prepare_truncate -
@@ -321,6 +322,71 @@ visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf,
 	return status;
 }
 
+/*
+ * Set visibility map (VM) flags in the block referenced by vmBuf.
+ *
+ * This function is intended for callers that log VM changes together
+ * with the heap page modifications that rendered the page all-visible.
+ * Callers that log VM changes separately should use visibilitymap_set().
+ *
+ * vmBuf must be pinned and exclusively locked, and it must cover the VM bits
+ * corresponding to heapBlk.
+ *
+ * In normal operation (not recovery), this must be called inside a critical
+ * section that also applies the necessary heap page changes and, if
+ * applicable, emits WAL.
+ *
+ * The caller is responsible for ensuring consistency between the heap page
+ * and the VM page by holding a pin and exclusive lock on the buffer
+ * containing heapBlk.
+ *
+ * heapRelname is used only for debugging.
+ */
+uint8
+visibilitymap_set_vmbits(BlockNumber heapBlk,
+						 Buffer vmBuf, uint8 flags,
+						 const char *heapRelname)
+{
+	BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(heapBlk);
+	uint32		mapByte = HEAPBLK_TO_MAPBYTE(heapBlk);
+	uint8		mapOffset = HEAPBLK_TO_OFFSET(heapBlk);
+	Page		page;
+	uint8	   *map;
+	uint8		status;
+
+#ifdef TRACE_VISIBILITYMAP
+	elog(DEBUG1, "vm_set flags 0x%02X for %s %d",
+		 flags, heapRelname, heapBlk);
+#endif
+
+	/* Call in same critical section where WAL is emitted. */
+	Assert(InRecovery || CritSectionCount > 0);
+
+	/* Flags should be valid. Also never clear bits with this function */
+	Assert((flags & VISIBILITYMAP_VALID_BITS) == flags);
+
+	/* Must never set all_frozen bit without also setting all_visible bit */
+	Assert(flags != VISIBILITYMAP_ALL_FROZEN);
+
+	/* Check that we have the right VM page pinned */
+	if (!BufferIsValid(vmBuf) || BufferGetBlockNumber(vmBuf) != mapBlock)
+		elog(ERROR, "wrong VM buffer passed to visibilitymap_set_vmbits");
+
+	Assert(BufferIsExclusiveLocked(vmBuf));
+
+	page = BufferGetPage(vmBuf);
+	map = (uint8 *) PageGetContents(page);
+
+	status = (map[mapByte] >> mapOffset) & VISIBILITYMAP_VALID_BITS;
+	if (flags != status)
+	{
+		map[mapByte] |= (flags << mapOffset);
+		MarkBufferDirty(vmBuf);
+	}
+
+	return status;
+}
+
 /*
  *	visibilitymap_get_status - get status of bits
  *
diff --git a/src/backend/access/rmgrdesc/heapdesc.c b/src/backend/access/rmgrdesc/heapdesc.c
index 82b62c95de5..b48d7dc1d24 100644
--- a/src/backend/access/rmgrdesc/heapdesc.c
+++ b/src/backend/access/rmgrdesc/heapdesc.c
@@ -16,6 +16,7 @@
 
 #include "access/heapam_xlog.h"
 #include "access/rmgrdesc_utils.h"
+#include "access/visibilitymapdefs.h"
 #include "storage/standbydefs.h"
 
 /*
@@ -354,6 +355,10 @@ heap2_desc(StringInfo buf, XLogReaderState *record)
 		appendStringInfo(buf, "ntuples: %d, flags: 0x%02X", xlrec->ntuples,
 						 xlrec->flags);
 
+		if (xlrec->flags & XLH_INSERT_ALL_FROZEN_SET)
+			appendStringInfo(buf, ", vm_flags: 0x%02X",
+							 VISIBILITYMAP_ALL_VISIBLE | VISIBILITYMAP_ALL_FROZEN);
+
 		if (XLogRecHasBlockData(record, 0) && !isinit)
 		{
 			appendStringInfoString(buf, ", offsets:");
diff --git a/src/include/access/visibilitymap.h b/src/include/access/visibilitymap.h
index be21c6dd1a3..3dcf37ba03f 100644
--- a/src/include/access/visibilitymap.h
+++ b/src/include/access/visibilitymap.h
@@ -37,6 +37,9 @@ extern uint8 visibilitymap_set(Relation rel,
 							   Buffer vmBuf,
 							   TransactionId cutoff_xid,
 							   uint8 flags);
+extern uint8 visibilitymap_set_vmbits(BlockNumber heapBlk,
+									  Buffer vmBuf, uint8 flags,
+									  const char *heapRelname);
 extern uint8 visibilitymap_get_status(Relation rel, BlockNumber heapBlk, Buffer *vmbuf);
 extern void visibilitymap_count(Relation rel, BlockNumber *all_visible, BlockNumber *all_frozen);
 extern BlockNumber visibilitymap_prepare_truncate(Relation rel,
-- 
2.43.0

