From 5d16ef487aac7ab9e35d14950d8de7ba6e73351b Mon Sep 17 00:00:00 2001
From: Melanie Plageman <melanieplageman@gmail.com>
Date: Wed, 29 Apr 2026 12:53:49 -0400
Subject: [PATCH v2_PG18 2/4] Fix WAL logging of VM clears

Previously, we failed to register the visibility map buffer when
emitting WAL after clearing the VM bits for heap pages (recovery code
read the VM page normally). This meant that we couldn't emit FPIs of VM
pages when clearing VM bits. While this would not result in a checksum
error because the visibility map is read with ZERO_ON_ERROR, the WAL
summarizer only includes pages which were registered, so a restore from
an incremental backup would not include the modified VM pages -- leading
to data corruption.

Fix this by registering the VM buffer in the WAL record when clearing VM
bits. This also means the VM buffer must be locked throughout the
duration of the critical section where we make changes to the VM and
heap page and emit WAL.

Author: Melanie Plageman <melanieplageman@gmail.com>
Author: Andres Freund <andres@anarazel.de>
Discussion: https://postgr.es/m/flat/CAAKRu_bn%2Be7F4yPFBgFbnP%2BsyJRKyNK092bjD2LKvZW7O4Svag
Backpatch-through: 17
---
 contrib/pg_surgery/heap_surgery.c       |  35 ++--
 src/backend/access/heap/heapam.c        | 259 ++++++++++++++++++++----
 src/backend/access/heap/heapam_xlog.c   | 237 ++++++++++++++++------
 src/backend/access/heap/visibilitymap.c |  22 +-
 src/bin/pg_walsummary/t/002_blocks.pl   |   7 +-
 src/include/access/heapam_xlog.h        |  15 +-
 src/include/access/visibilitymap.h      |   2 +
 7 files changed, 450 insertions(+), 127 deletions(-)

diff --git a/contrib/pg_surgery/heap_surgery.c b/contrib/pg_surgery/heap_surgery.c
index 3e86283beb7..81a96023eef 100644
--- a/contrib/pg_surgery/heap_surgery.c
+++ b/contrib/pg_surgery/heap_surgery.c
@@ -17,6 +17,7 @@
 #include "access/visibilitymap.h"
 #include "access/xloginsert.h"
 #include "catalog/pg_am_d.h"
+#include "catalog/pg_control.h"
 #include "miscadmin.h"
 #include "storage/bufmgr.h"
 #include "utils/acl.h"
@@ -233,11 +234,14 @@ heap_force_common(FunctionCallInfo fcinfo, HeapTupleForceOption heap_force_opt)
 		}
 
 		/*
-		 * Before entering the critical section, pin the visibility map page
-		 * if it appears to be necessary.
+		 * Before entering the critical section, pin and lock the visibility
+		 * map page if it appears to be necessary.
 		 */
 		if (heap_force_opt == HEAP_FORCE_KILL && PageIsAllVisible(page))
+		{
 			visibilitymap_pin(rel, blkno, &vmbuf);
+			LockBuffer(vmbuf, BUFFER_LOCK_EXCLUSIVE);
+		}
 
 		/* No ereport(ERROR) from here until all the changes are logged. */
 		START_CRIT_SECTION();
@@ -266,10 +270,10 @@ heap_force_common(FunctionCallInfo fcinfo, HeapTupleForceOption heap_force_opt)
 				 */
 				if (PageIsAllVisible(page))
 				{
-					PageClearAllVisible(page);
-					visibilitymap_clear(rel, blkno, vmbuf,
-										VISIBILITYMAP_VALID_BITS);
+					visibilitymap_clear_locked(rel, blkno, vmbuf,
+											   VISIBILITYMAP_VALID_BITS);
 					did_modify_vm = true;
+					PageClearAllVisible(page);
 				}
 			}
 			else
@@ -320,19 +324,26 @@ heap_force_common(FunctionCallInfo fcinfo, HeapTupleForceOption heap_force_opt)
 
 			/* XLOG stuff */
 			if (RelationNeedsWAL(rel))
-				log_newpage_buffer(buf, true);
+			{
+				XLogRecPtr	recptr;
+
+				XLogBeginInsert();
+				XLogRegisterBuffer(0, buf, REGBUF_STANDARD | REGBUF_FORCE_IMAGE);
+				if (did_modify_vm)
+					XLogRegisterBuffer(1, vmbuf, REGBUF_FORCE_IMAGE);
+				recptr = XLogInsert(RM_XLOG_ID, XLOG_FPI);
+				if (did_modify_vm)
+					PageSetLSN(BufferGetPage(vmbuf), recptr);
+				PageSetLSN(BufferGetPage(buf), recptr);
+			}
 		}
 
-		/* WAL log the VM page if it was modified. */
-		if (did_modify_vm && RelationNeedsWAL(rel))
-			log_newpage_buffer(vmbuf, false);
-
 		END_CRIT_SECTION();
 
 		UnlockReleaseBuffer(buf);
 
-		if (vmbuf != InvalidBuffer)
-			ReleaseBuffer(vmbuf);
+		if (BufferIsValid(vmbuf))
+			UnlockReleaseBuffer(vmbuf);
 
 		/* Update the current_start_ptr before moving to the next page. */
 		curr_start_ptr = next_start_ptr;
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 3c3b65b3cbf..344e1f82bd7 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -58,7 +58,8 @@
 static HeapTuple heap_prepare_insert(Relation relation, HeapTuple tup,
 									 TransactionId xid, CommandId cid, int options);
 static XLogRecPtr log_heap_update(Relation reln, Buffer oldbuf,
-								  Buffer newbuf, HeapTuple oldtup,
+								  Buffer vmbuffer_old, Buffer newbuf,
+								  Buffer vmbuffer_new, HeapTuple oldtup,
 								  HeapTuple newtup, HeapTuple old_key_tuple,
 								  bool all_visible_cleared, bool new_all_visible_cleared);
 #ifdef USE_ASSERT_CHECKING
@@ -2135,10 +2136,12 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
 	if (PageIsAllVisible(BufferGetPage(buffer)))
 	{
 		all_visible_cleared = true;
+
+		LockBuffer(vmbuffer, BUFFER_LOCK_EXCLUSIVE);
+		visibilitymap_clear_locked(relation,
+								   ItemPointerGetBlockNumber(&(heaptup->t_self)),
+								   vmbuffer, VISIBILITYMAP_VALID_BITS);
 		PageClearAllVisible(BufferGetPage(buffer));
-		visibilitymap_clear(relation,
-							ItemPointerGetBlockNumber(&(heaptup->t_self)),
-							vmbuffer, VISIBILITYMAP_VALID_BITS);
 	}
 
 	/*
@@ -2230,13 +2233,22 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
 		/* filtering by origin on a row level is much more efficient */
 		XLogSetRecordFlags(XLOG_INCLUDE_ORIGIN);
 
+		if (all_visible_cleared)
+			XLogRegisterBuffer(HEAP_INSERT_BLKREF_VM, vmbuffer, 0);
+
 		recptr = XLogInsert(RM_HEAP_ID, info);
 
 		PageSetLSN(page, recptr);
+
+		if (all_visible_cleared)
+			PageSetLSN(BufferGetPage(vmbuffer), recptr);
 	}
 
 	END_CRIT_SECTION();
 
+	if (all_visible_cleared)
+		LockBuffer(vmbuffer, BUFFER_LOCK_UNLOCK);
+
 	UnlockReleaseBuffer(buffer);
 	if (vmbuffer != InvalidBuffer)
 		ReleaseBuffer(vmbuffer);
@@ -2505,10 +2517,11 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
 		if (PageIsAllVisible(page) && !(options & HEAP_INSERT_FROZEN))
 		{
 			all_visible_cleared = true;
+			LockBuffer(vmbuffer, BUFFER_LOCK_EXCLUSIVE);
+			visibilitymap_clear_locked(relation,
+									   BufferGetBlockNumber(buffer),
+									   vmbuffer, VISIBILITYMAP_VALID_BITS);
 			PageClearAllVisible(page);
-			visibilitymap_clear(relation,
-								BufferGetBlockNumber(buffer),
-								vmbuffer, VISIBILITYMAP_VALID_BITS);
 		}
 		else if (all_frozen_set)
 			PageSetAllVisible(page);
@@ -2623,6 +2636,8 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
 			XLogRegisterData(xlrec, tupledata - scratch.data);
 			XLogRegisterBuffer(HEAP_MULTI_INSERT_BLKREF_HEAP, buffer,
 							   REGBUF_STANDARD | bufflags);
+			if (all_visible_cleared)
+				XLogRegisterBuffer(HEAP_MULTI_INSERT_BLKREF_VM, vmbuffer, 0);
 
 			XLogRegisterBufData(HEAP_MULTI_INSERT_BLKREF_HEAP, tupledata,
 								totaldatalen);
@@ -2633,10 +2648,15 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
 			recptr = XLogInsert(RM_HEAP2_ID, info);
 
 			PageSetLSN(page, recptr);
+			if (all_visible_cleared)
+				PageSetLSN(BufferGetPage(vmbuffer), recptr);
 		}
 
 		END_CRIT_SECTION();
 
+		if (all_visible_cleared)
+			LockBuffer(vmbuffer, BUFFER_LOCK_UNLOCK);
+
 		/*
 		 * If we've frozen everything on the page, update the visibilitymap.
 		 * We're already holding pin on the vmbuffer.
@@ -3054,9 +3074,10 @@ l1:
 	if (PageIsAllVisible(page))
 	{
 		all_visible_cleared = true;
+		LockBuffer(vmbuffer, BUFFER_LOCK_EXCLUSIVE);
+		visibilitymap_clear_locked(relation, BufferGetBlockNumber(buffer),
+								   vmbuffer, VISIBILITYMAP_VALID_BITS);
 		PageClearAllVisible(page);
-		visibilitymap_clear(relation, BufferGetBlockNumber(buffer),
-							vmbuffer, VISIBILITYMAP_VALID_BITS);
 	}
 
 	/* store transaction information of xact deleting the tuple */
@@ -3137,13 +3158,23 @@ l1:
 		/* filtering by origin on a row level is much more efficient */
 		XLogSetRecordFlags(XLOG_INCLUDE_ORIGIN);
 
+		if (all_visible_cleared)
+			XLogRegisterBuffer(HEAP_DELETE_BLKREF_VM, vmbuffer, 0);
+
 		recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_DELETE);
 
 		PageSetLSN(page, recptr);
+
+		if (all_visible_cleared)
+			PageSetLSN(BufferGetPage(vmbuffer), recptr);
 	}
 
 	END_CRIT_SECTION();
 
+	/* release VM lock first, since it covers many heap blocks */
+	if (all_visible_cleared)
+		LockBuffer(vmbuffer, BUFFER_LOCK_UNLOCK);
+
 	LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
 
 	if (vmbuffer != InvalidBuffer)
@@ -3261,7 +3292,8 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
 	HeapTuple	heaptup;
 	HeapTuple	old_key_tuple = NULL;
 	bool		old_key_copied = false;
-	Page		page;
+	Page		page,
+				newpage;
 	BlockNumber block;
 	MultiXactStatus mxact_status;
 	Buffer		buffer,
@@ -3278,6 +3310,8 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
 	bool		key_intact;
 	bool		all_visible_cleared = false;
 	bool		all_visible_cleared_new = false;
+	bool		modified_vmbuffer = false;
+	bool		modified_vmbuffer_new = false;
 	bool		checked_lockers;
 	bool		locker_remains;
 	bool		id_has_external = false;
@@ -3386,8 +3420,11 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
 
 		UnlockReleaseBuffer(buffer);
 		Assert(!have_tuple_lock);
-		if (vmbuffer != InvalidBuffer)
+		if (BufferIsValid(vmbuffer))
+		{
 			ReleaseBuffer(vmbuffer);
+			vmbuffer = InvalidBuffer;
+		}
 		tmfd->ctid = *otid;
 		tmfd->xmax = InvalidTransactionId;
 		tmfd->cmax = InvalidCommandId;
@@ -3690,8 +3727,11 @@ l2:
 		UnlockReleaseBuffer(buffer);
 		if (have_tuple_lock)
 			UnlockTupleTuplock(relation, &(oldtup.t_self), *lockmode);
-		if (vmbuffer != InvalidBuffer)
+		if (BufferIsValid(vmbuffer))
+		{
 			ReleaseBuffer(vmbuffer);
+			vmbuffer = InvalidBuffer;
+		}
 		*update_indexes = TU_None;
 
 		bms_free(hot_attrs);
@@ -3874,10 +3914,15 @@ l2:
 		 * overhead would be unchanged, that doesn't seem necessarily
 		 * worthwhile.
 		 */
-		if (PageIsAllVisible(page) &&
-			visibilitymap_clear(relation, block, vmbuffer,
-								VISIBILITYMAP_ALL_FROZEN))
-			cleared_all_frozen = true;
+		if (PageIsAllVisible(page))
+		{
+			LockBuffer(vmbuffer, BUFFER_LOCK_EXCLUSIVE);
+			if (visibilitymap_clear_locked(relation, block, vmbuffer,
+										   VISIBILITYMAP_ALL_FROZEN))
+				cleared_all_frozen = true;
+			else
+				LockBuffer(vmbuffer, BUFFER_LOCK_UNLOCK);
+		}
 
 		MarkBufferDirty(buffer);
 
@@ -3896,12 +3941,23 @@ l2:
 			xlrec.flags =
 				cleared_all_frozen ? XLH_LOCK_ALL_FROZEN_CLEARED : 0;
 			XLogRegisterData(&xlrec, SizeOfHeapLock);
+
+			if (cleared_all_frozen)
+				XLogRegisterBuffer(HEAP_LOCK_BLKREF_VM, vmbuffer, 0);
+
 			recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_LOCK);
 			PageSetLSN(page, recptr);
+
+			if (cleared_all_frozen)
+				PageSetLSN(BufferGetPage(vmbuffer), recptr);
 		}
 
 		END_CRIT_SECTION();
 
+		/* release VM lock first, since it covers many heap blocks */
+		if (cleared_all_frozen)
+			LockBuffer(vmbuffer, BUFFER_LOCK_UNLOCK);
+
 		LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
 
 		/*
@@ -3987,6 +4043,8 @@ l2:
 		heaptup = newtup;
 	}
 
+	newpage = BufferGetPage(newbuf);
+
 	/*
 	 * We're about to do the actual update -- check for conflict first, to
 	 * avoid possibly having to roll back work we've just done.
@@ -4100,20 +4158,85 @@ l2:
 	/* record address of new tuple in t_ctid of old one */
 	oldtup.t_data->t_ctid = heaptup->t_self;
 
-	/* clear PD_ALL_VISIBLE flags, reset all visibilitymap bits */
-	if (PageIsAllVisible(BufferGetPage(buffer)))
+	/*
+	 * Clear PD_ALL_VISIBLE flags and reset visibility map bits for any heap
+	 * pages that were all-visible. If there are two heap pages, we may need
+	 * to clear VM bits for both.
+	 */
+	if (PageIsAllVisible(page) &&
+		newbuf != buffer && PageIsAllVisible(newpage) &&
+		vmbuffer_new == vmbuffer)
 	{
+		/*
+		 * This is the more complicated case: both the new and old heap pages
+		 * are all-visible and both their VM bits are on the same page of the
+		 * VM, so we register a single VM buffer as HEAP_UPDATE_BLKREF_VM_NEW
+		 * in the WAL record. We must be careful to only lock and register one
+		 * buffer, even though we modify it twice -- once for each heap
+		 * block's VM bits.
+		 */
+		LockBuffer(vmbuffer_new, BUFFER_LOCK_EXCLUSIVE);
+		visibilitymap_clear_locked(relation, block,
+								   vmbuffer_new, VISIBILITYMAP_VALID_BITS);
+		visibilitymap_clear_locked(relation, BufferGetBlockNumber(newbuf),
+								   vmbuffer_new, VISIBILITYMAP_VALID_BITS);
+		PageClearAllVisible(page);
+		PageClearAllVisible(newpage);
+		modified_vmbuffer_new = true;
 		all_visible_cleared = true;
-		PageClearAllVisible(BufferGetPage(buffer));
-		visibilitymap_clear(relation, BufferGetBlockNumber(buffer),
-							vmbuffer, VISIBILITYMAP_VALID_BITS);
+		all_visible_cleared_new = true;
 	}
-	if (newbuf != buffer && PageIsAllVisible(BufferGetPage(newbuf)))
+	else
 	{
-		all_visible_cleared_new = true;
-		PageClearAllVisible(BufferGetPage(newbuf));
-		visibilitymap_clear(relation, BufferGetBlockNumber(newbuf),
-							vmbuffer_new, VISIBILITYMAP_VALID_BITS);
+		/*
+		 * In all the remaining cases, we will clear at most one heap block's
+		 * VM bits per VM page.
+		 */
+		bool		old_all_visible = PageIsAllVisible(page);
+		bool		new_all_visible = newbuf != buffer && PageIsAllVisible(newpage);
+		Buffer		vmbuffers[2] = {
+			old_all_visible ? vmbuffer : InvalidBuffer,
+			new_all_visible ? vmbuffer_new : InvalidBuffer
+		};
+
+		/*
+		 * When both pages need different VM pages cleared, acquire the VM
+		 * buffer locks in VM block order to avoid deadlocks between backends
+		 * updating tuples in opposite directions across VM pages.
+		 */
+		if (old_all_visible && new_all_visible &&
+			BufferGetBlockNumber(vmbuffers[0]) > BufferGetBlockNumber(vmbuffers[1]))
+		{
+			Buffer		t = vmbuffers[0];
+
+			vmbuffers[0] = vmbuffers[1];
+			vmbuffers[1] = t;
+		}
+
+		Assert((!BufferIsValid(vmbuffers[0]) && !BufferIsValid(vmbuffers[1])) ||
+			   vmbuffers[0] != vmbuffers[1]);
+
+		if (BufferIsValid(vmbuffers[0]))
+			LockBuffer(vmbuffers[0], BUFFER_LOCK_EXCLUSIVE);
+		if (BufferIsValid(vmbuffers[1]))
+			LockBuffer(vmbuffers[1], BUFFER_LOCK_EXCLUSIVE);
+
+		if (old_all_visible)
+		{
+			visibilitymap_clear_locked(relation, block,
+									   vmbuffer, VISIBILITYMAP_VALID_BITS);
+			modified_vmbuffer = true;
+			PageClearAllVisible(page);
+			all_visible_cleared = true;
+		}
+		if (new_all_visible)
+		{
+			visibilitymap_clear_locked(relation, BufferGetBlockNumber(newbuf),
+									   vmbuffer_new, VISIBILITYMAP_VALID_BITS);
+			modified_vmbuffer_new = true;
+			PageClearAllVisible(newpage);
+			all_visible_cleared_new = true;
+		}
 	}
 
 	if (newbuf != buffer)
@@ -4136,19 +4259,30 @@ l2:
 		}
 
 		recptr = log_heap_update(relation, buffer,
-								 newbuf, &oldtup, heaptup,
+								 modified_vmbuffer ? vmbuffer : InvalidBuffer,
+								 newbuf,
+								 modified_vmbuffer_new ? vmbuffer_new : InvalidBuffer,
+								 &oldtup, heaptup,
 								 old_key_tuple,
 								 all_visible_cleared,
 								 all_visible_cleared_new);
 		if (newbuf != buffer)
-		{
-			PageSetLSN(BufferGetPage(newbuf), recptr);
-		}
+			PageSetLSN(newpage, recptr);
 		PageSetLSN(BufferGetPage(buffer), recptr);
+
+		if (modified_vmbuffer)
+			PageSetLSN(BufferGetPage(vmbuffer), recptr);
+		if (modified_vmbuffer_new)
+			PageSetLSN(BufferGetPage(vmbuffer_new), recptr);
 	}
 
 	END_CRIT_SECTION();
 
+	if (modified_vmbuffer)
+		LockBuffer(vmbuffer, BUFFER_LOCK_UNLOCK);
+	if (modified_vmbuffer_new)
+		LockBuffer(vmbuffer_new, BUFFER_LOCK_UNLOCK);
+
 	if (newbuf != buffer)
 		LockBuffer(newbuf, BUFFER_LOCK_UNLOCK);
 	LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
@@ -5197,10 +5331,15 @@ failed:
 		tuple->t_data->t_ctid = *tid;
 
 	/* Clear only the all-frozen bit on visibility map if needed */
-	if (PageIsAllVisible(page) &&
-		visibilitymap_clear(relation, block, vmbuffer,
-							VISIBILITYMAP_ALL_FROZEN))
-		cleared_all_frozen = true;
+	if (PageIsAllVisible(page))
+	{
+		LockBuffer(vmbuffer, BUFFER_LOCK_EXCLUSIVE);
+		if (visibilitymap_clear_locked(relation, block, vmbuffer,
+									   VISIBILITYMAP_ALL_FROZEN))
+			cleared_all_frozen = true;
+		else
+			LockBuffer(vmbuffer, BUFFER_LOCK_UNLOCK);
+	}
 
 
 	MarkBufferDirty(*buffer);
@@ -5232,15 +5371,25 @@ failed:
 		xlrec.flags = cleared_all_frozen ? XLH_LOCK_ALL_FROZEN_CLEARED : 0;
 		XLogRegisterData(&xlrec, SizeOfHeapLock);
 
+		if (cleared_all_frozen)
+			XLogRegisterBuffer(HEAP_LOCK_BLKREF_VM, vmbuffer, 0);
+
 		/* we don't decode row locks atm, so no need to log the origin */
 
 		recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_LOCK);
 
 		PageSetLSN(page, recptr);
+
+		if (cleared_all_frozen)
+			PageSetLSN(BufferGetPage(vmbuffer), recptr);
 	}
 
 	END_CRIT_SECTION();
 
+	/* release VM lock first, since it covers many heap blocks */
+	if (cleared_all_frozen)
+		LockBuffer(vmbuffer, BUFFER_LOCK_UNLOCK);
+
 	result = TM_Ok;
 
 out_locked:
@@ -5951,10 +6100,15 @@ l4:
 								  xid, mode, false,
 								  &new_xmax, &new_infomask, &new_infomask2);
 
-		if (PageIsAllVisible(BufferGetPage(buf)) &&
-			visibilitymap_clear(rel, block, vmbuffer,
-								VISIBILITYMAP_ALL_FROZEN))
-			cleared_all_frozen = true;
+		if (PageIsAllVisible(BufferGetPage(buf)))
+		{
+			LockBuffer(vmbuffer, BUFFER_LOCK_EXCLUSIVE);
+			if (visibilitymap_clear_locked(rel, block, vmbuffer,
+										   VISIBILITYMAP_ALL_FROZEN))
+				cleared_all_frozen = true;
+			else
+				LockBuffer(vmbuffer, BUFFER_LOCK_UNLOCK);
+		}
 
 		START_CRIT_SECTION();
 
@@ -5985,13 +6139,23 @@ l4:
 
 			XLogRegisterData(&xlrec, SizeOfHeapLockUpdated);
 
+			if (cleared_all_frozen)
+				XLogRegisterBuffer(HEAP_LOCK_BLKREF_VM, vmbuffer, 0);
+
 			recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_LOCK_UPDATED);
 
 			PageSetLSN(page, recptr);
+
+			if (cleared_all_frozen)
+				PageSetLSN(BufferGetPage(vmbuffer), recptr);
 		}
 
 		END_CRIT_SECTION();
 
+		/* release VM lock first, since it covers many heap blocks */
+		if (cleared_all_frozen)
+			LockBuffer(vmbuffer, BUFFER_LOCK_UNLOCK);
+
 next:
 		/* if we find the end of update chain, we're done. */
 		if (mytup.t_data->t_infomask & HEAP_XMAX_INVALID ||
@@ -8848,8 +9012,9 @@ log_heap_visible(Relation rel, Buffer heap_buffer, Buffer vm_buffer,
  * have modified the buffer(s) and marked them dirty.
  */
 static XLogRecPtr
-log_heap_update(Relation reln, Buffer oldbuf,
-				Buffer newbuf, HeapTuple oldtup, HeapTuple newtup,
+log_heap_update(Relation reln, Buffer oldbuf, Buffer vmbuffer_old,
+				Buffer newbuf, Buffer vmbuffer_new,
+				HeapTuple oldtup, HeapTuple newtup,
 				HeapTuple old_key_tuple,
 				bool all_visible_cleared, bool new_all_visible_cleared)
 {
@@ -9058,6 +9223,20 @@ log_heap_update(Relation reln, Buffer oldbuf,
 						 old_key_tuple->t_len - SizeofHeapTupleHeader);
 	}
 
+	/*
+	 * Register VM buffers. If the old and new heap pages' VM bits are on the
+	 * same VM page, the caller passes only vmbuffer_new (mirroring the heap
+	 * page convention where block 0 = new is always registered).
+	 */
+	Assert((BufferIsInvalid(vmbuffer_old) && BufferIsInvalid(vmbuffer_new)) ||
+		   (vmbuffer_old != vmbuffer_new));
+
+	if (BufferIsValid(vmbuffer_new))
+		XLogRegisterBuffer(HEAP_UPDATE_BLKREF_VM_NEW, vmbuffer_new, 0);
+
+	if (BufferIsValid(vmbuffer_old))
+		XLogRegisterBuffer(HEAP_UPDATE_BLKREF_VM_OLD, vmbuffer_old, 0);
+
 	/* filtering by origin on a row level is much more efficient */
 	XLogSetRecordFlags(XLOG_INCLUDE_ORIGIN);
 
diff --git a/src/backend/access/heap/heapam_xlog.c b/src/backend/access/heap/heapam_xlog.c
index 6cc3bc991c3..4ad56d76768 100644
--- a/src/backend/access/heap/heapam_xlog.c
+++ b/src/backend/access/heap/heapam_xlog.c
@@ -22,6 +22,51 @@
 #include "storage/freespace.h"
 #include "storage/standby.h"
 
+/*
+ * Helper to clear visibility map bits during heap redo.
+ *
+ * This handles records that modify one heap block and its corresponding VM
+ * block. More complex cases, such as update records that can touch multiple
+ * heap or VM blocks, must handle VM replay directly. 'flags' specifies which
+ * visibility map bits to clear.
+ */
+static void
+heap_xlog_vm_clear(XLogReaderState *record,
+				   XLogRecPtr lsn, RelFileLocator target_locator,
+				   BlockNumber heap_blkno,
+				   int vm_block_id, uint8 flags)
+{
+	Relation	reln = CreateFakeRelcacheEntry(target_locator);
+	Buffer		vmbuffer = InvalidBuffer;
+
+	/*
+	 * If this record includes the VM block, use it for redo. Older minor
+	 * releases did not register the VM buffer when clearing the VM, so keep
+	 * the fallback path to support replay of WAL generated before that fix.
+	 */
+	if (XLogRecHasBlockRef(record, vm_block_id))
+	{
+		if (XLogReadBufferForRedo(record, vm_block_id,
+								  &vmbuffer) == BLK_NEEDS_REDO)
+		{
+			visibilitymap_clear_locked(reln,
+									   heap_blkno, vmbuffer,
+									   flags);
+			PageSetLSN(BufferGetPage(vmbuffer), lsn);
+		}
+		if (BufferIsValid(vmbuffer))
+			UnlockReleaseBuffer(vmbuffer);
+	}
+	else
+	{
+		visibilitymap_pin(reln, heap_blkno, &vmbuffer);
+		visibilitymap_clear(reln, heap_blkno, vmbuffer, flags);
+		ReleaseBuffer(vmbuffer);
+	}
+
+	FreeFakeRelcacheEntry(reln);
+}
+
 
 /*
  * Replay XLOG_HEAP2_PRUNE_* records.
@@ -360,15 +405,9 @@ heap_xlog_delete(XLogReaderState *record)
 	 * already up-to-date.
 	 */
 	if (xlrec->flags & XLH_DELETE_ALL_VISIBLE_CLEARED)
-	{
-		Relation	reln = CreateFakeRelcacheEntry(target_locator);
-		Buffer		vmbuffer = InvalidBuffer;
-
-		visibilitymap_pin(reln, blkno, &vmbuffer);
-		visibilitymap_clear(reln, blkno, vmbuffer, VISIBILITYMAP_VALID_BITS);
-		ReleaseBuffer(vmbuffer);
-		FreeFakeRelcacheEntry(reln);
-	}
+		heap_xlog_vm_clear(record, lsn, target_locator,
+						   blkno, HEAP_DELETE_BLKREF_VM,
+						   VISIBILITYMAP_VALID_BITS);
 
 	if (XLogReadBufferForRedo(record, HEAP_DELETE_BLKREF_HEAP,
 							  &buffer) == BLK_NEEDS_REDO)
@@ -449,15 +488,9 @@ heap_xlog_insert(XLogReaderState *record)
 	 * already up-to-date.
 	 */
 	if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
-	{
-		Relation	reln = CreateFakeRelcacheEntry(target_locator);
-		Buffer		vmbuffer = InvalidBuffer;
-
-		visibilitymap_pin(reln, blkno, &vmbuffer);
-		visibilitymap_clear(reln, blkno, vmbuffer, VISIBILITYMAP_VALID_BITS);
-		ReleaseBuffer(vmbuffer);
-		FreeFakeRelcacheEntry(reln);
-	}
+		heap_xlog_vm_clear(record, lsn, target_locator,
+						   blkno, HEAP_INSERT_BLKREF_VM,
+						   VISIBILITYMAP_VALID_BITS);
 
 	/*
 	 * If we inserted the first and only tuple on the page, re-initialize the
@@ -571,19 +604,19 @@ heap_xlog_multi_insert(XLogReaderState *record)
 			 (xlrec->flags & XLH_INSERT_ALL_FROZEN_SET)));
 
 	/*
-	 * The visibility map may need to be fixed even if the heap page is
-	 * already up-to-date.
+	 * If required, clear the VM first, to prevent all-visible from being
+	 * temporarily set for a heap page that's not all-visible anymore.
+	 *
+	 * If it were possible for XLH_INSERT_ALL_VISIBLE_CLEARED and
+	 * XLH_INSERT_ALL_FROZEN_SET to be present in the same record, doing the
+	 * XLogReadBufferForRedo() before the PageSetAllVisible() below would be a
+	 * problem, as it'd violate the rule that a heap page must never be set
+	 * all-visible in the VM while its PD_ALL_VISIBLE is clear.
 	 */
 	if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
-	{
-		Relation	reln = CreateFakeRelcacheEntry(rlocator);
-		Buffer		vmbuffer = InvalidBuffer;
-
-		visibilitymap_pin(reln, blkno, &vmbuffer);
-		visibilitymap_clear(reln, blkno, vmbuffer, VISIBILITYMAP_VALID_BITS);
-		ReleaseBuffer(vmbuffer);
-		FreeFakeRelcacheEntry(reln);
-	}
+		heap_xlog_vm_clear(record, lsn, rlocator,
+						   blkno, HEAP_MULTI_INSERT_BLKREF_VM,
+						   VISIBILITYMAP_VALID_BITS);
 
 	if (isinit)
 	{
@@ -698,6 +731,8 @@ heap_xlog_update(XLogReaderState *record, bool hot_update)
 	Buffer		obuffer,
 				nbuffer;
 	Page		page;
+	bool		new_cleared,
+				old_cleared;
 	OffsetNumber offnum;
 	ItemId		lp = NULL;
 	HeapTupleData oldtup;
@@ -737,14 +772,107 @@ heap_xlog_update(XLogReaderState *record, bool hot_update)
 	 * The visibility map may need to be fixed even if the heap page is
 	 * already up-to-date.
 	 */
-	if (xlrec->flags & XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED)
+	new_cleared = (xlrec->flags & XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED) != 0;
+	old_cleared = (xlrec->flags & XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED) != 0;
+	if (new_cleared || old_cleared)
 	{
 		Relation	reln = CreateFakeRelcacheEntry(rlocator);
-		Buffer		vmbuffer = InvalidBuffer;
+		bool		has_vm_old = XLogRecHasBlockRef(record, HEAP_UPDATE_BLKREF_VM_OLD);
+		bool		has_vm_new = XLogRecHasBlockRef(record, HEAP_UPDATE_BLKREF_VM_NEW);
+
+		if (has_vm_new || has_vm_old)
+		{
+			/*
+			 * If both the old and new heap pages were all-visible and their
+			 * VM bits are on the same VM page, that single VM page is
+			 * registered as HEAP_UPDATE_BLKREF_VM_NEW. Clear both heap
+			 * blocks' VM bits from the single provided VM buffer.
+			 */
+			bool		same_vm_page = new_cleared && old_cleared &&
+				!has_vm_old && has_vm_new;
+
+			if (same_vm_page)
+			{
+				Buffer		vmbuffer = InvalidBuffer;
+
+				if (XLogReadBufferForRedo(record, HEAP_UPDATE_BLKREF_VM_NEW, &vmbuffer) ==
+					BLK_NEEDS_REDO)
+				{
+					visibilitymap_clear_locked(reln, oldblk, vmbuffer,
+											   VISIBILITYMAP_VALID_BITS);
+					visibilitymap_clear_locked(reln, newblk, vmbuffer,
+											   VISIBILITYMAP_VALID_BITS);
+					PageSetLSN(BufferGetPage(vmbuffer), lsn);
+				}
+
+				if (BufferIsValid(vmbuffer))
+					UnlockReleaseBuffer(vmbuffer);
+			}
+			else
+			{
+				/*
+				 * We can be sure that we need to clear at most one heap
+				 * page's VM bits in each registered VM buffer.
+				 */
+				if (new_cleared)
+				{
+					Buffer		vmbuffer = InvalidBuffer;
+
+					if (XLogReadBufferForRedo(record, HEAP_UPDATE_BLKREF_VM_NEW, &vmbuffer) ==
+						BLK_NEEDS_REDO)
+					{
+						visibilitymap_clear_locked(reln, newblk, vmbuffer,
+												   VISIBILITYMAP_VALID_BITS);
+						PageSetLSN(BufferGetPage(vmbuffer), lsn);
+					}
+
+					if (BufferIsValid(vmbuffer))
+						UnlockReleaseBuffer(vmbuffer);
+				}
+				if (old_cleared)
+				{
+					Buffer		vmbuffer = InvalidBuffer;
+
+					if (XLogReadBufferForRedo(record, HEAP_UPDATE_BLKREF_VM_OLD, &vmbuffer) ==
+						BLK_NEEDS_REDO)
+					{
+						visibilitymap_clear_locked(reln, oldblk, vmbuffer,
+												   VISIBILITYMAP_VALID_BITS);
+						PageSetLSN(BufferGetPage(vmbuffer), lsn);
+					}
+
+					if (BufferIsValid(vmbuffer))
+						UnlockReleaseBuffer(vmbuffer);
+				}
+			}
+		}
+		else
+		{
+			/*
+			 * Backwards compatibility path. Previously, the VM buffers were
+			 * not registered in the WAL record. We need this path to replay
+			 * WAL generated by a not-yet-patched primary during upgrade.
+			 */
+			if (old_cleared)
+			{
+				Buffer		vmbuffer = InvalidBuffer;
+
+				visibilitymap_pin(reln, oldblk, &vmbuffer);
+				visibilitymap_clear(reln, oldblk, vmbuffer,
+									VISIBILITYMAP_VALID_BITS);
+				ReleaseBuffer(vmbuffer);
+			}
+			if (new_cleared)
+			{
+				Buffer		vmbuffer = InvalidBuffer;
+
+				visibilitymap_pin(reln, newblk, &vmbuffer);
+				visibilitymap_clear(reln, newblk, vmbuffer,
+									VISIBILITYMAP_VALID_BITS);
+				ReleaseBuffer(vmbuffer);
+			}
+		}
 
-		visibilitymap_pin(reln, oldblk, &vmbuffer);
-		visibilitymap_clear(reln, oldblk, vmbuffer, VISIBILITYMAP_VALID_BITS);
-		ReleaseBuffer(vmbuffer);
 		FreeFakeRelcacheEntry(reln);
 	}
 
@@ -819,21 +947,6 @@ heap_xlog_update(XLogReaderState *record, bool hot_update)
 		newaction = XLogReadBufferForRedo(record, HEAP_UPDATE_BLKREF_NEW,
 										  &nbuffer);
 
-	/*
-	 * The visibility map may need to be fixed even if the heap page is
-	 * already up-to-date.
-	 */
-	if (xlrec->flags & XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED)
-	{
-		Relation	reln = CreateFakeRelcacheEntry(rlocator);
-		Buffer		vmbuffer = InvalidBuffer;
-
-		visibilitymap_pin(reln, newblk, &vmbuffer);
-		visibilitymap_clear(reln, newblk, vmbuffer, VISIBILITYMAP_VALID_BITS);
-		ReleaseBuffer(vmbuffer);
-		FreeFakeRelcacheEntry(reln);
-	}
-
 	/* Deal with new tuple */
 	if (newaction == BLK_NEEDS_REDO)
 	{
@@ -1022,20 +1135,15 @@ heap_xlog_lock(XLogReaderState *record)
 	 */
 	if (xlrec->flags & XLH_LOCK_ALL_FROZEN_CLEARED)
 	{
-		RelFileLocator rlocator;
-		Buffer		vmbuffer = InvalidBuffer;
 		BlockNumber block;
-		Relation	reln;
+		RelFileLocator rlocator;
 
 		XLogRecGetBlockTag(record, HEAP_LOCK_BLKREF_HEAP, &rlocator, NULL,
 						   &block);
-		reln = CreateFakeRelcacheEntry(rlocator);
-
-		visibilitymap_pin(reln, block, &vmbuffer);
-		visibilitymap_clear(reln, block, vmbuffer, VISIBILITYMAP_ALL_FROZEN);
 
-		ReleaseBuffer(vmbuffer);
-		FreeFakeRelcacheEntry(reln);
+		heap_xlog_vm_clear(record, lsn, rlocator,
+						   block, HEAP_LOCK_BLKREF_VM,
+						   VISIBILITYMAP_ALL_FROZEN);
 	}
 
 	if (XLogReadBufferForRedo(record, HEAP_LOCK_BLKREF_HEAP,
@@ -1100,20 +1208,15 @@ heap_xlog_lock_updated(XLogReaderState *record)
 	 */
 	if (xlrec->flags & XLH_LOCK_ALL_FROZEN_CLEARED)
 	{
-		RelFileLocator rlocator;
-		Buffer		vmbuffer = InvalidBuffer;
 		BlockNumber block;
-		Relation	reln;
+		RelFileLocator rlocator;
 
 		XLogRecGetBlockTag(record, HEAP_LOCK_BLKREF_HEAP, &rlocator, NULL,
 						   &block);
-		reln = CreateFakeRelcacheEntry(rlocator);
-
-		visibilitymap_pin(reln, block, &vmbuffer);
-		visibilitymap_clear(reln, block, vmbuffer, VISIBILITYMAP_ALL_FROZEN);
 
-		ReleaseBuffer(vmbuffer);
-		FreeFakeRelcacheEntry(reln);
+		heap_xlog_vm_clear(record, lsn, rlocator,
+						   block, HEAP_LOCK_BLKREF_VM,
+						   VISIBILITYMAP_ALL_FROZEN);
 	}
 
 	if (XLogReadBufferForRedo(record, HEAP_LOCK_BLKREF_HEAP,
diff --git a/src/backend/access/heap/visibilitymap.c b/src/backend/access/heap/visibilitymap.c
index 1874a3fda37..89079b52a2f 100644
--- a/src/backend/access/heap/visibilitymap.c
+++ b/src/backend/access/heap/visibilitymap.c
@@ -135,9 +135,27 @@ static Buffer vm_extend(Relation rel, BlockNumber vm_nblocks);
  * You must pass a buffer containing the correct map page to this function.
  * Call visibilitymap_pin first to pin the right one. This function doesn't do
  * any I/O.  Returns true if any bits have been cleared and false otherwise.
+ *
+ * This is retained for backwards compatibility, as it is usually necessary to
+ * register the VM buffer in the WAL record and this necessitates holding the
+ * lock for longer than it is held here.
  */
 bool
 visibilitymap_clear(Relation rel, BlockNumber heapBlk, Buffer vmbuf, uint8 flags)
+{
+	bool		cleared = false;
+
+	LockBuffer(vmbuf, BUFFER_LOCK_EXCLUSIVE);
+
+	cleared = visibilitymap_clear_locked(rel, heapBlk, vmbuf, flags);
+
+	LockBuffer(vmbuf, BUFFER_LOCK_UNLOCK);
+
+	return cleared;
+}
+
+bool
+visibilitymap_clear_locked(Relation rel, BlockNumber heapBlk, Buffer vmbuf, uint8 flags)
 {
 	BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(heapBlk);
 	int			mapByte = HEAPBLK_TO_MAPBYTE(heapBlk);
@@ -157,7 +175,8 @@ visibilitymap_clear(Relation rel, BlockNumber heapBlk, Buffer vmbuf, uint8 flags
 	if (!BufferIsValid(vmbuf) || BufferGetBlockNumber(vmbuf) != mapBlock)
 		elog(ERROR, "wrong buffer passed to visibilitymap_clear");
 
-	LockBuffer(vmbuf, BUFFER_LOCK_EXCLUSIVE);
+	Assert(BufferIsExclusiveLocked(vmbuf));
+
 	map = PageGetContents(BufferGetPage(vmbuf));
 
 	if (map[mapByte] & mask)
@@ -168,7 +187,6 @@ visibilitymap_clear(Relation rel, BlockNumber heapBlk, Buffer vmbuf, uint8 flags
 		cleared = true;
 	}
 
-	LockBuffer(vmbuf, BUFFER_LOCK_UNLOCK);
 
 	return cleared;
 }
diff --git a/src/bin/pg_walsummary/t/002_blocks.pl b/src/bin/pg_walsummary/t/002_blocks.pl
index 0f98c7df82e..fcb701e1b98 100644
--- a/src/bin/pg_walsummary/t/002_blocks.pl
+++ b/src/bin/pg_walsummary/t/002_blocks.pl
@@ -93,13 +93,14 @@ my $filename = sprintf "%s/pg_wal/summaries/%08s%08s%08s%08s%08s.summary",
   split(m@/@, $end_lsn);
 ok(-f $filename, "WAL summary file exists");
 
-# Run pg_walsummary on it. We expect exactly two blocks to be modified,
-# block 0 and one other.
+# Run pg_walsummary on it. We expect exactly three blocks to be modified,
+# block 0 (old tuple), another block (new tuple), and the block for the VM.
 my ($stdout, $stderr) = run_command([ 'pg_walsummary', '-i', $filename ]);
 note($stdout);
 @lines = split(/\n/, $stdout);
 like($stdout, qr/FORK main: block 0$/m, "stdout shows block 0 modified");
+like($stdout, qr/FORK vm: block 0$/m, "stdout shows VM block 0 modified");
 is($stderr, '', 'stderr is empty');
-is(0 + @lines, 2, "UPDATE modified 2 blocks");
+is(0 + @lines, 3, "UPDATE modified 3 blocks");
 
 done_testing();
diff --git a/src/include/access/heapam_xlog.h b/src/include/access/heapam_xlog.h
index 55e3c7b0015..752f69a2691 100644
--- a/src/include/access/heapam_xlog.h
+++ b/src/include/access/heapam_xlog.h
@@ -225,10 +225,19 @@ typedef struct xl_multi_insert_tuple
  *
  * HEAP_UPDATE_BLKREF_OLD: old page, if different. (no data, just a reference
  * to the block)
+ *
+ * HEAP_UPDATE_BLKREF_VM_NEW: VM page covering the new heap page. Registered
+ * whenever XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED is set. Also covers the old heap
+ * page's VM bits when both heap pages map to the same VM page.
+ *
+ * HEAP_UPDATE_BLKREF_VM_OLD: VM page covering the old heap page. Only
+ * registered when XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED is set and the old heap
+ * page's VM bits are on a different VM page from the new heap page's.
  */
-
-#define HEAP_UPDATE_BLKREF_NEW        0
-#define HEAP_UPDATE_BLKREF_OLD        1
+#define HEAP_UPDATE_BLKREF_NEW		0
+#define HEAP_UPDATE_BLKREF_OLD		1
+#define HEAP_UPDATE_BLKREF_VM_NEW	2
+#define HEAP_UPDATE_BLKREF_VM_OLD	3
 
 typedef struct xl_heap_update
 {
diff --git a/src/include/access/visibilitymap.h b/src/include/access/visibilitymap.h
index ea889bf9ec7..1bc59c9ac33 100644
--- a/src/include/access/visibilitymap.h
+++ b/src/include/access/visibilitymap.h
@@ -26,6 +26,8 @@
 #define VM_ALL_FROZEN(r, b, v) \
 	((visibilitymap_get_status((r), (b), (v)) & VISIBILITYMAP_ALL_FROZEN) != 0)
 
+extern bool visibilitymap_clear_locked(Relation rel, BlockNumber heapBlk,
+									   Buffer vmbuf, uint8 flags);
 extern bool visibilitymap_clear(Relation rel, BlockNumber heapBlk,
 								Buffer vmbuf, uint8 flags);
 extern void visibilitymap_pin(Relation rel, BlockNumber heapBlk,
-- 
2.43.0

