From dcd2c7887b26d313351b7616e7b24a8e199c9445 Mon Sep 17 00:00:00 2001
From: Andres Freund <andres@anarazel.de>
Date: Sun, 26 Jan 2025 15:18:46 -0500
Subject: [PATCH v9 02/10] heapam: Use exclusive lock on old page in CLUSTER

To be able to guarantee that we can set the hint bit, acquire an exclusive
lock on the old buffer. This is required as a future commit will only allow
hint bits to be set with a new lock level, which is acquired as-needed in a
non-blocking fashion.

We need the hint bits, set in heapam_relation_copy_for_cluster() ->
HeapTupleSatisfiesVacuum(), to be set, as otherwise reform_and_rewrite_tuple()
-> rewrite_heap_tuple() will get confused. Specifically, rewrite_heap_tuple()
checks for HEAP_XMAX_INVALID in the old tuple to determine whether to check
the old-to-new mapping hash table.

It'd be better if we somehow could avoid setting hint bits on the old page. A
common reason to use VACUUM FULL are very bloated tables - rewriting most of
the old table before during VACUUM FULL doesn't exactly help.

Author:
Reviewed-by:
Discussion: https://postgr.es/m/
Backpatch:
---
 src/backend/access/heap/heapam_handler.c    | 16 +++++++++++++++-
 src/backend/access/heap/heapam_visibility.c |  7 +++++++
 src/backend/access/heap/rewriteheap.c       |  3 +++
 3 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c
index 09a456e9966..505faaaf9d2 100644
--- a/src/backend/access/heap/heapam_handler.c
+++ b/src/backend/access/heap/heapam_handler.c
@@ -837,7 +837,21 @@ heapam_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap,
 		tuple = ExecFetchSlotHeapTuple(slot, false, NULL);
 		buf = hslot->buffer;
 
-		LockBuffer(buf, BUFFER_LOCK_SHARE);
+		/*
+		 * To be able to guarantee that we can set the hint bit, acquire an
+		 * exclusive lock on the old buffer. We need the hint bits, set in
+		 * heapam_relation_copy_for_cluster() -> HeapTupleSatisfiesVacuum(),
+		 * to be set, as otherwise reform_and_rewrite_tuple() ->
+		 * rewrite_heap_tuple() will get confused. Specifically,
+		 * rewrite_heap_tuple() checks for HEAP_XMAX_INVALID in the old tuple
+		 * to determine whether to check the old-to-new mapping hash table.
+		 *
+		 * It'd be better if we somehow could avoid setting hint bits on the
+		 * old page. One reason to use VACUUM FULL are very bloated tables -
+		 * rewriting most of the old table before during VACUUM FULL doesn't
+		 * exactly help...
+		 */
+		LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
 
 		switch (HeapTupleSatisfiesVacuum(tuple, OldestXmin, buf))
 		{
diff --git a/src/backend/access/heap/heapam_visibility.c b/src/backend/access/heap/heapam_visibility.c
index 05e70b7d92a..9a034d5c9e8 100644
--- a/src/backend/access/heap/heapam_visibility.c
+++ b/src/backend/access/heap/heapam_visibility.c
@@ -141,6 +141,13 @@ void
 HeapTupleSetHintBits(HeapTupleHeader tuple, Buffer buffer,
 					 uint16 infomask, TransactionId xid)
 {
+	/*
+	 * The uses from heapam.c rely on being able to perform the hint bit
+	 * updates, which can only be guaranteed if we are holding an exclusive
+	 * lock on the buffer - which all callers are doing.
+	 */
+	Assert(BufferIsLockedByMeInMode(buffer, BUFFER_LOCK_EXCLUSIVE));
+
 	SetHintBits(tuple, buffer, infomask, xid);
 }
 
diff --git a/src/backend/access/heap/rewriteheap.c b/src/backend/access/heap/rewriteheap.c
index bae3a2da77a..77fd48eb59e 100644
--- a/src/backend/access/heap/rewriteheap.c
+++ b/src/backend/access/heap/rewriteheap.c
@@ -382,6 +382,9 @@ rewrite_heap_tuple(RewriteState state,
 
 	/*
 	 * If the tuple has been updated, check the old-to-new mapping hash table.
+	 *
+	 * Note that this check relies on the HeapTupleSatisfiesVacuum() in
+	 * heapam_relation_copy_for_cluster() to have set hint bits.
 	 */
 	if (!((old_tuple->t_data->t_infomask & HEAP_XMAX_INVALID) ||
 		  HeapTupleHeaderIsOnlyLocked(old_tuple->t_data)) &&
-- 
2.48.1.76.g4e746b1a31.dirty

