From 054647bcc53e57a5f30fecd9c7de1d8e74570417 Mon Sep 17 00:00:00 2001
From: Andres Freund <andres@anarazel.de>
Date: Mon, 15 Sep 2025 17:14:12 -0400
Subject: [PATCH v3 4/6] bufmgr: Use atomic sub/or for unpinning and marking
 buffers dirty

The prior commit made it legal to modify BufferDesc.state while the buffer
header spinlock is held. This allows us to replace a few CAS loops with atomic
sub/or.

Particularly the change in UnpinBufferNoOwner() improves scalability
significantly. See the prior commit for more background.

Author:
Reviewed-by:
Discussion: https://postgr.es/m/
Backpatch:
---
 src/backend/storage/buffer/bufmgr.c | 53 ++++-------------------------
 1 file changed, 6 insertions(+), 47 deletions(-)

diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index c498402e85b..ec8fad2d72e 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -2906,8 +2906,7 @@ void
 MarkBufferDirty(Buffer buffer)
 {
 	BufferDesc *bufHdr;
-	uint32		buf_state;
-	uint32		old_buf_state;
+	uint32		old_buf_state PG_USED_FOR_ASSERTS_ONLY;
 
 	if (!BufferIsValid(buffer))
 		elog(ERROR, "bad buffer ID: %d", buffer);
@@ -2924,26 +2923,9 @@ MarkBufferDirty(Buffer buffer)
 	Assert(LWLockHeldByMeInMode(BufferDescriptorGetContentLock(bufHdr),
 								LW_EXCLUSIVE));
 
-	/*
-	 * TODO: A future commit will replace this loop with a single atomic
-	 * operation, we do not need to wait for the buffer header spinlock to be
-	 * released anymore.
-	 */
-	old_buf_state = pg_atomic_read_u32(&bufHdr->state);
-	for (;;)
-	{
-		if (old_buf_state & BM_LOCKED)
-			old_buf_state = WaitBufHdrUnlocked(bufHdr);
+	old_buf_state = pg_atomic_fetch_or_u32(&bufHdr->state, BM_DIRTY | BM_JUST_DIRTIED);
 
-		buf_state = old_buf_state;
-
-		Assert(BUF_STATE_GET_REFCOUNT(buf_state) > 0);
-		buf_state |= BM_DIRTY | BM_JUST_DIRTIED;
-
-		if (pg_atomic_compare_exchange_u32(&bufHdr->state, &old_buf_state,
-										   buf_state))
-			break;
-	}
+	Assert(BUF_STATE_GET_REFCOUNT(old_buf_state) > 0);
 
 	/*
 	 * If the buffer was not dirty already, do vacuum accounting.
@@ -3248,7 +3230,6 @@ UnpinBufferNoOwner(BufferDesc *buf)
 	ref->refcount--;
 	if (ref->refcount == 0)
 	{
-		uint32		buf_state;
 		uint32		old_buf_state;
 
 		/*
@@ -3263,33 +3244,11 @@ UnpinBufferNoOwner(BufferDesc *buf)
 		/* I'd better not still hold the buffer content lock */
 		Assert(!LWLockHeldByMe(BufferDescriptorGetContentLock(buf)));
 
-		/*
-		 * Decrement the shared reference count.
-		 *
-		 * Since buffer spinlock holder can update status using just write,
-		 * it's not safe to use atomic decrement here; thus use a CAS loop.
-		 *
-		 * TODO: The above requirement does not hold anymore, in a future
-		 * commit this will be rewritten to release the pin in a single atomic
-		 * operation.
-		 */
-		old_buf_state = pg_atomic_read_u32(&buf->state);
-		for (;;)
-		{
-			if (old_buf_state & BM_LOCKED)
-				old_buf_state = WaitBufHdrUnlocked(buf);
-
-			buf_state = old_buf_state;
-
-			buf_state -= BUF_REFCOUNT_ONE;
-
-			if (pg_atomic_compare_exchange_u32(&buf->state, &old_buf_state,
-											   buf_state))
-				break;
-		}
+		/* decrement the shared reference count */
+		old_buf_state = pg_atomic_fetch_sub_u32(&buf->state, BUF_REFCOUNT_ONE);
 
 		/* Support LockBufferForCleanup() */
-		if (buf_state & BM_PIN_COUNT_WAITER)
+		if (old_buf_state & BM_PIN_COUNT_WAITER)
 			WakePinCountWaiter(buf);
 
 		ForgetPrivateRefCountEntry(ref);
-- 
2.48.1.76.g4e746b1a31.dirty

