From cc8e3efef44c7ce467fc61c7521d6c35c18a0f3c Mon Sep 17 00:00:00 2001
From: Melanie Plageman <melanieplageman@gmail.com>
Date: Mon, 2 Feb 2026 11:58:07 -0500
Subject: [PATCH v15 14/19] Remove SyncOneBuffer() and refactor BgBufferSync()

Since xxx, only bgwriter used SyncOneBuffer() and it can be easily
inlined, so do so. Simplifying the logic for a single case makes it much
easier to understand.

While we're here replace PinBuffer_Locked() with PinBuffer() since
PinBuffer() can now avoid incrementign the usage count.
---
 src/backend/access/transam/README   |   4 +-
 src/backend/commands/sequence.c     |  16 +--
 src/backend/storage/buffer/bufmgr.c | 149 +++++++++-------------------
 src/include/storage/proc.h          |   2 +-
 4 files changed, 62 insertions(+), 109 deletions(-)

diff --git a/src/backend/access/transam/README b/src/backend/access/transam/README
index 231106270fd..8ca94e0575b 100644
--- a/src/backend/access/transam/README
+++ b/src/backend/access/transam/README
@@ -448,7 +448,9 @@ critical section.)
 3. Apply the required changes to the shared buffer(s).
 
 4. Mark the shared buffer(s) as dirty with MarkBufferDirty().  (This must
-happen before the WAL record is inserted; see notes in SyncOneBuffer().)
+happen before the WAL record is inserted; otherwise a checkpoint could
+write the buffer without the change and its WAL record before the redo
+point, making the change unrecoverable.)
 Note that marking a buffer dirty with MarkBufferDirty() should only
 happen iff you write a WAL record; see Writing Hints below.
 
diff --git a/src/backend/commands/sequence.c b/src/backend/commands/sequence.c
index 551667650ba..e51afbc67cd 100644
--- a/src/backend/commands/sequence.c
+++ b/src/backend/commands/sequence.c
@@ -809,13 +809,15 @@ nextval_internal(Oid relid, bool check_permissions)
 	START_CRIT_SECTION();
 
 	/*
-	 * We must mark the buffer dirty before doing XLogInsert(); see notes in
-	 * SyncOneBuffer().  However, we don't apply the desired changes just yet.
-	 * This looks like a violation of the buffer update protocol, but it is in
-	 * fact safe because we hold exclusive lock on the buffer.  Any other
-	 * process, including a checkpoint, that tries to examine the buffer
-	 * contents will block until we release the lock, and then will see the
-	 * final state that we install below.
+	 * We must mark the buffer dirty before doing XLogInsert(); otherwise a
+	 * checkpoint could write the buffer without the change and its WAL record
+	 * before the redo point, making the change unrecoverable.  However, we
+	 * don't apply the desired changes just yet.  This looks like a violation
+	 * of the buffer update protocol, but it is in fact safe because we hold
+	 * exclusive lock on the buffer.  Any other process, including a
+	 * checkpoint, that tries to examine the buffer contents will block until
+	 * we release the lock, and then will see the final state that we install
+	 * below.
 	 */
 	MarkBufferDirty(buf);
 
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 7e9bdc7ef85..6c82e7d4039 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -75,10 +75,6 @@
 #define LocalBufHdrGetBlock(bufHdr) \
 	LocalBufferBlockPointers[-((bufHdr)->buf_id + 2)]
 
-/* Bits in SyncOneBuffer's return value */
-#define BUF_WRITTEN				0x01
-#define BUF_REUSABLE			0x02
-
 #define RELS_BSEARCH_THRESHOLD		20
 
 /*
@@ -663,8 +659,6 @@ static bool PinBuffer(BufferDesc *buf, BufferUsageCountChange usage_count_change
 static void PinBuffer_Locked(BufferDesc *buf);
 static void UnpinBuffer(BufferDesc *buf);
 static void UnpinBufferNoOwner(BufferDesc *buf);
-static int	SyncOneBuffer(int buf_id, bool skip_recently_used,
-						  WritebackContext *wb_context);
 static void WaitIO(BufferDesc *buf);
 static void AbortBufferIO(Buffer buffer);
 static void shared_buffer_write_error_callback(void *arg);
@@ -3814,8 +3808,12 @@ CheckPointBuffers(int flags)
 		uint64		buf_state;
 
 		/*
-		 * Header spinlock is enough to examine BM_DIRTY, see comment in
-		 * SyncOneBuffer.
+		 * We can make this check without taking the buffer content lock so
+		 * long as we mark pages dirty in access methods *before* logging
+		 * changes with XLogInsert(): if someone marks the buffer dirty just
+		 * after our check we don't worry because our checkpoint.redo points
+		 * before log record for upcoming changes and so we are not required
+		 * to write such dirty buffer.
 		 */
 		buf_state = LockBufHdr(bufHdr);
 
@@ -4444,29 +4442,57 @@ BgBufferSync(WritebackContext *wb_context)
 	reusable_buffers = reusable_buffers_est;
 
 	/* Execute the LRU scan */
-	while (num_to_scan > 0 && reusable_buffers < upcoming_alloc_est)
+	for (; num_to_scan > 0; num_to_scan--, next_to_clean++)
 	{
-		int			sync_state = SyncOneBuffer(next_to_clean, true,
-											   wb_context);
+		uint64		buf_state;
+		BufferDesc *bufHdr;
+		BufferTag	tag;
 
-		if (++next_to_clean >= NBuffers)
+		if (reusable_buffers >= upcoming_alloc_est)
+			break;
+
+		if (next_to_clean >= NBuffers)
 		{
 			next_to_clean = 0;
 			next_passes++;
 		}
-		num_to_scan--;
 
-		if (sync_state & BUF_WRITTEN)
+		bufHdr = GetBufferDescriptor(next_to_clean);
+		buf_state = pg_atomic_read_u64(&bufHdr->state);
+		if (BUF_STATE_GET_REFCOUNT(buf_state) != 0 ||
+			BUF_STATE_GET_USAGECOUNT(buf_state) != 0)
+			continue;
+
+		reusable_buffers++;
+
+		/*
+		 * Racy check is fine: bgwriter writes are opportunistic. If we miss a
+		 * buffer that just became dirty, it will be written by a future
+		 * bgwriter pass or by checkpointer.
+		 */
+		if (!(buf_state & BM_VALID) || !(buf_state & BM_DIRTY))
+			continue;
+
+		/* Make sure we can handle the pin */
+		ReservePrivateRefCountEntry();
+		ResourceOwnerEnlarge(CurrentResourceOwner);
+
+		if (!PinBuffer(bufHdr, BUC_ZERO, true))
+			continue;
+
+		FlushUnlockedBuffer(bufHdr, NULL, IOOBJECT_RELATION, IOCONTEXT_NORMAL);
+
+		/* Snapshot the tag before unpinning */
+		tag = bufHdr->tag;
+		UnpinBuffer(bufHdr);
+
+		ScheduleBufferTagForWriteback(wb_context, IOCONTEXT_NORMAL, &tag);
+
+		if (++num_written >= bgwriter_lru_maxpages)
 		{
-			reusable_buffers++;
-			if (++num_written >= bgwriter_lru_maxpages)
-			{
-				PendingBgWriterStats.maxwritten_clean++;
-				break;
-			}
+			PendingBgWriterStats.maxwritten_clean++;
+			break;
 		}
-		else if (sync_state & BUF_REUSABLE)
-			reusable_buffers++;
 	}
 
 	PendingBgWriterStats.buf_written_clean += num_written;
@@ -4507,83 +4533,6 @@ BgBufferSync(WritebackContext *wb_context)
 	return (bufs_to_lap == 0 && recent_alloc == 0);
 }
 
-/*
- * SyncOneBuffer -- process a single buffer during syncing.
- *
- * If skip_recently_used is true, we don't write currently-pinned buffers, nor
- * buffers marked recently used, as these are not replacement candidates.
- *
- * Returns a bitmask containing the following flag bits:
- *	BUF_WRITTEN: we wrote the buffer.
- *	BUF_REUSABLE: buffer is available for replacement, ie, it has
- *		pin count 0 and usage count 0.
- *
- * (BUF_WRITTEN could be set in error if FlushBuffer finds the buffer clean
- * after locking it, but we don't care all that much.)
- */
-static int
-SyncOneBuffer(int buf_id, bool skip_recently_used, WritebackContext *wb_context)
-{
-	BufferDesc *bufHdr = GetBufferDescriptor(buf_id);
-	int			result = 0;
-	uint64		buf_state;
-	BufferTag	tag;
-
-	/* Make sure we can handle the pin */
-	ReservePrivateRefCountEntry();
-	ResourceOwnerEnlarge(CurrentResourceOwner);
-
-	/*
-	 * Check whether buffer needs writing.
-	 *
-	 * We can make this check without taking the buffer content lock so long
-	 * as we mark pages dirty in access methods *before* logging changes with
-	 * XLogInsert(): if someone marks the buffer dirty just after our check we
-	 * don't worry because our checkpoint.redo points before log record for
-	 * upcoming changes and so we are not required to write such dirty buffer.
-	 */
-	buf_state = LockBufHdr(bufHdr);
-
-	if (BUF_STATE_GET_REFCOUNT(buf_state) == 0 &&
-		BUF_STATE_GET_USAGECOUNT(buf_state) == 0)
-	{
-		result |= BUF_REUSABLE;
-	}
-	else if (skip_recently_used)
-	{
-		/* Caller told us not to write recently-used buffers */
-		UnlockBufHdr(bufHdr);
-		return result;
-	}
-
-	if (!(buf_state & BM_VALID) || !(buf_state & BM_DIRTY))
-	{
-		/* It's clean, so nothing to do */
-		UnlockBufHdr(bufHdr);
-		return result;
-	}
-
-	/*
-	 * Pin it, share-exclusive-lock it, write it.  (FlushBuffer will do
-	 * nothing if the buffer is clean by the time we've locked it.)
-	 */
-	PinBuffer_Locked(bufHdr);
-
-	FlushUnlockedBuffer(bufHdr, NULL, IOOBJECT_RELATION, IOCONTEXT_NORMAL);
-
-	tag = bufHdr->tag;
-
-	UnpinBuffer(bufHdr);
-
-	/*
-	 * SyncOneBuffer() is only called by checkpointer and bgwriter, so
-	 * IOContext will always be IOCONTEXT_NORMAL.
-	 */
-	ScheduleBufferTagForWriteback(wb_context, IOCONTEXT_NORMAL, &tag);
-
-	return result | BUF_WRITTEN;
-}
-
 /*
  *		AtEOXact_Buffers - clean up at end of transaction.
  *
@@ -6498,7 +6447,7 @@ MarkSharedBufferDirtyHint(Buffer buffer, BufferDesc *bufHdr, uint64 lockstate,
 
 		/*
 		 * We must mark the page dirty before we emit the WAL record, as per
-		 * the usual rules, to ensure that BufferSync()/SyncOneBuffer() try to
+		 * the usual rules, to ensure that BufferSync()/BgBufferSync() try to
 		 * flush the buffer, even if we haven't inserted the WAL record yet.
 		 * As we hold at least a share-exclusive lock, checkpoints will wait
 		 * for this backend to be done with the buffer before continuing. If
diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h
index 3e1d1fad5f9..488553fba90 100644
--- a/src/include/storage/proc.h
+++ b/src/include/storage/proc.h
@@ -125,7 +125,7 @@ extern PGDLLIMPORT int FastPathLockGroupsPerBackend;
  * and we acquire an exclusive content lock and MarkBufferDirty() on the
  * relevant buffers before writing WAL, this mechanism is not needed, because
  * phase 2 will block until we release the content lock and then flush the
- * modified data to disk.  See transam/README and SyncOneBuffer().)
+ * modified data to disk.  See transam/README.)
  *
  * Setting DELAY_CHKPT_COMPLETE prevents the system from moving from phase 2
  * to phase 3. This is useful if we are performing a WAL-logged operation that
-- 
2.43.0

