From 5b216371de7d37f88d51fdb330775f2ece1d021f Mon Sep 17 00:00:00 2001
From: Melanie Plageman <melanieplageman@gmail.com>
Date: Wed, 7 Jan 2026 13:32:18 -0500
Subject: [PATCH v13 1/8] Streamline buffer rejection for bulkreads of unlogged
 tables

Bulk-read buffer access strategies reject reusing a buffer from the
buffer access strategy ring if reusing it would require flushing WAL.
Unlogged relations never require WAL flushes, so this check can be
skipped. This avoids taking the buffer header lock unnecessarily.

Refactor this into StrategyRejectBuffer() itself, which also avoids LSN
checking for non-bulkread buffer access strategies.

Author: Melanie Plageman <melanieplageman@gmail.com>
Reviewed-by: Chao Li <li.evan.chao@gmail.com>
Discussion: https://postgr.es/m/flat/0198DBB9-4A76-49E4-87F8-43D46DD0FD76%40gmail.com#1d8677fc75dc8b39f0eb5dd6bbafb65d
---
 src/backend/storage/buffer/bufmgr.c   | 60 ++++++++++++++++++++-------
 src/backend/storage/buffer/freelist.c | 14 ++++++-
 src/include/storage/buf_internals.h   |  2 +
 3 files changed, 59 insertions(+), 17 deletions(-)

diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index a036c2aa275..67f9a210872 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -2483,25 +2483,14 @@ again:
 		 * If using a nondefault strategy, and writing the buffer would
 		 * require a WAL flush, let the strategy decide whether to go ahead
 		 * and write/reuse the buffer or to choose another victim.  We need a
-		 * lock to inspect the page LSN, so this can't be done inside
+		 * content lock to inspect the page LSN, so this can't be done inside
 		 * StrategyGetBuffer.
 		 */
-		if (strategy != NULL)
+		if (StrategyRejectBuffer(strategy, buf_hdr, from_ring))
 		{
-			XLogRecPtr	lsn;
-
-			/* Read the LSN while holding buffer header lock */
-			buf_state = LockBufHdr(buf_hdr);
-			lsn = BufferGetLSN(buf_hdr);
-			UnlockBufHdr(buf_hdr);
-
-			if (XLogNeedsFlush(lsn)
-				&& StrategyRejectBuffer(strategy, buf_hdr, from_ring))
-			{
-				LWLockRelease(content_lock);
-				UnpinBuffer(buf_hdr);
-				goto again;
-			}
+			LWLockRelease(content_lock);
+			UnpinBuffer(buf_hdr);
+			goto again;
 		}
 
 		/* OK, do the I/O */
@@ -3416,6 +3405,45 @@ TrackNewBufferPin(Buffer buf)
 							  BLCKSZ);
 }
 
+/*
+ * Returns true if the buffer needs WAL flushed before it can be written out.
+ *
+ * For the result to be correct, the caller must hold a buffer content
+ * lock. A caller holding only a shared content lock forces us to acquire
+ * the buffer header spinlock, so the caller must not already hold it.
+ */
+bool
+BufferNeedsWALFlush(BufferDesc *bufdesc, bool exclusive_locked)
+{
+	uint32		buf_state = pg_atomic_read_u32(&bufdesc->state);
+	Buffer		buffer;
+	char	   *page;
+	XLogRecPtr	lsn;
+
+	/*
+	 * Unlogged buffers can't need WAL flush. See FlushBuffer() for more
+	 * details on unlogged relations with LSNs.
+	 */
+	if (!(buf_state & BM_PERMANENT))
+		return false;
+
+	buffer = BufferDescriptorGetBuffer(bufdesc);
+	page = BufferGetPage(buffer);
+
+	if (!XLogHintBitIsNeeded() || BufferIsLocal(buffer) || exclusive_locked)
+		lsn = PageGetLSN(page);
+	else
+	{
+		/* Buffer is either share locked or not locked */
+		LockBufHdr(bufdesc);
+		lsn = PageGetLSN(page);
+		UnlockBufHdr(bufdesc);
+	}
+
+	return XLogNeedsFlush(lsn);
+}
+
+
 #define ST_SORT sort_checkpoint_bufferids
 #define ST_ELEMENT_TYPE CkptSortItem
 #define ST_COMPARE(a, b) ckpt_buforder_comparator(a, b)
diff --git a/src/backend/storage/buffer/freelist.c b/src/backend/storage/buffer/freelist.c
index 9a93fb335fc..570b933ddb3 100644
--- a/src/backend/storage/buffer/freelist.c
+++ b/src/backend/storage/buffer/freelist.c
@@ -780,12 +780,18 @@ IOContextForStrategy(BufferAccessStrategy strategy)
  * be written out and doing so would require flushing WAL too.  This gives us
  * a chance to choose a different victim.
  *
+ * The buffer must be pinned and content-locked, and the buffer header
+ * spinlock must not be held.
+ *
  * Returns true if buffer manager should ask for a new victim, and false
  * if this buffer should be written and re-used.
  */
 bool
 StrategyRejectBuffer(BufferAccessStrategy strategy, BufferDesc *buf, bool from_ring)
 {
+	if (!strategy)
+		return false;
+
 	/* We only do this in bulkread mode */
 	if (strategy->btype != BAS_BULKREAD)
 		return false;
@@ -795,8 +801,14 @@ StrategyRejectBuffer(BufferAccessStrategy strategy, BufferDesc *buf, bool from_r
 		strategy->buffers[strategy->current] != BufferDescriptorGetBuffer(buf))
 		return false;
 
+	Assert(BufferIsLockedByMe(BufferDescriptorGetBuffer(buf)));
+	Assert(!(pg_atomic_read_u32(&buf->state) & BM_LOCKED));
+
+	if (!BufferNeedsWALFlush(buf, false))
+		return false;
+
 	/*
-	 * Remove the dirty buffer from the ring; necessary to prevent infinite
+	 * Remove the dirty buffer from the ring; necessary to prevent an infinite
 	 * loop if all ring members are dirty.
 	 */
 	strategy->buffers[strategy->current] = InvalidBuffer;
diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h
index 2f607ea2ac5..916be941a41 100644
--- a/src/include/storage/buf_internals.h
+++ b/src/include/storage/buf_internals.h
@@ -15,6 +15,7 @@
 #ifndef BUFMGR_INTERNALS_H
 #define BUFMGR_INTERNALS_H
 
+#include "access/xlogdefs.h"
 #include "pgstat.h"
 #include "port/atomics.h"
 #include "storage/aio_types.h"
@@ -522,6 +523,7 @@ extern void ScheduleBufferTagForWriteback(WritebackContext *wb_context,
 										  IOContext io_context, BufferTag *tag);
 
 extern void TrackNewBufferPin(Buffer buf);
+extern bool BufferNeedsWALFlush(BufferDesc *bufdesc, bool exclusive_locked);
 
 /* solely to make it easier to write tests */
 extern bool StartBufferIO(BufferDesc *buf, bool forInput, bool nowait);
-- 
2.43.0

