From e513688b5ff6fcc88c55c22f5f0464ead2dc899f Mon Sep 17 00:00:00 2001
From: Melanie Plageman <melanieplageman@gmail.com>
Date: Tue, 12 May 2026 17:24:15 -0400
Subject: [PATCH v15 05/19] Refactor victim buffer selection and add helpers

StrategyGetBuffer() (called by GetVictimBuffer()), contrary to its name,
did not always return a strategy buffer. It might have returned a buffer
from the strategy ring, a buffer from shared buffers, or a buffer from
shared buffers that was now in the strategy ring.

This commit makes dedicated helpers for getting a buffer from the
strategy ring and getting a buffer from shared buffers. It adds a little
code duplication but makes the rules for when to look for a buffer where
more clear.
---
 src/backend/storage/buffer/bufmgr.c   |  23 ++-
 src/backend/storage/buffer/freelist.c | 254 +++++++++++---------------
 src/include/storage/buf_internals.h   |   7 +-
 3 files changed, 128 insertions(+), 156 deletions(-)

diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index c060ee46163..771130b06a1 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -2545,7 +2545,8 @@ InvalidateVictimBuffer(BufferDesc *buf_hdr)
  * Helper to claim a victim buffer -- which is invalidating its existing
  * contents (including flushing the old contents first if needed).
  * Returns true if it successfully claimed the victim buffer and false if it
- * failed to do so. Buffer must already be pinned.
+ * failed to do so. Buffer must already be pinned, but if we fail to claim it
+ * we will unpin it.
  */
 bool
 ClaimVictimBuffer(BufferAccessStrategy strategy,
@@ -2603,8 +2604,7 @@ ClaimVictimBuffer(BufferAccessStrategy strategy,
 		 * We need to hold the content lock in at least share-exclusive mode
 		 * to safely inspect the page LSN.
 		 */
-		if (strategy && from_ring &&
-			StrategyRejectBuffer(strategy, buf_hdr, buf_state))
+		if (from_ring && StrategyRejectBuffer(strategy, buf_hdr, buf_state))
 		{
 			UnlockReleaseBuffer(bufnum);
 			return false;
@@ -2649,11 +2649,19 @@ GetVictimBuffer(BufferAccessStrategy strategy, IOContext io_context)
 	ResourceOwnerEnlarge(CurrentResourceOwner);
 
 	/*
-	 * Select a victim buffer.  The buffer is returned pinned and owned by
-	 * this backend.
+	 * Select a victim buffer. The buffer is returned pinned and owned by this
+	 * backend and is already cleaned and invalidated.
 	 */
-	bufnum = StrategyGetBuffer(strategy, io_context);
+	if (strategy)
+		bufnum = GetBufferFromRing(strategy, io_context);
 
+	/* If no strategy or didn't find a strategy buffer, get one from SB */
+	if (!BufferIsValid(bufnum))
+	{
+		bufnum = GetBufferFromClocksweep(io_context);
+		if (strategy)
+			AddBufferToRing(strategy, bufnum);
+	}
 
 	/* a final set of sanity checks */
 #ifdef USE_ASSERT_CHECKING
@@ -4585,8 +4593,7 @@ FlushBuffer(BufferDesc *buf, SMgrRelation reln, IOObject io_object,
 	 * If a shared buffer which was added to the ring later because the
 	 * current strategy buffer is pinned or in use or because all strategy
 	 * buffers were dirty and rejected (for BAS_BULKREAD operations only)
-	 * requires flushing, this is counted as an IOCONTEXT_NORMAL IOOP_WRITE
-	 * (from_ring will be false).
+	 * requires flushing, this is counted as an IOCONTEXT_NORMAL IOOP_WRITE.
 	 *
 	 * When a strategy is not in use, the write can only be a "regular" write
 	 * of a dirty shared buffer (IOCONTEXT_NORMAL IOOP_WRITE).
diff --git a/src/backend/storage/buffer/freelist.c b/src/backend/storage/buffer/freelist.c
index f0f3a30cd03..9c8ac8e7152 100644
--- a/src/backend/storage/buffer/freelist.c
+++ b/src/backend/storage/buffer/freelist.c
@@ -96,13 +96,9 @@ typedef struct BufferAccessStrategyData
 
 
 /* Prototypes for internal functions */
-static BufferDesc *GetBufferFromRing(BufferAccessStrategy strategy,
-									 uint64 *buf_state);
-static void AddBufferToRing(BufferAccessStrategy strategy,
-							Buffer bufnum);
 
 /*
- * ClockSweepTick - Helper routine for StrategyGetBuffer()
+ * ClockSweepTick - Helper routine for GetBufferFromClocksweep()
  *
  * Move the clock hand one buffer ahead of its current position and return the
  * id of the buffer now under the hand.
@@ -167,72 +163,15 @@ ClockSweepTick(void)
 }
 
 /*
- * StrategyGetBuffer
- *
- *	Called by the bufmgr to get the next candidate buffer to use in
- *	GetVictimBuffer(). The only hard requirement GetVictimBuffer() has is that
- *	the selected buffer must not currently be pinned by anyone.
- *
- *	strategy is a BufferAccessStrategy object, or NULL for default strategy.
- *
- *	It is the callers responsibility to ensure the buffer ownership can be
- *	tracked via TrackNewBufferPin().
- *
- *	The buffer is pinned and marked as owned, using TrackNewBufferPin(),
- *	before returning.
+ *	Return a buffer from clock sweep, pinned and marked as owned using
+ *	TrackNewBufferPin(). It is the caller's responsibility to make sure the
+ *	buffer ownership can be tracked.
  */
 Buffer
-StrategyGetBuffer(BufferAccessStrategy strategy, IOContext io_context)
+GetBufferFromClocksweep(IOContext io_context)
 {
-	BufferDesc *buf;
-	Buffer		bufnum;
 	int			bgwprocno;
 	int			trycounter;
-	uint64		buf_state;
-	bool		buf_valid;
-
-	/*
-	 * If given a strategy object, see whether it can select a buffer. We
-	 * assume strategy objects don't need buffer_strategy_lock.
-	 */
-	if (strategy != NULL)
-	{
-		int			ring_tries;
-
-		/*
-		 * Don't loop around the ring forever. If we can't reuse any of the
-		 * ring buffers (e.g. because they are all concurrently locked), fall
-		 * back to the clock sweep after one full pass.
-		 */
-		for (ring_tries = 0; ring_tries < strategy->nbuffers; ring_tries++)
-		{
-			buf = GetBufferFromRing(strategy, &buf_state);
-			if (buf == NULL)
-				break;
-
-			bufnum = BufferDescriptorGetBuffer(buf);
-			/* Don't use a stale buf_state value after InvalidateVictimBuffer */
-			buf_valid = buf_state & BM_VALID;
-			if (ClaimVictimBuffer(strategy, buf, bufnum, buf_state,
-								  true, io_context))
-			{
-				/*
-				 * Blocks evicted from buffers already in the strategy ring
-				 * are counted as IOOP_REUSE in the corresponding strategy
-				 * context.
-				 *
-				 * We can only count this now that we've successfully claimed
-				 * the buffer and invalidated its previous tenant. Previously
-				 * we may have been forced to release the buffer due to
-				 * concurrent pinners or erroring out.
-				 */
-				if (buf_valid)
-					pgstat_count_io_op(IOOBJECT_RELATION, io_context,
-									   IOOP_REUSE, 1, 0);
-				return bufnum;
-			}
-		}
-	}
 
 	/*
 	 * If asked, we need to waken the bgwriter. Since we don't want to rely on
@@ -271,8 +210,11 @@ StrategyGetBuffer(BufferAccessStrategy strategy, IOContext io_context)
 	trycounter = NBuffers;
 	for (;;)
 	{
+		BufferDesc *buf;
+		Buffer		bufnum = InvalidBuffer;
 		uint64		old_buf_state;
-		uint64		local_buf_state;
+		uint64		buf_state;
+		bool		buf_valid;
 
 		buf = GetBufferDescriptor(ClockSweepTick());
 
@@ -283,7 +225,7 @@ StrategyGetBuffer(BufferAccessStrategy strategy, IOContext io_context)
 		old_buf_state = pg_atomic_read_u64(&buf->state);
 		for (;;)
 		{
-			local_buf_state = old_buf_state;
+			buf_state = old_buf_state;
 
 			/*
 			 * If the buffer is pinned or has a nonzero usage_count, we cannot
@@ -291,7 +233,7 @@ StrategyGetBuffer(BufferAccessStrategy strategy, IOContext io_context)
 			 * scanning.
 			 */
 
-			if (BUF_STATE_GET_REFCOUNT(local_buf_state) != 0)
+			if (BUF_STATE_GET_REFCOUNT(buf_state) != 0)
 			{
 				if (--trycounter == 0)
 				{
@@ -308,18 +250,18 @@ StrategyGetBuffer(BufferAccessStrategy strategy, IOContext io_context)
 			}
 
 			/* See equivalent code in PinBuffer() */
-			if (unlikely(local_buf_state & BM_LOCKED))
+			if (unlikely(buf_state & BM_LOCKED))
 			{
 				old_buf_state = WaitBufHdrUnlocked(buf);
 				continue;
 			}
 
-			if (BUF_STATE_GET_USAGECOUNT(local_buf_state) != 0)
+			if (BUF_STATE_GET_USAGECOUNT(buf_state) != 0)
 			{
-				local_buf_state -= BUF_USAGECOUNT_ONE;
+				buf_state -= BUF_USAGECOUNT_ONE;
 
 				if (pg_atomic_compare_exchange_u64(&buf->state, &old_buf_state,
-												   local_buf_state))
+												   buf_state))
 				{
 					trycounter = NBuffers;
 					break;
@@ -328,16 +270,14 @@ StrategyGetBuffer(BufferAccessStrategy strategy, IOContext io_context)
 			else
 			{
 				/* pin the buffer if the CAS succeeds */
-				local_buf_state += BUF_REFCOUNT_ONE;
+				buf_state += BUF_REFCOUNT_ONE;
 
 				if (pg_atomic_compare_exchange_u64(&buf->state, &old_buf_state,
-												   local_buf_state))
+												   buf_state))
 				{
 					/* Found a usable buffer */
-					buf_state = local_buf_state;
-
-					TrackNewBufferPin(BufferDescriptorGetBuffer(buf));
 					bufnum = BufferDescriptorGetBuffer(buf);
+					TrackNewBufferPin(bufnum);
 
 					/*
 					 * Don't use a stale buf_state value after
@@ -345,12 +285,7 @@ StrategyGetBuffer(BufferAccessStrategy strategy, IOContext io_context)
 					 */
 					buf_valid = buf_state & BM_VALID;
 
-					/*
-					 * If we can't claim the buffer (concurrent pin/dirty), go
-					 * back to the clock sweep to find another one. Don't add
-					 * a buffer we failed to claim to the strategy ring.
-					 */
-					if (ClaimVictimBuffer(strategy, buf, bufnum, buf_state,
+					if (ClaimVictimBuffer(NULL, buf, bufnum, buf_state,
 										  false, io_context))
 					{
 						/*
@@ -371,8 +306,6 @@ StrategyGetBuffer(BufferAccessStrategy strategy, IOContext io_context)
 						if (buf_valid)
 							pgstat_count_io_op(IOOBJECT_RELATION, io_context,
 											   IOOP_EVICT, 1, 0);
-						if (strategy != NULL)
-							AddBufferToRing(strategy, bufnum);
 						return bufnum;
 					}
 
@@ -426,8 +359,8 @@ StrategySyncStart(uint32 *complete_passes, uint32 *num_buf_alloc)
 /*
  * StrategyNotifyBgWriter -- set or clear allocation notification latch
  *
- * If bgwprocno isn't -1, the next invocation of StrategyGetBuffer will
- * set that latch.  Pass -1 to clear the pending notification before it
+ * If bgwprocno isn't -1, the next invocation of GetBufferFromClocksweep()
+ * will set that latch.  Pass -1 to clear the pending notification before it
  * happens.  This feature is used by the bgwriter process to wake itself up
  * from hibernation, and is not meant for anybody else to use.
  */
@@ -436,8 +369,8 @@ StrategyNotifyBgWriter(int bgwprocno)
 {
 	/*
 	 * We acquire buffer_strategy_lock just to ensure that the store appears
-	 * atomic to StrategyGetBuffer.  The bgwriter should call this rather
-	 * infrequently, so there's no performance penalty from being safe.
+	 * atomic to GetBufferFromClocksweep.  The bgwriter should call this
+	 * rather infrequently, so there's no performance penalty from being safe.
 	 */
 	SpinLockAcquire(&StrategyControl->buffer_strategy_lock);
 	StrategyControl->bgwprocno = bgwprocno;
@@ -680,90 +613,121 @@ FreeAccessStrategy(BufferAccessStrategy strategy)
 }
 
 /*
- * GetBufferFromRing -- returns a buffer from the ring, or NULL if the
- *		ring is empty / not usable.
+ * Returns a clean, invalidated buffer from the ring, or InvalidBuffer if the
+ * ring is empty or contains no usable buffers. In that case, the caller will
+ * select a buffer from the clocksweep and add it to the ring.
  *
  * The buffer is pinned and marked as owned, using TrackNewBufferPin(), before
- * returning.
+ * returning. It is the caller's responsibility to make sure the buffer
+ * ownership can be tracked.
  */
-static BufferDesc *
-GetBufferFromRing(BufferAccessStrategy strategy, uint64 *buf_state)
+Buffer
+GetBufferFromRing(BufferAccessStrategy strategy, IOContext io_context)
 {
-	BufferDesc *buf;
-	Buffer		bufnum;
-	uint64		old_buf_state;
-	uint64		local_buf_state;	/* to avoid repeated (de-)referencing */
-
-
-	/* Advance to next ring slot */
-	if (++strategy->current >= strategy->nbuffers)
-		strategy->current = 0;
-
-	/*
-	 * If the slot hasn't been filled yet, tell the caller to allocate a new
-	 * buffer with the normal allocation strategy.  He will then fill this
-	 * slot by calling AddBufferToRing with the new buffer.
-	 */
-	bufnum = strategy->buffers[strategy->current];
-	if (bufnum == InvalidBuffer)
-		return NULL;
-
-	buf = GetBufferDescriptor(bufnum - 1);
+	Assert(strategy);
 
 	/*
-	 * Check whether the buffer can be used and pin it if so. Do this using a
-	 * CAS loop, to avoid having to lock the buffer header.
+	 * Don't loop around the ring forever if none of the strategy buffers meet
+	 * our criteria.
 	 */
-	old_buf_state = pg_atomic_read_u64(&buf->state);
-	for (;;)
+	for (int trycounter = 0; trycounter < strategy->nbuffers; trycounter++)
 	{
-		local_buf_state = old_buf_state;
+		BufferDesc *buf;
+		Buffer		bufnum;
+		uint64		old_buf_state;
+		uint64		buf_state;
+		bool		buf_valid;
+
+		/* Advance to next ring slot */
+		if (++strategy->current >= strategy->nbuffers)
+			strategy->current = 0;
 
 		/*
-		 * If the buffer is pinned we cannot use it under any circumstances.
-		 *
-		 * If usage_count is 0 or 1 then the buffer is fair game (we expect 1,
-		 * since our own previous usage of the ring element would have left it
-		 * there, but it might've been decremented by clock-sweep since then).
-		 * A higher usage_count indicates someone else has touched the buffer,
-		 * so we shouldn't re-use it.
+		 * If the slot hasn't been filled yet, tell the caller to allocate a
+		 * new buffer with the normal allocation strategy. He will then fill
+		 * this slot by calling AddBufferToRing with the new buffer.
 		 */
-		if (BUF_STATE_GET_REFCOUNT(local_buf_state) != 0
-			|| BUF_STATE_GET_USAGECOUNT(local_buf_state) > 1)
-			break;
+		bufnum = strategy->buffers[strategy->current];
+		if (bufnum == InvalidBuffer)
+			return InvalidBuffer;
 
-		/* See equivalent code in PinBuffer() */
-		if (unlikely(local_buf_state & BM_LOCKED))
+		buf = GetBufferDescriptor(bufnum - 1);
+
+		/*
+		 * Check whether the buffer can be used and pin it if so. Do this
+		 * using a CAS loop, to avoid having to lock the buffer header.
+		 */
+		old_buf_state = pg_atomic_read_u64(&buf->state);
+		for (;;)
 		{
-			old_buf_state = WaitBufHdrUnlocked(buf);
-			continue;
+			buf_state = old_buf_state;
+
+			/*
+			 * If the buffer is pinned we cannot use it under any
+			 * circumstances.
+			 *
+			 * If usage_count is 0 or 1 then the buffer is fair game (we
+			 * expect 1, since our own previous usage of the ring element
+			 * would have left it there, but it might've been decremented by
+			 * clock-sweep since then). A higher usage_count indicates someone
+			 * else has touched the buffer, so we shouldn't re-use it.
+			 */
+			if (BUF_STATE_GET_REFCOUNT(buf_state) != 0
+				|| BUF_STATE_GET_USAGECOUNT(buf_state) > 1)
+				return InvalidBuffer;
+
+			/* See equivalent code in PinBuffer() */
+			if (unlikely(buf_state & BM_LOCKED))
+			{
+				old_buf_state = WaitBufHdrUnlocked(buf);
+				continue;
+			}
+
+			/* pin the buffer if the CAS succeeds */
+			buf_state += BUF_REFCOUNT_ONE;
+
+			/* if we can't change state, keep trying */
+			if (pg_atomic_compare_exchange_u64(&buf->state, &old_buf_state,
+											   buf_state))
+			{
+				/* got a pin */
+				TrackNewBufferPin(BufferDescriptorGetBuffer(buf));
+				break;
+			}
 		}
 
-		/* pin the buffer if the CAS succeeds */
-		local_buf_state += BUF_REFCOUNT_ONE;
+		/* Don't use a stale buf_state value after InvalidateVictimBuffer */
+		buf_valid = buf_state & BM_VALID;
 
-		if (pg_atomic_compare_exchange_u64(&buf->state, &old_buf_state,
-										   local_buf_state))
+		if (ClaimVictimBuffer(strategy, buf, bufnum, buf_state, true, io_context))
 		{
-			*buf_state = local_buf_state;
-
-			TrackNewBufferPin(BufferDescriptorGetBuffer(buf));
-			return buf;
+			/*
+			 * Blocks evicted from buffers already in the strategy ring are
+			 * counted as IOOP_REUSE in the corresponding strategy context.
+			 *
+			 * We can only count this now that we've successfully claimed the
+			 * buffer and invalidated its previous tenant. Previously we may
+			 * have been forced to release the buffer due to concurrent
+			 * pinners or erroring out.
+			 */
+			if (buf_valid)
+				pgstat_count_io_op(IOOBJECT_RELATION, io_context, IOOP_REUSE, 1, 0);
+			return bufnum;
 		}
 	}
 
 	/*
 	 * Tell caller to allocate a new buffer with the normal allocation
-	 * strategy.  He'll then replace this ring element via AddBufferToRing.
+	 * strategy. He'll then replace this ring element via AddBufferToRing.
 	 */
-	return NULL;
+	return InvalidBuffer;
 }
 
 /*
  * Records the buffer in the current ring slot. The caller must have already
  * pinned the buffer and invalidated its previous contents.
  */
-static void
+void
 AddBufferToRing(BufferAccessStrategy strategy, Buffer bufnum)
 {
 	strategy->buffers[strategy->current] = bufnum;
diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h
index 8477cec8d34..426393408f0 100644
--- a/src/include/storage/buf_internals.h
+++ b/src/include/storage/buf_internals.h
@@ -589,13 +589,14 @@ extern void TerminateBufferIO(BufferDesc *buf, bool clear_dirty, uint64 set_flag
 
 /* freelist.c */
 extern IOContext IOContextForStrategy(BufferAccessStrategy strategy);
-extern Buffer StrategyGetBuffer(BufferAccessStrategy strategy,
+extern Buffer GetBufferFromRing(BufferAccessStrategy strategy,
 								IOContext io_context);
-extern bool StrategyRejectBuffer(BufferAccessStrategy strategy,
-								 BufferDesc *buf, uint64 buf_state);
+extern void AddBufferToRing(BufferAccessStrategy strategy, Buffer bufnum);
+extern Buffer GetBufferFromClocksweep(IOContext io_context);
 
 extern int	StrategySyncStart(uint32 *complete_passes, uint32 *num_buf_alloc);
 extern void StrategyNotifyBgWriter(int bgwprocno);
+extern bool StrategyRejectBuffer(BufferAccessStrategy strategy, BufferDesc *buf, uint64 buf_state);
 
 /* buf_table.c */
 extern uint32 BufTableHashCode(BufferTag *tagPtr);
-- 
2.43.0

