From 9f18bac7869810914e0dfde2fc14060293bcd5b4 Mon Sep 17 00:00:00 2001
From: Melanie Plageman <melanieplageman@gmail.com>
Date: Mon, 30 Jun 2025 18:04:33 -0400
Subject: [PATCH v1 1/2] Eager evict bulkwrite strategy ring

Operations using BAS_BULKWRITE (COPY FROM and createdb) will inevitably
need to flush buffers in the strategy ring buffer in order to reuse
them. By eagerly evicting the buffers in a larger batch, we incur less
interleaving of WAL flushes and data file writes. The effect is mainly
noticeable with multiple parallel COPY FROMs. In this case, client
backends achieve higher write throughput and end up spending less time
waiting on acquiring the lock to flush WAL. Larger flush operations also
mean less time waiting for flush operations at the kernel level.

The heuristic for eager eviction is to only evict buffers in the
strategy ring whose flushing does not require flushing WAL.
---
 src/backend/storage/buffer/bufmgr.c   | 72 +++++++++++++++++++++++++++
 src/backend/storage/buffer/freelist.c | 53 ++++++++++++++++++++
 src/include/storage/buf_internals.h   |  1 +
 src/include/storage/bufmgr.h          |  2 +
 4 files changed, 128 insertions(+)

diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 6afdd28dba6..ca7d900e7ec 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -2346,6 +2346,75 @@ InvalidateVictimBuffer(BufferDesc *buf_hdr)
 	return true;
 }

+/*
+ * Pin and share-lock a shared buffer and write it out, but only if doing so
+ * is "cheap": the buffer is dirty, unpinned and has a usage count of at most
+ * one, its content lock can be taken without waiting, and its LSN does not
+ * require a WAL flush. This is appropriate for occasions in which we don't
+ * need to guarantee that the buffer is flushed.
+ *
+ * Returns true if the buffer was flushed, false otherwise.
+ */
+bool
+QuickCleanBuffer(BufferDesc *bufdesc, IOContext io_context)
+{
+	uint32		buf_state;
+	XLogRecPtr	lsn;
+	LWLock	   *content_lock;
+	Buffer		buffer;
+
+	buffer = BufferDescriptorGetBuffer(bufdesc);
+	Assert(!BufferIsLocal(buffer));
+
+	/* Reserve pin bookkeeping before taking the spinlock; this may allocate */
+	ReservePrivateRefCountEntry();
+	ResourceOwnerEnlarge(CurrentResourceOwner);
+
+	buf_state = LockBufHdr(bufdesc);
+
+	/*
+	 * No need to flush the buffer if it isn't dirty. We won't flush buffers
+	 * in use by other backends.
+	 */
+	if (!(buf_state & BM_DIRTY) ||
+		BUF_STATE_GET_REFCOUNT(buf_state) > 0 ||
+		BUF_STATE_GET_USAGECOUNT(buf_state) > 1)
+	{
+		UnlockBufHdr(bufdesc, buf_state);
+		return false;
+	}
+
+	/* Releases buffer header lock before acquiring content lock */
+	PinBuffer_Locked(bufdesc);
+	content_lock = BufferDescriptorGetContentLock(bufdesc);
+	if (!LWLockConditionalAcquire(content_lock, LW_SHARED))
+	{
+		UnpinBuffer(bufdesc);
+		return false;
+	}
+
+	CheckBufferIsPinnedOnce(buffer);
+
+	/* Need buffer header lock to get the LSN */
+	buf_state = LockBufHdr(bufdesc);
+	lsn = BufferGetLSN(bufdesc);
+	UnlockBufHdr(bufdesc, buf_state);
+
+	if (XLogNeedsFlush(lsn))
+	{
+		UnlockReleaseBuffer(buffer);
+		return false;
+	}
+
+	FlushBuffer(bufdesc, NULL, IOOBJECT_RELATION, io_context);
+
+	LWLockRelease(content_lock);
+	ScheduleBufferTagForWriteback(&BackendWritebackContext, io_context,
+								  &bufdesc->tag);
+
+	UnpinBuffer(bufdesc);
+	return true;
+}
+
 static Buffer
 GetVictimBuffer(BufferAccessStrategy strategy, IOContext io_context)
 {
@@ -2451,6 +2520,9 @@ again:

 		ScheduleBufferTagForWriteback(&BackendWritebackContext, io_context,
 									  &buf_hdr->tag);
+
+		if (strategy)
+			EvictStrategyRing(strategy);
 	}


diff --git a/src/backend/storage/buffer/freelist.c b/src/backend/storage/buffer/freelist.c
index 01909be0272..ab38c96e2de 100644
--- a/src/backend/storage/buffer/freelist.c
+++ b/src/backend/storage/buffer/freelist.c
@@ -180,6 +180,31 @@ have_free_buffer(void)
 		return false;
 }

+/*
+ * Report whether a strategy supports eager eviction, i.e. cleaning buffers in
+ * its ring before they are needed, which can lead to better I/O patterns than
+ * lazily cleaning each buffer just before reuse. Only BAS_BULKWRITE does.
+ */
+bool
+strategy_supports_eager_eviction(BufferAccessStrategy strategy)
+{
+	Assert(strategy);
+
+	switch (strategy->btype)
+	{
+		case BAS_BULKWRITE:
+			return true;
+		case BAS_VACUUM:
+		case BAS_NORMAL:
+		case BAS_BULKREAD:
+			return false;
+		default:
+			elog(ERROR, "unrecognized buffer access strategy: %d",
+				 (int) strategy->btype);
+			return false;
+	}
+}
+
 /*
  * StrategyGetBuffer
  *
@@ -780,6 +805,34 @@ GetBufferFromRing(BufferAccessStrategy strategy, uint32 *buf_state)
 	return NULL;
 }

+/*
+ * Best-effort flush of every buffer in the strategy ring, to encourage write
+ * batching at the kernel level. No-op unless the strategy supports eager
+ * eviction; buffers that would require a WAL flush are left dirty.
+ */
+void
+EvictStrategyRing(BufferAccessStrategy strategy)
+{
+	IOContext	io_context;
+
+	if (!strategy_supports_eager_eviction(strategy))
+		return;
+
+	io_context = IOContextForStrategy(strategy);
+
+	for (int i = 0; i < strategy->nbuffers; i++)
+	{
+		BufferDesc *bufdesc;
+		Buffer		bufnum = strategy->buffers[i];
+
+		if (bufnum == InvalidBuffer)
+			continue;
+		bufdesc = GetBufferDescriptor(bufnum - 1);
+		QuickCleanBuffer(bufdesc, io_context);
+	}
+}
+
+
 /*
  * AddBufferToRing -- add a buffer to the buffer ring
  *
diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h
index 52a71b138f7..4d3f9552027 100644
--- a/src/include/storage/buf_internals.h
+++ b/src/include/storage/buf_internals.h
@@ -433,6 +433,7 @@ extern void WritebackContextInit(WritebackContext *context, int *max_pending);
 extern void IssuePendingWritebacks(WritebackContext *wb_context, IOContext io_context);
 extern void ScheduleBufferTagForWriteback(WritebackContext *wb_context,
 										  IOContext io_context, BufferTag *tag);
+extern bool QuickCleanBuffer(BufferDesc *bufdesc, IOContext io_context);

 /* solely to make it easier to write tests */
 extern bool StartBufferIO(BufferDesc *buf, bool forInput, bool nowait);
diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h
index 41fdc1e7693..a4d122fa3c5 100644
--- a/src/include/storage/bufmgr.h
+++ b/src/include/storage/bufmgr.h
@@ -331,8 +331,10 @@ extern BufferAccessStrategy GetAccessStrategyWithSize(BufferAccessStrategyType b
 extern int	GetAccessStrategyBufferCount(BufferAccessStrategy strategy);
 extern int	GetAccessStrategyPinLimit(BufferAccessStrategy strategy);

+extern void EvictStrategyRing(BufferAccessStrategy strategy);
 extern void FreeAccessStrategy(BufferAccessStrategy strategy);

+extern bool strategy_supports_eager_eviction(BufferAccessStrategy strategy);

 /* inline functions */

--
2.43.0

