From 99354adda53d07d28940810b429648a855eeaf12 Mon Sep 17 00:00:00 2001
From: Soumya <soumyamurali.work@gmail.com>
Date: Fri, 23 Jan 2026 12:24:11 +0530
Subject: [PATCH] Batch buffer writebacks during checkpoints

Signed-off-by: Soumya <soumyamurali.work@gmail.com>
---
 src/backend/storage/buffer/bufmgr.c | 77 ++++++++++++++++++++++++++---
 1 file changed, 71 insertions(+), 6 deletions(-)

diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 6f935648ae9..c92d638f804 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -3467,6 +3467,15 @@ BufferSync(int flags)
 	int			i;
 	uint64		mask = BM_DIRTY;
 	WritebackContext wb_context;
+	uint64  ckpt_write_count = 0;
+	uint64  ckpt_issue_writeback_calls = 0;
+
+	/* --- checkpoint write batching --- */
+	#define CHECKPOINT_WRITE_BATCH 16
+
+	int     batch_bufs[CHECKPOINT_WRITE_BATCH];
+	int     batch_count = 0;
+	/* --- checkpoint write batching --- */
 
 	/*
 	 * Unless this is a shutdown checkpoint or we have been explicitly told,
@@ -3666,12 +3675,37 @@ BufferSync(int flags)
 		 */
 		if (pg_atomic_read_u64(&bufHdr->state) & BM_CHECKPOINT_NEEDED)
 		{
-			if (SyncOneBuffer(buf_id, false, &wb_context) & BUF_WRITTEN)
-			{
-				TRACE_POSTGRESQL_BUFFER_SYNC_WRITTEN(buf_id);
-				PendingCheckpointerStats.buffers_written++;
-				num_written++;
-			}
+    		/*
+    		 * Collect buffer into a small local batch.  The write is deferred
+    		 * until the batch fills up (or until the scan loop finishes).
+    		 */
+    		batch_bufs[batch_count++] = buf_id;
+
+    		/*
+    		 * Flush batch if full.
+    		 */
+    		if (batch_count == CHECKPOINT_WRITE_BATCH)
+    		{
+        		int j;
+
+        		for (j = 0; j < batch_count; j++)
+        		{
+            		if (SyncOneBuffer(batch_bufs[j], false, &wb_context) & BUF_WRITTEN)
+            		{
+                		TRACE_POSTGRESQL_BUFFER_SYNC_WRITTEN(batch_bufs[j]);
+                		PendingCheckpointerStats.buffers_written++;
+                		num_written++;
+                		ckpt_write_count++;
+            		}
+        		}
+				/*
+				 * Issue writeback for this batch (durability semantics unchanged).
+				 * NOTE(review): confirm that this actually amortizes syscall cost.
+				 */
+				ckpt_issue_writeback_calls++;
+				IssuePendingWritebacks(&wb_context, IOCONTEXT_NORMAL);
+				batch_count = 0;
+    		}
 		}
 
 		/*
@@ -3701,6 +3735,31 @@ BufferSync(int flags)
 		CheckpointWriteDelay(flags, (double) num_processed / num_to_scan);
 	}
 
+	/*
+	 * Flush any remaining buffers in the batch.
+	 */
+	if (batch_count > 0)
+	{
+    	int j;
+
+    	for (j = 0; j < batch_count; j++)
+    	{
+        	if (SyncOneBuffer(batch_bufs[j], false, &wb_context) & BUF_WRITTEN)
+        	{
+            	TRACE_POSTGRESQL_BUFFER_SYNC_WRITTEN(batch_bufs[j]);
+            	PendingCheckpointerStats.buffers_written++;
+            	num_written++;
+            	ckpt_write_count++;
+        	}
+    	}
+		ckpt_issue_writeback_calls++;
+		IssuePendingWritebacks(&wb_context, IOCONTEXT_NORMAL);
+		batch_count = 0;
+	}
+
+	/* Instrumentation: count the final pending-flush call made below. */
+    ckpt_issue_writeback_calls++;
+	
 	/*
 	 * Issue all pending flushes. Only checkpointer calls BufferSync(), so
 	 * IOContext will always be IOCONTEXT_NORMAL.
@@ -3717,6 +3776,12 @@ BufferSync(int flags)
 	 */
 	CheckpointStats.ckpt_bufs_written += num_written;
 
+	/* Debug-only instrumentation; uint64 needs PRIu64, not %lu (LLP64). */
+	elog(DEBUG1,
+		 "checkpoint BufferSync stats: buffers_written=%" PRIu64
+		 ", writeback_calls=%" PRIu64,
+		 ckpt_write_count, ckpt_issue_writeback_calls);
+
 	TRACE_POSTGRESQL_BUFFER_SYNC_DONE(NBuffers, num_written, num_to_scan);
 }
 
-- 
2.34.1

