From ef08eb591f0d539ca4603686302a2f454c960662 Mon Sep 17 00:00:00 2001
From: Andres Freund <andres@anarazel.de>
Date: Wed, 22 Jan 2025 16:09:51 -0500
Subject: [PATCH v2.6 28/34] bufmgr: Implement AIO write support

As of this commit there are no users of these AIO facilities; those will come
in later commits.

Author:
Reviewed-By:
Discussion: https://postgr.es/m/
Backpatch:
---
 src/include/storage/aio.h              |  2 +
 src/include/storage/bufmgr.h           |  2 +
 src/backend/storage/aio/aio_callback.c |  2 +
 src/backend/storage/buffer/bufmgr.c    | 88 ++++++++++++++++++++++++++
 4 files changed, 94 insertions(+)

diff --git a/src/include/storage/aio.h b/src/include/storage/aio.h
index cdf54b90b15..b53aa9748c3 100644
--- a/src/include/storage/aio.h
+++ b/src/include/storage/aio.h
@@ -182,8 +182,10 @@ typedef enum PgAioHandleCallbackID
 	PGAIO_HCB_MD_WRITEV,
 
 	PGAIO_HCB_SHARED_BUFFER_READV,
+	PGAIO_HCB_SHARED_BUFFER_WRITEV,
 
 	PGAIO_HCB_LOCAL_BUFFER_READV,
+	PGAIO_HCB_LOCAL_BUFFER_WRITEV,
 } PgAioHandleCallbackID;
 
 
diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h
index db9a4673097..a2bff99fb55 100644
--- a/src/include/storage/bufmgr.h
+++ b/src/include/storage/bufmgr.h
@@ -176,7 +176,9 @@ extern PGDLLIMPORT int backend_flush_after;
 extern PGDLLIMPORT int bgwriter_flush_after;
 
 extern const PgAioHandleCallbacks aio_shared_buffer_readv_cb;
+extern const PgAioHandleCallbacks aio_shared_buffer_writev_cb;
 extern const PgAioHandleCallbacks aio_local_buffer_readv_cb;
+extern const PgAioHandleCallbacks aio_local_buffer_writev_cb;
 
 /* in buf_init.c */
 extern PGDLLIMPORT char *BufferBlocks;
diff --git a/src/backend/storage/aio/aio_callback.c b/src/backend/storage/aio/aio_callback.c
index fb6ac058a09..7162f722e3c 100644
--- a/src/backend/storage/aio/aio_callback.c
+++ b/src/backend/storage/aio/aio_callback.c
@@ -44,8 +44,10 @@ static const PgAioHandleCallbacksEntry aio_handle_cbs[] = {
 	CALLBACK_ENTRY(PGAIO_HCB_MD_WRITEV, aio_md_writev_cb),
 
 	CALLBACK_ENTRY(PGAIO_HCB_SHARED_BUFFER_READV, aio_shared_buffer_readv_cb),
+	CALLBACK_ENTRY(PGAIO_HCB_SHARED_BUFFER_WRITEV, aio_shared_buffer_writev_cb),
 
 	CALLBACK_ENTRY(PGAIO_HCB_LOCAL_BUFFER_READV, aio_local_buffer_readv_cb),
+	CALLBACK_ENTRY(PGAIO_HCB_LOCAL_BUFFER_WRITEV, aio_local_buffer_writev_cb),
 #undef CALLBACK_ENTRY
 };
 
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 20544b39ef9..1f47edaa7b9 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -6467,6 +6467,59 @@ SharedBufferCompleteRead(int buf_off, Buffer buffer, uint8 flags, bool failed)
 	return result;
 }
 
+/*
+ * Complete an asynchronous write of a single shared buffer.
+ *
+ * Ends the buffer's IO via TerminateBufferIO(). Clearing of the dirty flag is
+ * only requested when the write succeeded; when 'failed' is set the buffer
+ * is marked with BM_IO_ERROR and has to stay dirty, as its contents did not
+ * reach storage.
+ *
+ * If 'release_lock' is true, the buffer's content lock, which the initiator
+ * of the IO disowned, is released here in LW_SHARED mode.
+ *
+ * Always returns 0 for now.
+ */
+static uint64
+BufferCompleteWriteShared(Buffer buffer, bool release_lock, bool failed)
+{
+	BufferDesc *bufHdr;
+	uint64		result = 0;
+
+	Assert(BufferIsValid(buffer));
+
+	bufHdr = GetBufferDescriptor(buffer - 1);
+
+#ifdef USE_ASSERT_CHECKING
+	{
+		uint32		buf_state = pg_atomic_read_u32(&bufHdr->state);
+
+		Assert(buf_state & BM_VALID);
+		Assert(buf_state & BM_TAG_VALID);
+		Assert(buf_state & BM_IO_IN_PROGRESS);
+		Assert(buf_state & BM_DIRTY);
+	}
+#endif
+
+	/*
+	 * A failed write must not clear BM_DIRTY, otherwise the page would be
+	 * treated as clean without ever having been written out.
+	 */
+	TerminateBufferIO(bufHdr, /* clear_dirty = */ !failed,
+					  failed ? BM_IO_ERROR : 0,
+					  /* forget_owner = */ false,
+					  /* syncio = */ false);
+
+	/*
+	 * The initiator of IO is not managing the lock (i.e. called
+	 * LWLockDisown()), we are.
+	 */
+	if (release_lock)
+		LWLockReleaseDisowned(BufferDescriptorGetContentLock(bufHdr), LW_SHARED);
+
+	return result;
+}
+
 /*
  * Helper to prepare IO on shared buffers for execution, shared between reads
  * and writes.
@@ -6641,6 +6677,55 @@ shared_buffer_readv_complete(PgAioHandle *ioh, PgAioResult prior_result, uint8 c
 	return buffer_readv_complete_common(ioh, prior_result, false, cb_data);
 }
 
+/*
+ * AIO stage callback for writes of shared buffers.
+ *
+ * All work is delegated to shared_buffer_stage_common(), the helper shared
+ * between reads and writes; cb_data is not used.
+ */
+static void
+shared_buffer_writev_stage(PgAioHandle *ioh, uint8 cb_data)
+{
+	shared_buffer_stage_common(ioh, true);
+}
+
+/*
+ * AIO completion callback for writes of shared buffers.
+ *
+ * The handle data holds one entry per buffer covered by the IO. For each of
+ * them BufferCompleteWriteShared() is called, ending the buffer's IO and
+ * releasing its content lock. prior_result is returned unchanged.
+ *
+ * Every buffer is currently completed as successfully written, regardless of
+ * prior_result; see the FIXMEs below.
+ */
+static PgAioResult
+shared_buffer_writev_complete(PgAioHandle *ioh, PgAioResult prior_result, uint8 cb_data)
+{
+	PgAioResult result = prior_result;
+	uint64	   *io_data;
+	uint8		handle_data_len;
+
+	ereport(DEBUG5,
+			errmsg("%s: %d %d", __func__, prior_result.status, prior_result.result),
+			errhidestmt(true), errhidecontext(true));
+
+	io_data = pgaio_io_get_handle_data(ioh, &handle_data_len);
+
+	/* FIXME: handle outright errors */
+
+	for (int io_data_off = 0; io_data_off < handle_data_len; io_data_off++)
+	{
+		Buffer		buf = io_data[io_data_off];
+
+		/* FIXME: handle short writes / failures */
+		/* FIXME: ioh->target_data.shared_buffer.release_lock */
+		BufferCompleteWriteShared(buf, /* release_lock = */ true, /* failed = */ false);
+	}
+
+	return result;
+}
+
 /*
  * Helper to stage IO on local buffers for execution, shared between reads
  * and writes.
@@ -6685,6 +6754,16 @@ local_buffer_readv_complete(PgAioHandle *ioh, PgAioResult prior_result, uint8 cb
 	return buffer_readv_complete_common(ioh, prior_result, true, cb_data);
 }
 
+static void
+local_buffer_writev_stage(PgAioHandle *ioh, uint8 cb_data)
+{
+	/*
+	 * Placeholder that just errors out. Unreachable for now: the only write
+	 * support is for checkpointer / bgwriter, which don't use local buffers.
+	 */
+	elog(ERROR, "not yet");
+}
+
 
 /* readv callback is is passed READ_BUFFERS_* flags as callback data */
 const PgAioHandleCallbacks aio_shared_buffer_readv_cb = {
@@ -6693,6 +6772,11 @@ const PgAioHandleCallbacks aio_shared_buffer_readv_cb = {
 	.report = buffer_readv_report,
 };
 
+const PgAioHandleCallbacks aio_shared_buffer_writev_cb = {	/* no .report is set */
+	.stage = shared_buffer_writev_stage,	/* defers to shared_buffer_stage_common() */
+	.complete_shared = shared_buffer_writev_complete,	/* ends IO on each buffer */
+};
+
 /* readv callback is is passed READ_BUFFERS_* flags as callback data */
 const PgAioHandleCallbacks aio_local_buffer_readv_cb = {
 	.stage = local_buffer_readv_stage,
@@ -6706,3 +6790,7 @@ const PgAioHandleCallbacks aio_local_buffer_readv_cb = {
 	.complete_local = local_buffer_readv_complete,
 	.report = buffer_readv_report,
 };
+
+const PgAioHandleCallbacks aio_local_buffer_writev_cb = {	/* no completion callback is set */
+	.stage = local_buffer_writev_stage, /* currently always raises ERROR "not yet" */
+};
-- 
2.48.1.76.g4e746b1a31.dirty

