From d6783896069de828b13c554c4c21ce439a76d2bc Mon Sep 17 00:00:00 2001
From: Jakub Wartak <jakub.wartak@enterprisedb.com>
Date: Wed, 18 Mar 2026 07:24:14 +0100
Subject: [PATCH v10f 2/3] Lower pg_stat_io_histogram private (backend) memory
 in pending_hist_time_buckets by using array with indirect offsets.

---
 src/backend/utils/activity/pgstat.c    |  9 +--
 src/backend/utils/activity/pgstat_io.c | 90 ++++++++++++++++++++++++--
 src/include/pgstat.h                   | 19 ++++--
 src/include/utils/pgstat_internal.h    |  1 +
 4 files changed, 102 insertions(+), 17 deletions(-)

diff --git a/src/backend/utils/activity/pgstat.c b/src/backend/utils/activity/pgstat.c
index 9feb2f1370b..7c597932671 100644
--- a/src/backend/utils/activity/pgstat.c
+++ b/src/backend/utils/activity/pgstat.c
@@ -445,6 +445,7 @@ static const PgStat_KindInfo pgstat_kind_builtin_infos[PGSTAT_KIND_BUILTIN_SIZE]
 		.shared_data_off = offsetof(PgStatShared_IO, stats),
 		.shared_data_len = sizeof(((PgStatShared_IO *) 0)->stats),
 
+		.init_backend_cb = pgstat_io_init_backend_cb,
 		.flush_static_cb = pgstat_io_flush_cb,
 		.init_shmem_cb = pgstat_io_init_shmem_cb,
 		.reset_all_cb = pgstat_io_reset_all_cb,
@@ -691,14 +692,6 @@ pgstat_initialize(void)
 	/* Set up a process-exit hook to clean up */
 	before_shmem_exit(pgstat_shutdown_hook, 0);
 
-	/* Allocate I/O latency buckets only if we are going to populate it */
-	if (track_io_timing || track_wal_io_timing)
-		PendingIOStats.pending_hist_time_buckets = MemoryContextAllocZero(TopMemoryContext,
-																		  IOOBJECT_NUM_TYPES * IOCONTEXT_NUM_TYPES * IOOP_NUM_TYPES *
-																		  PGSTAT_IO_HIST_BUCKETS * sizeof(uint64));
-	else
-		PendingIOStats.pending_hist_time_buckets = NULL;
-
 #ifdef USE_ASSERT_CHECKING
 	pgstat_is_initialized = true;
 #endif
diff --git a/src/backend/utils/activity/pgstat_io.c b/src/backend/utils/activity/pgstat_io.c
index bbf910ac4bb..1696f278a77 100644
--- a/src/backend/utils/activity/pgstat_io.c
+++ b/src/backend/utils/activity/pgstat_io.c
@@ -16,6 +16,7 @@
 
 #include "postgres.h"
 
+#include "access/xlog.h"
 #include "executor/instrument.h"
 #include "port/pg_bitutils.h"
 #include "storage/bufmgr.h"
@@ -66,6 +67,27 @@ pgstat_bktype_io_stats_valid(PgStat_BktypeIO *backend_io,
 	return true;
 }
 
+/*
+ * Count the (io_object, io_context, io_op) combinations for which
+ * pgstat_tracks_io_op() reports that the given backend type tracks I/O.
+ */
+int
+pgstat_bktype_count_potentially_used(BackendType bktype)
+{
+	int			ntracked = 0;
+
+	for (int obj = 0; obj < IOOBJECT_NUM_TYPES; obj++)
+	{
+		for (int ctx = 0; ctx < IOCONTEXT_NUM_TYPES; ctx++)
+		{
+			for (int op = 0; op < IOOP_NUM_TYPES; op++)
+				ntracked += pgstat_tracks_io_op(bktype, obj, ctx, op) ? 1 : 0;
+		}
+	}
+
+	return ntracked;
+}
+
 void
 pgstat_count_io_op(IOObject io_object, IOContext io_context, IOOp io_op,
 				   uint32 cnt, uint64 bytes)
@@ -186,12 +208,16 @@ pgstat_count_io_op_time(IOObject io_object, IOContext io_context, IOOp io_op,
 
 		if (PendingIOStats.pending_hist_time_buckets != NULL)
 		{
+			int			offset;
+
 			/*
 			 * calculate the bucket_index based on latency in nanoseconds
 			 * (uint64)
 			 */
 			bucket_index = get_bucket_index(INSTR_TIME_GET_NANOSEC(io_time));
-			PendingIOStats.pending_hist_time_buckets[io_object][io_context][io_op][bucket_index]++;
+
+			offset = PendingIOStats.pending_hist_time_buckets_offsets[io_object][io_context][io_op];
+			PendingIOStats.pending_hist_time_buckets[offset][bucket_index]++;
 		}
 
 		/* Add the per-backend count */
@@ -264,10 +290,23 @@ pgstat_io_flush_cb(bool nowait)
 				bktype_shstats->times[io_object][io_context][io_op] +=
 					INSTR_TIME_GET_MICROSEC(time);
 
+				/*
+				 * If I/O latency histograms are being collected, add this
+				 * backend's pending buckets to shared memory.  The pending
+				 * array is compact, so look up each histogram through
+				 * pending_hist_time_buckets_offsets (-1 means not tracked).
+				 */
 				if (PendingIOStats.pending_hist_time_buckets != NULL)
 					for (int b = 0; b < PGSTAT_IO_HIST_BUCKETS; b++)
-						bktype_shstats->hist_time_buckets[io_object][io_context][io_op][b] +=
-							PendingIOStats.pending_hist_time_buckets[io_object][io_context][io_op][b];
+					{
+						int			pending_off = PendingIOStats.pending_hist_time_buckets_offsets[io_object][io_context][io_op];
+
+						if (pending_off != -1)
+						{
+							bktype_shstats->hist_time_buckets[io_object][io_context][io_op][b] +=
+								PendingIOStats.pending_hist_time_buckets[pending_off][b];
+						}
+					}
 			}
 		}
 	}
@@ -276,8 +315,14 @@ pgstat_io_flush_cb(bool nowait)
 
 	LWLockRelease(bktype_lock);
 
-	/* Avoid overwriting latency buckets array pointer */
+	/*
+	 * Reset only the members preceding pending_hist_time_buckets, so that the
+	 * histogram pointer and its offsets survive; then zero the histograms.
+	 */
 	memset(&PendingIOStats, 0, offsetof(PgStat_PendingIO, pending_hist_time_buckets));
+	if (PendingIOStats.pending_hist_time_buckets != NULL)
+		memset(PendingIOStats.pending_hist_time_buckets, 0,
+			   PendingIOStats.pending_hist_time_buckets_size * sizeof(*PendingIOStats.pending_hist_time_buckets));
 
 	have_iostats = false;
 
@@ -349,6 +394,43 @@ pgstat_get_io_op_name(IOOp io_op)
 	pg_unreachable();
 }
 
+/*
+ * Allocate this backend's pending I/O latency histograms: one per (object,
+ * context, op) combination tracked for MyBackendType, addressed via offsets.
+ */
+void
+pgstat_io_init_backend_cb(void)
+{
+	/* Allocate I/O latency buckets only if we are going to populate them */
+	if (track_io_timing || track_wal_io_timing)
+	{
+		int			nhist = pgstat_bktype_count_potentially_used(MyBackendType);
+		int			next_slot = 0;
+
+		PendingIOStats.pending_hist_time_buckets_size = nhist;
+		PendingIOStats.pending_hist_time_buckets = MemoryContextAllocZero(TopMemoryContext,
+																		  nhist * sizeof(*PendingIOStats.pending_hist_time_buckets));
+
+		for (int io_object = 0; io_object < IOOBJECT_NUM_TYPES; io_object++)
+		{
+			for (int io_context = 0; io_context < IOCONTEXT_NUM_TYPES; io_context++)
+			{
+				for (int io_op = 0; io_op < IOOP_NUM_TYPES; io_op++)
+				{
+					int			slot = -1;	/* -1 marks an untracked combination */
+
+					if (pgstat_tracks_io_op(MyBackendType, io_object, io_context, io_op))
+						slot = next_slot++;
+					Assert(slot < nhist);	/* slot is used as an array index */
+					PendingIOStats.pending_hist_time_buckets_offsets[io_object][io_context][io_op] = slot;
+				}
+			}
+		}
+	}
+	else
+		PendingIOStats.pending_hist_time_buckets = NULL;
+}
+
 void
 pgstat_io_init_shmem_cb(void *stats)
 {
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index 34fd93f86dc..984914e69b8 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -352,12 +352,20 @@ typedef struct PgStat_PendingIO
 	instr_time	pending_times[IOOBJECT_NUM_TYPES][IOCONTEXT_NUM_TYPES][IOOP_NUM_TYPES];
 
 	/*
-	 * Dynamically allocated array of
-	 * [IOOBJECT_NUM_TYPES][IOCONTEXT_NUM_TYPES]
-	 * [IOOP_NUM_TYPES][PGSTAT_IO_HIST_BUCKETS] only with track_io_timings
-	 * true.
+	 * Dynamically allocated array of I/O latency histograms, present only
+	 * when track_io_timing or track_wal_io_timing is enabled.
+	 * pending_hist_time_buckets_offsets maps each (object, context, op) to
+	 * its histogram's index in that array, or -1 if it is not tracked.
 	 */
-	uint64		(*pending_hist_time_buckets)[IOCONTEXT_NUM_TYPES][IOOP_NUM_TYPES][PGSTAT_IO_HIST_BUCKETS];
+	uint64		(*pending_hist_time_buckets)[PGSTAT_IO_HIST_BUCKETS];
+	int			pending_hist_time_buckets_offsets[IOOBJECT_NUM_TYPES][IOCONTEXT_NUM_TYPES][IOOP_NUM_TYPES];
+
+	/*
+	 * Cache how many histograms we have allocated to avoid repeatedly calling
+	 * pgstat_bktype_count_potentially_used(MyBackendType) from
+	 * pgstat_io_flush_cb()
+	 */
+	int			pending_hist_time_buckets_size;
 } PgStat_PendingIO;
 
 extern PgStat_PendingIO PendingIOStats;
@@ -645,6 +653,7 @@ extern PgStat_CheckpointerStats *pgstat_fetch_stat_checkpointer(void);
 
 extern bool pgstat_bktype_io_stats_valid(PgStat_BktypeIO *backend_io,
 										 BackendType bktype);
+extern int	pgstat_bktype_count_potentially_used(BackendType bktype);
 extern void pgstat_count_io_op(IOObject io_object, IOContext io_context,
 							   IOOp io_op, uint32 cnt, uint64 bytes);
 extern instr_time pgstat_prepare_io_time(bool track_io_guc);
diff --git a/src/include/utils/pgstat_internal.h b/src/include/utils/pgstat_internal.h
index a3ce8b04723..fcaf21db574 100644
--- a/src/include/utils/pgstat_internal.h
+++ b/src/include/utils/pgstat_internal.h
@@ -759,6 +759,7 @@ extern void pgstat_function_reset_timestamp_cb(PgStatShared_Common *header, Time
 extern void pgstat_flush_io(bool nowait);
 
 extern bool pgstat_io_flush_cb(bool nowait);
+extern void pgstat_io_init_backend_cb(void);
 extern void pgstat_io_init_shmem_cb(void *stats);
 extern void pgstat_io_reset_all_cb(TimestampTz ts);
 extern void pgstat_io_snapshot_cb(void);
-- 
2.43.0

