From b9652e6e6031ff88f9b789f99c5c19a38fa61d0c Mon Sep 17 00:00:00 2001
From: Bertrand Drouvot <bertranddrouvot.pg@gmail.com>
Date: Mon, 5 Jan 2026 09:41:39 +0000
Subject: [PATCH v3 1/3] Add pgstat_report_anytime_stat() for periodic stats
 flushing

Long running transactions can accumulate significant statistics (WAL, IO, ...)
that remain unflushed until the transaction ends. This delays visibility of
resource usage in monitoring views like pg_stat_io and pg_stat_wal.

This commit introduces pgstat_report_anytime_stat(), which flushes
non transactional statistics even inside active transactions. A new timeout
handler fires every second to call this function, ensuring timely stats visibility
without waiting for transaction completion.

Implementation details:

- Add PgStat_FlushMode enum to classify stats kinds:
  * FLUSH_ANYTIME: Stats that can always be flushed (WAL, IO, ...)
  * FLUSH_AT_TXN_BOUNDARY: Stats requiring transaction boundaries

- Modify pgstat_flush_pending_entries() and pgstat_flush_fixed_stats()
  to accept a boolean anytime_only parameter:
  * When false: flushes all stats (existing behavior)
  * When true: flushes only FLUSH_ANYTIME stats and skips FLUSH_AT_TXN_BOUNDARY stats

- This relies on the existing PGSTAT_MIN_INTERVAL to fire every 1 second, calling
pgstat_report_anytime_stat(false)

The force parameter in pgstat_report_anytime_stat() is currently unused (always
called with force=false) but reserved for future use cases requiring immediate
flushing.
---
 src/backend/tcop/postgres.c         |  16 ++++
 src/backend/utils/activity/pgstat.c | 111 +++++++++++++++++++++++-----
 src/backend/utils/init/globals.c    |   1 +
 src/backend/utils/init/postinit.c   |  15 ++++
 src/include/miscadmin.h             |   1 +
 src/include/pgstat.h                |   4 +
 src/include/utils/pgstat_internal.h |  16 ++++
 src/include/utils/timeout.h         |   1 +
 src/tools/pgindent/typedefs.list    |   1 +
 9 files changed, 148 insertions(+), 18 deletions(-)
   8.1% src/backend/tcop/
  68.4% src/backend/utils/activity/
   9.6% src/backend/utils/init/
   9.2% src/include/utils/
   4.1% src/include/

diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index e54bf1e760f..132fae61423 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -3530,6 +3530,22 @@ ProcessInterrupts(void)
 		pgstat_report_stat(true);
 	}
 
+	/*
+	 * Flush stats outside of transaction boundary if the timeout fired.
+	 * Unlike transactional stats, these can be flushed even inside a running
+	 * transaction.
+	 */
+	if (AnytimeStatsUpdateTimeoutPending)
+	{
+		AnytimeStatsUpdateTimeoutPending = false;
+
+		pgstat_report_anytime_stat(false);
+
+		/* Schedule next timeout */
+		enable_timeout_after(ANYTIME_STATS_UPDATE_TIMEOUT,
+							 PGSTAT_MIN_INTERVAL);
+	}
+
 	if (ProcSignalBarrierPending)
 		ProcessProcSignalBarrier();
 
diff --git a/src/backend/utils/activity/pgstat.c b/src/backend/utils/activity/pgstat.c
index 11bb71cad5a..ab4d9088a9a 100644
--- a/src/backend/utils/activity/pgstat.c
+++ b/src/backend/utils/activity/pgstat.c
@@ -122,8 +122,6 @@
  * ----------
  */
 
-/* minimum interval non-forced stats flushes.*/
-#define PGSTAT_MIN_INTERVAL			1000
 /* how long until to block flushing pending stats updates */
 #define PGSTAT_MAX_INTERVAL			60000
 /* when to call pgstat_report_stat() again, even when idle */
@@ -187,7 +185,8 @@ static void pgstat_init_snapshot_fixed(void);
 
 static void pgstat_reset_after_failure(void);
 
-static bool pgstat_flush_pending_entries(bool nowait);
+static bool pgstat_flush_pending_entries(bool nowait, bool anytime_only);
+static bool pgstat_flush_fixed_stats(bool nowait, bool anytime_only);
 
 static void pgstat_prep_snapshot(void);
 static void pgstat_build_snapshot(void);
@@ -288,6 +287,7 @@ static const PgStat_KindInfo pgstat_kind_builtin_infos[PGSTAT_KIND_BUILTIN_SIZE]
 
 		.fixed_amount = false,
 		.write_to_file = true,
+		.flush_mode = FLUSH_AT_TXN_BOUNDARY,
 		/* so pg_stat_database entries can be seen in all databases */
 		.accessed_across_databases = true,
 
@@ -305,6 +305,7 @@ static const PgStat_KindInfo pgstat_kind_builtin_infos[PGSTAT_KIND_BUILTIN_SIZE]
 
 		.fixed_amount = false,
 		.write_to_file = true,
+		.flush_mode = FLUSH_AT_TXN_BOUNDARY,
 
 		.shared_size = sizeof(PgStatShared_Relation),
 		.shared_data_off = offsetof(PgStatShared_Relation, stats),
@@ -321,6 +322,7 @@ static const PgStat_KindInfo pgstat_kind_builtin_infos[PGSTAT_KIND_BUILTIN_SIZE]
 
 		.fixed_amount = false,
 		.write_to_file = true,
+		.flush_mode = FLUSH_AT_TXN_BOUNDARY,
 
 		.shared_size = sizeof(PgStatShared_Function),
 		.shared_data_off = offsetof(PgStatShared_Function, stats),
@@ -336,6 +338,7 @@ static const PgStat_KindInfo pgstat_kind_builtin_infos[PGSTAT_KIND_BUILTIN_SIZE]
 
 		.fixed_amount = false,
 		.write_to_file = true,
+		.flush_mode = FLUSH_AT_TXN_BOUNDARY,
 
 		.accessed_across_databases = true,
 
@@ -353,6 +356,7 @@ static const PgStat_KindInfo pgstat_kind_builtin_infos[PGSTAT_KIND_BUILTIN_SIZE]
 
 		.fixed_amount = false,
 		.write_to_file = true,
+		.flush_mode = FLUSH_AT_TXN_BOUNDARY,
 		/* so pg_stat_subscription_stats entries can be seen in all databases */
 		.accessed_across_databases = true,
 
@@ -370,6 +374,7 @@ static const PgStat_KindInfo pgstat_kind_builtin_infos[PGSTAT_KIND_BUILTIN_SIZE]
 
 		.fixed_amount = false,
 		.write_to_file = false,
+		.flush_mode = FLUSH_ANYTIME,
 
 		.accessed_across_databases = true,
 
@@ -388,6 +393,7 @@ static const PgStat_KindInfo pgstat_kind_builtin_infos[PGSTAT_KIND_BUILTIN_SIZE]
 
 		.fixed_amount = true,
 		.write_to_file = true,
+		.flush_mode = FLUSH_ANYTIME,
 
 		.snapshot_ctl_off = offsetof(PgStat_Snapshot, archiver),
 		.shared_ctl_off = offsetof(PgStat_ShmemControl, archiver),
@@ -404,6 +410,7 @@ static const PgStat_KindInfo pgstat_kind_builtin_infos[PGSTAT_KIND_BUILTIN_SIZE]
 
 		.fixed_amount = true,
 		.write_to_file = true,
+		.flush_mode = FLUSH_ANYTIME,
 
 		.snapshot_ctl_off = offsetof(PgStat_Snapshot, bgwriter),
 		.shared_ctl_off = offsetof(PgStat_ShmemControl, bgwriter),
@@ -420,6 +427,7 @@ static const PgStat_KindInfo pgstat_kind_builtin_infos[PGSTAT_KIND_BUILTIN_SIZE]
 
 		.fixed_amount = true,
 		.write_to_file = true,
+		.flush_mode = FLUSH_ANYTIME,
 
 		.snapshot_ctl_off = offsetof(PgStat_Snapshot, checkpointer),
 		.shared_ctl_off = offsetof(PgStat_ShmemControl, checkpointer),
@@ -436,6 +444,7 @@ static const PgStat_KindInfo pgstat_kind_builtin_infos[PGSTAT_KIND_BUILTIN_SIZE]
 
 		.fixed_amount = true,
 		.write_to_file = true,
+		.flush_mode = FLUSH_ANYTIME,
 
 		.snapshot_ctl_off = offsetof(PgStat_Snapshot, io),
 		.shared_ctl_off = offsetof(PgStat_ShmemControl, io),
@@ -453,6 +462,7 @@ static const PgStat_KindInfo pgstat_kind_builtin_infos[PGSTAT_KIND_BUILTIN_SIZE]
 
 		.fixed_amount = true,
 		.write_to_file = true,
+		.flush_mode = FLUSH_ANYTIME,
 
 		.snapshot_ctl_off = offsetof(PgStat_Snapshot, slru),
 		.shared_ctl_off = offsetof(PgStat_ShmemControl, slru),
@@ -470,6 +480,7 @@ static const PgStat_KindInfo pgstat_kind_builtin_infos[PGSTAT_KIND_BUILTIN_SIZE]
 
 		.fixed_amount = true,
 		.write_to_file = true,
+		.flush_mode = FLUSH_ANYTIME,
 
 		.snapshot_ctl_off = offsetof(PgStat_Snapshot, wal),
 		.shared_ctl_off = offsetof(PgStat_ShmemControl, wal),
@@ -775,23 +786,11 @@ pgstat_report_stat(bool force)
 	partial_flush = false;
 
 	/* flush of variable-numbered stats tracked in pending entries list */
-	partial_flush |= pgstat_flush_pending_entries(nowait);
+	partial_flush |= pgstat_flush_pending_entries(nowait, false);
 
 	/* flush of other stats kinds */
 	if (pgstat_report_fixed)
-	{
-		for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
-		{
-			const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
-
-			if (!kind_info)
-				continue;
-			if (!kind_info->flush_static_cb)
-				continue;
-
-			partial_flush |= kind_info->flush_static_cb(nowait);
-		}
-	}
+		partial_flush |= pgstat_flush_fixed_stats(nowait, false);
 
 	last_flush = now;
 
@@ -1345,9 +1344,14 @@ pgstat_delete_pending_entry(PgStat_EntryRef *entry_ref)
 
 /*
  * Flush out pending variable-numbered stats.
+ *
+ * If anytime_only is true, only flushes FLUSH_ANYTIME entries.
+ * This is safe to call inside transactions.
+ *
+ * If anytime_only is false, flushes all entries.
  */
 static bool
-pgstat_flush_pending_entries(bool nowait)
+pgstat_flush_pending_entries(bool nowait, bool anytime_only)
 {
 	bool		have_pending = false;
 	dlist_node *cur = NULL;
@@ -1377,6 +1381,20 @@ pgstat_flush_pending_entries(bool nowait)
 		Assert(!kind_info->fixed_amount);
 		Assert(kind_info->flush_pending_cb != NULL);
 
+		/* Skip transactional stats if we're in anytime_only mode */
+		if (anytime_only && kind_info->flush_mode == FLUSH_AT_TXN_BOUNDARY)
+		{
+			have_pending = true;
+
+			if (dlist_has_next(&pgStatPending, cur))
+				next = dlist_next_node(&pgStatPending, cur);
+			else
+				next = NULL;
+
+			cur = next;
+			continue;
+		}
+
 		/* flush the stats, if possible */
 		did_flush = kind_info->flush_pending_cb(entry_ref, nowait);
 
@@ -1402,6 +1420,33 @@ pgstat_flush_pending_entries(bool nowait)
 	return have_pending;
 }
 
+/*
+ * Flush fixed-amount stats.
+ *
+ * If anytime_only is true, only flushes FLUSH_ANYTIME stats (safe inside transactions).
+ * If anytime_only is false, flushes all stats with flush_static_cb.
+ */
+static bool
+pgstat_flush_fixed_stats(bool nowait, bool anytime_only)
+{
+	bool		partial_flush = false;
+
+	for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
+	{
+		const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
+
+		if (!kind_info || !kind_info->flush_static_cb)
+			continue;
+
+		/* Skip transactional stats if we're in anytime_only mode */
+		if (anytime_only && kind_info->flush_mode == FLUSH_AT_TXN_BOUNDARY)
+			continue;
+
+		partial_flush |= kind_info->flush_static_cb(nowait);
+	}
+
+	return partial_flush;
+}
 
 /* ------------------------------------------------------------
  * Helper / infrastructure functions
@@ -2119,3 +2164,33 @@ assign_stats_fetch_consistency(int newval, void *extra)
 	if (pgstat_fetch_consistency != newval)
 		force_stats_snapshot_clear = true;
 }
+
+/*
+ * Flushes only FLUSH_ANYTIME stats using non-blocking locks. Transactional
+ * stats (FLUSH_AT_TXN_BOUNDARY) remain pending until transaction boundary.
+ * Safe to call inside transactions.
+ */
+void
+pgstat_report_anytime_stat(bool force)
+{
+	bool		nowait = !force;
+
+	pgstat_assert_is_up();
+
+	/*
+	 * Exit if no pending stats at all. This avoids unnecessary work when
+	 * backends are idle or in sessions without stats accumulation.
+	 *
+	 * Note: This check isn't precise as there might be only transactional
+	 * stats pending, which we'll skip during the flush. However, maintaining
+	 * precise tracking would add complexity that does not seem worth it from
+	 * a performance point of view (no noticeable performance regression has
+	 * been observed with the current implementation).
+	 */
+	if (dlist_is_empty(&pgStatPending) && !pgstat_report_fixed)
+		return;
+
+	/* Flush stats outside of transaction boundary */
+	pgstat_flush_pending_entries(nowait, true);
+	pgstat_flush_fixed_stats(nowait, true);
+}
diff --git a/src/backend/utils/init/globals.c b/src/backend/utils/init/globals.c
index 36ad708b360..ad44826c39e 100644
--- a/src/backend/utils/init/globals.c
+++ b/src/backend/utils/init/globals.c
@@ -40,6 +40,7 @@ volatile sig_atomic_t IdleSessionTimeoutPending = false;
 volatile sig_atomic_t ProcSignalBarrierPending = false;
 volatile sig_atomic_t LogMemoryContextPending = false;
 volatile sig_atomic_t IdleStatsUpdateTimeoutPending = false;
+volatile sig_atomic_t AnytimeStatsUpdateTimeoutPending = false;
 volatile uint32 InterruptHoldoffCount = 0;
 volatile uint32 QueryCancelHoldoffCount = 0;
 volatile uint32 CritSectionCount = 0;
diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c
index 3f401faf3de..6076f531c4a 100644
--- a/src/backend/utils/init/postinit.c
+++ b/src/backend/utils/init/postinit.c
@@ -82,6 +82,7 @@ static void TransactionTimeoutHandler(void);
 static void IdleSessionTimeoutHandler(void);
 static void IdleStatsUpdateTimeoutHandler(void);
 static void ClientCheckTimeoutHandler(void);
+static void AnytimeStatsUpdateTimeoutHandler(void);
 static bool ThereIsAtLeastOneRole(void);
 static void process_startup_options(Port *port, bool am_superuser);
 static void process_settings(Oid databaseid, Oid roleid);
@@ -765,6 +766,9 @@ InitPostgres(const char *in_dbname, Oid dboid,
 		RegisterTimeout(CLIENT_CONNECTION_CHECK_TIMEOUT, ClientCheckTimeoutHandler);
 		RegisterTimeout(IDLE_STATS_UPDATE_TIMEOUT,
 						IdleStatsUpdateTimeoutHandler);
+		RegisterTimeout(ANYTIME_STATS_UPDATE_TIMEOUT,
+						AnytimeStatsUpdateTimeoutHandler);
+		enable_timeout_after(ANYTIME_STATS_UPDATE_TIMEOUT, PGSTAT_MIN_INTERVAL);
 	}
 
 	/*
@@ -1446,3 +1450,14 @@ ThereIsAtLeastOneRole(void)
 
 	return result;
 }
+
+/*
+ * Timeout handler for flushing non-transactional stats.
+ */
+static void
+AnytimeStatsUpdateTimeoutHandler(void)
+{
+	AnytimeStatsUpdateTimeoutPending = true;
+	InterruptPending = true;
+	SetLatch(MyLatch);
+}
diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h
index db559b39c4d..8aeb9628871 100644
--- a/src/include/miscadmin.h
+++ b/src/include/miscadmin.h
@@ -96,6 +96,7 @@ extern PGDLLIMPORT volatile sig_atomic_t IdleSessionTimeoutPending;
 extern PGDLLIMPORT volatile sig_atomic_t ProcSignalBarrierPending;
 extern PGDLLIMPORT volatile sig_atomic_t LogMemoryContextPending;
 extern PGDLLIMPORT volatile sig_atomic_t IdleStatsUpdateTimeoutPending;
+extern PGDLLIMPORT volatile sig_atomic_t AnytimeStatsUpdateTimeoutPending;
 
 extern PGDLLIMPORT volatile sig_atomic_t CheckClientConnectionPending;
 extern PGDLLIMPORT volatile sig_atomic_t ClientConnectionLost;
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index fff7ecc2533..1651f16f966 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -35,6 +35,9 @@
 /* Default directory to store temporary statistics data in */
 #define PG_STAT_TMP_DIR		"pg_stat_tmp"
 
+/* Minimum interval non-forced stats flushes */
+#define PGSTAT_MIN_INTERVAL	1000
+
 /* Values for track_functions GUC variable --- order is significant! */
 typedef enum TrackFunctionsLevel
 {
@@ -533,6 +536,7 @@ extern void pgstat_initialize(void);
 
 /* Functions called from backends */
 extern long pgstat_report_stat(bool force);
+extern void pgstat_report_anytime_stat(bool force);
 extern void pgstat_force_next_flush(void);
 
 extern void pgstat_reset_counters(void);
diff --git a/src/include/utils/pgstat_internal.h b/src/include/utils/pgstat_internal.h
index 9b8fbae00ed..46ce90c9624 100644
--- a/src/include/utils/pgstat_internal.h
+++ b/src/include/utils/pgstat_internal.h
@@ -224,6 +224,19 @@ typedef struct PgStat_SubXactStatus
 	PgStat_TableXactStatus *first;	/* head of list for this subxact */
 } PgStat_SubXactStatus;
 
+/*
+ * Flush mode for statistics kinds.
+ *
+ * FLUSH_AT_TXN_BOUNDARY has to be the first because we want it to be the
+ * default value.
+ */
+typedef enum PgStat_FlushMode
+{
+	FLUSH_AT_TXN_BOUNDARY,		/* All fields can only be flushed at
+								 * transaction boundary */
+	FLUSH_ANYTIME,				/* All fields can be flushed anytime,
+								 * including within transactions */
+} PgStat_FlushMode;
 
 /*
  * Metadata for a specific kind of statistics.
@@ -251,6 +264,9 @@ typedef struct PgStat_KindInfo
 	 */
 	bool		track_entry_count:1;
 
+	/* Flush mode */
+	PgStat_FlushMode flush_mode;
+
 	/*
 	 * The size of an entry in the shared stats hash table (pointed to by
 	 * PgStatShared_HashEntry->body).  For fixed-numbered statistics, this is
diff --git a/src/include/utils/timeout.h b/src/include/utils/timeout.h
index 0965b590b34..10723bb664c 100644
--- a/src/include/utils/timeout.h
+++ b/src/include/utils/timeout.h
@@ -35,6 +35,7 @@ typedef enum TimeoutId
 	IDLE_SESSION_TIMEOUT,
 	IDLE_STATS_UPDATE_TIMEOUT,
 	CLIENT_CONNECTION_CHECK_TIMEOUT,
+	ANYTIME_STATS_UPDATE_TIMEOUT,
 	STARTUP_PROGRESS_TIMEOUT,
 	/* First user-definable timeout reason */
 	USER_TIMEOUT,
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 3f3a888fd0e..d3912b43fdc 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -2268,6 +2268,7 @@ PgStat_Counter
 PgStat_EntryRef
 PgStat_EntryRefHashEntry
 PgStat_FetchConsistency
+PgStat_FlushMode
 PgStat_FunctionCallUsage
 PgStat_FunctionCounts
 PgStat_HashKey
-- 
2.34.1

