From 07ac566644abd0ee01ab76cb96b0b176f3d25c5a Mon Sep 17 00:00:00 2001
From: Bertrand Drouvot <bertranddrouvot.pg@gmail.com>
Date: Mon, 5 Jan 2026 09:41:39 +0000
Subject: [PATCH v4 1/4] Add pgstat_report_anytime_stat() for periodic stats
 flushing

Long running transactions can accumulate significant statistics (WAL, IO, ...)
that remain unflushed until the transaction ends. This delays visibility of
resource usage in monitoring views like pg_stat_io and pg_stat_wal.

This commit introduces pgstat_report_anytime_stat(), which flushes
non transactional statistics even inside active transactions. A new timeout
handler fires every second to call this function, ensuring timely stats visibility
without waiting for transaction completion.

Implementation details:

- Add PgStat_FlushMode enum to classify stats kinds:
  * FLUSH_ANYTIME: Stats that can always be flushed (WAL, IO, ...)
  * FLUSH_AT_TXN_BOUNDARY: Stats requiring transaction boundaries

- Modify pgstat_flush_pending_entries() and pgstat_flush_fixed_stats()
  to accept a boolean anytime_only parameter:
  * When false: flushes all stats (existing behavior)
  * When true: flushes only FLUSH_ANYTIME stats and skips FLUSH_AT_TXN_BOUNDARY stats

- This relies on the existing PGSTAT_MIN_INTERVAL to fire every 1 second, calling
pgstat_report_anytime_stat(false)

The force parameter in pgstat_report_anytime_stat() is currently unused (always
called with force=false) but reserved for future use cases requiring immediate
flushing.
---
 src/backend/storage/lmgr/proc.c     |  10 +++
 src/backend/tcop/postgres.c         |  16 ++++
 src/backend/utils/activity/pgstat.c | 111 +++++++++++++++++++++++-----
 src/backend/utils/init/globals.c    |   1 +
 src/backend/utils/init/postinit.c   |  15 ++++
 src/include/miscadmin.h             |   1 +
 src/include/pgstat.h                |   4 +
 src/include/utils/pgstat_internal.h |  20 +++++
 src/include/utils/timeout.h         |   1 +
 src/tools/pgindent/typedefs.list    |   1 +
 10 files changed, 162 insertions(+), 18 deletions(-)
   7.0% src/backend/storage/lmgr/
   7.3% src/backend/tcop/
  61.1% src/backend/utils/activity/
   8.6% src/backend/utils/init/
  11.8% src/include/utils/
   3.6% src/include/

diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c
index 063826ae576..012705a2ee6 100644
--- a/src/backend/storage/lmgr/proc.c
+++ b/src/backend/storage/lmgr/proc.c
@@ -1322,6 +1322,7 @@ ProcSleep(LOCALLOCK *locallock)
 	bool		allow_autovacuum_cancel = true;
 	bool		logged_recovery_conflict = false;
 	ProcWaitStatus myWaitStatus;
+	bool		anytime_timeout_was_active = false;
 
 	/* The caller must've armed the on-error cleanup mechanism */
 	Assert(GetAwaitedLock() == locallock);
@@ -1398,6 +1399,12 @@ ProcSleep(LOCALLOCK *locallock)
 		standbyWaitStart = GetCurrentTimestamp();
 	}
 
+	anytime_timeout_was_active = get_timeout_active(ANYTIME_STATS_UPDATE_TIMEOUT);
+
+	/* No need to try to flush the statistics while the process is sleeping */
+	if (anytime_timeout_was_active)
+		disable_timeout(ANYTIME_STATS_UPDATE_TIMEOUT, false);
+
 	/*
 	 * If somebody wakes us between LWLockRelease and WaitLatch, the latch
 	 * will not wait. But a set latch does not necessarily mean that the lock
@@ -1661,6 +1668,9 @@ ProcSleep(LOCALLOCK *locallock)
 		}
 	} while (myWaitStatus == PROC_WAIT_STATUS_WAITING);
 
+	if (anytime_timeout_was_active)
+		enable_timeout_after(ANYTIME_STATS_UPDATE_TIMEOUT, PGSTAT_MIN_INTERVAL);
+
 	/*
 	 * Disable the timers, if they are still running.  As in LockErrorCleanup,
 	 * we must preserve the LOCK_TIMEOUT indicator flag: if a lock timeout has
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index e54bf1e760f..132fae61423 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -3530,6 +3530,22 @@ ProcessInterrupts(void)
 		pgstat_report_stat(true);
 	}
 
+	/*
+	 * Flush stats outside of transaction boundary if the timeout fired.
+	 * Unlike transactional stats, these can be flushed even inside a running
+	 * transaction.
+	 */
+	if (AnytimeStatsUpdateTimeoutPending)
+	{
+		AnytimeStatsUpdateTimeoutPending = false;
+
+		pgstat_report_anytime_stat(false);
+
+		/* Schedule next timeout */
+		enable_timeout_after(ANYTIME_STATS_UPDATE_TIMEOUT,
+							 PGSTAT_MIN_INTERVAL);
+	}
+
 	if (ProcSignalBarrierPending)
 		ProcessProcSignalBarrier();
 
diff --git a/src/backend/utils/activity/pgstat.c b/src/backend/utils/activity/pgstat.c
index 11bb71cad5a..ab4d9088a9a 100644
--- a/src/backend/utils/activity/pgstat.c
+++ b/src/backend/utils/activity/pgstat.c
@@ -122,8 +122,6 @@
  * ----------
  */
 
-/* minimum interval non-forced stats flushes.*/
-#define PGSTAT_MIN_INTERVAL			1000
 /* how long until to block flushing pending stats updates */
 #define PGSTAT_MAX_INTERVAL			60000
 /* when to call pgstat_report_stat() again, even when idle */
@@ -187,7 +185,8 @@ static void pgstat_init_snapshot_fixed(void);
 
 static void pgstat_reset_after_failure(void);
 
-static bool pgstat_flush_pending_entries(bool nowait);
+static bool pgstat_flush_pending_entries(bool nowait, bool anytime_only);
+static bool pgstat_flush_fixed_stats(bool nowait, bool anytime_only);
 
 static void pgstat_prep_snapshot(void);
 static void pgstat_build_snapshot(void);
@@ -288,6 +287,7 @@ static const PgStat_KindInfo pgstat_kind_builtin_infos[PGSTAT_KIND_BUILTIN_SIZE]
 
 		.fixed_amount = false,
 		.write_to_file = true,
+		.flush_mode = FLUSH_AT_TXN_BOUNDARY,
 		/* so pg_stat_database entries can be seen in all databases */
 		.accessed_across_databases = true,
 
@@ -305,6 +305,7 @@ static const PgStat_KindInfo pgstat_kind_builtin_infos[PGSTAT_KIND_BUILTIN_SIZE]
 
 		.fixed_amount = false,
 		.write_to_file = true,
+		.flush_mode = FLUSH_AT_TXN_BOUNDARY,
 
 		.shared_size = sizeof(PgStatShared_Relation),
 		.shared_data_off = offsetof(PgStatShared_Relation, stats),
@@ -321,6 +322,7 @@ static const PgStat_KindInfo pgstat_kind_builtin_infos[PGSTAT_KIND_BUILTIN_SIZE]
 
 		.fixed_amount = false,
 		.write_to_file = true,
+		.flush_mode = FLUSH_AT_TXN_BOUNDARY,
 
 		.shared_size = sizeof(PgStatShared_Function),
 		.shared_data_off = offsetof(PgStatShared_Function, stats),
@@ -336,6 +338,7 @@ static const PgStat_KindInfo pgstat_kind_builtin_infos[PGSTAT_KIND_BUILTIN_SIZE]
 
 		.fixed_amount = false,
 		.write_to_file = true,
+		.flush_mode = FLUSH_AT_TXN_BOUNDARY,
 
 		.accessed_across_databases = true,
 
@@ -353,6 +356,7 @@ static const PgStat_KindInfo pgstat_kind_builtin_infos[PGSTAT_KIND_BUILTIN_SIZE]
 
 		.fixed_amount = false,
 		.write_to_file = true,
+		.flush_mode = FLUSH_AT_TXN_BOUNDARY,
 		/* so pg_stat_subscription_stats entries can be seen in all databases */
 		.accessed_across_databases = true,
 
@@ -370,6 +374,7 @@ static const PgStat_KindInfo pgstat_kind_builtin_infos[PGSTAT_KIND_BUILTIN_SIZE]
 
 		.fixed_amount = false,
 		.write_to_file = false,
+		.flush_mode = FLUSH_ANYTIME,
 
 		.accessed_across_databases = true,
 
@@ -388,6 +393,7 @@ static const PgStat_KindInfo pgstat_kind_builtin_infos[PGSTAT_KIND_BUILTIN_SIZE]
 
 		.fixed_amount = true,
 		.write_to_file = true,
+		.flush_mode = FLUSH_ANYTIME,
 
 		.snapshot_ctl_off = offsetof(PgStat_Snapshot, archiver),
 		.shared_ctl_off = offsetof(PgStat_ShmemControl, archiver),
@@ -404,6 +410,7 @@ static const PgStat_KindInfo pgstat_kind_builtin_infos[PGSTAT_KIND_BUILTIN_SIZE]
 
 		.fixed_amount = true,
 		.write_to_file = true,
+		.flush_mode = FLUSH_ANYTIME,
 
 		.snapshot_ctl_off = offsetof(PgStat_Snapshot, bgwriter),
 		.shared_ctl_off = offsetof(PgStat_ShmemControl, bgwriter),
@@ -420,6 +427,7 @@ static const PgStat_KindInfo pgstat_kind_builtin_infos[PGSTAT_KIND_BUILTIN_SIZE]
 
 		.fixed_amount = true,
 		.write_to_file = true,
+		.flush_mode = FLUSH_ANYTIME,
 
 		.snapshot_ctl_off = offsetof(PgStat_Snapshot, checkpointer),
 		.shared_ctl_off = offsetof(PgStat_ShmemControl, checkpointer),
@@ -436,6 +444,7 @@ static const PgStat_KindInfo pgstat_kind_builtin_infos[PGSTAT_KIND_BUILTIN_SIZE]
 
 		.fixed_amount = true,
 		.write_to_file = true,
+		.flush_mode = FLUSH_ANYTIME,
 
 		.snapshot_ctl_off = offsetof(PgStat_Snapshot, io),
 		.shared_ctl_off = offsetof(PgStat_ShmemControl, io),
@@ -453,6 +462,7 @@ static const PgStat_KindInfo pgstat_kind_builtin_infos[PGSTAT_KIND_BUILTIN_SIZE]
 
 		.fixed_amount = true,
 		.write_to_file = true,
+		.flush_mode = FLUSH_ANYTIME,
 
 		.snapshot_ctl_off = offsetof(PgStat_Snapshot, slru),
 		.shared_ctl_off = offsetof(PgStat_ShmemControl, slru),
@@ -470,6 +480,7 @@ static const PgStat_KindInfo pgstat_kind_builtin_infos[PGSTAT_KIND_BUILTIN_SIZE]
 
 		.fixed_amount = true,
 		.write_to_file = true,
+		.flush_mode = FLUSH_ANYTIME,
 
 		.snapshot_ctl_off = offsetof(PgStat_Snapshot, wal),
 		.shared_ctl_off = offsetof(PgStat_ShmemControl, wal),
@@ -775,23 +786,11 @@ pgstat_report_stat(bool force)
 	partial_flush = false;
 
 	/* flush of variable-numbered stats tracked in pending entries list */
-	partial_flush |= pgstat_flush_pending_entries(nowait);
+	partial_flush |= pgstat_flush_pending_entries(nowait, false);
 
 	/* flush of other stats kinds */
 	if (pgstat_report_fixed)
-	{
-		for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
-		{
-			const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
-
-			if (!kind_info)
-				continue;
-			if (!kind_info->flush_static_cb)
-				continue;
-
-			partial_flush |= kind_info->flush_static_cb(nowait);
-		}
-	}
+		partial_flush |= pgstat_flush_fixed_stats(nowait, false);
 
 	last_flush = now;
 
@@ -1345,9 +1344,14 @@ pgstat_delete_pending_entry(PgStat_EntryRef *entry_ref)
 
 /*
  * Flush out pending variable-numbered stats.
+ *
+ * If anytime_only is true, only flushes FLUSH_ANYTIME entries.
+ * This is safe to call inside transactions.
+ *
+ * If anytime_only is false, flushes all entries.
  */
 static bool
-pgstat_flush_pending_entries(bool nowait)
+pgstat_flush_pending_entries(bool nowait, bool anytime_only)
 {
 	bool		have_pending = false;
 	dlist_node *cur = NULL;
@@ -1377,6 +1381,20 @@ pgstat_flush_pending_entries(bool nowait)
 		Assert(!kind_info->fixed_amount);
 		Assert(kind_info->flush_pending_cb != NULL);
 
+		/* Skip transactional stats if we're in anytime_only mode */
+		if (anytime_only && kind_info->flush_mode == FLUSH_AT_TXN_BOUNDARY)
+		{
+			have_pending = true;
+
+			if (dlist_has_next(&pgStatPending, cur))
+				next = dlist_next_node(&pgStatPending, cur);
+			else
+				next = NULL;
+
+			cur = next;
+			continue;
+		}
+
 		/* flush the stats, if possible */
 		did_flush = kind_info->flush_pending_cb(entry_ref, nowait);
 
@@ -1402,6 +1420,33 @@ pgstat_flush_pending_entries(bool nowait)
 	return have_pending;
 }
 
+/*
+ * Flush fixed-amount stats.
+ *
+ * If anytime_only is true, only flushes FLUSH_ANYTIME stats (safe inside transactions).
+ * If anytime_only is false, flushes all stats with flush_static_cb.
+ */
+static bool
+pgstat_flush_fixed_stats(bool nowait, bool anytime_only)
+{
+	bool		partial_flush = false;
+
+	for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
+	{
+		const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
+
+		if (!kind_info || !kind_info->flush_static_cb)
+			continue;
+
+		/* Skip transactional stats if we're in anytime_only mode */
+		if (anytime_only && kind_info->flush_mode == FLUSH_AT_TXN_BOUNDARY)
+			continue;
+
+		partial_flush |= kind_info->flush_static_cb(nowait);
+	}
+
+	return partial_flush;
+}
 
 /* ------------------------------------------------------------
  * Helper / infrastructure functions
@@ -2119,3 +2164,33 @@ assign_stats_fetch_consistency(int newval, void *extra)
 	if (pgstat_fetch_consistency != newval)
 		force_stats_snapshot_clear = true;
 }
+
+/*
+ * Flushes only FLUSH_ANYTIME stats using non-blocking locks. Transactional
+ * stats (FLUSH_AT_TXN_BOUNDARY) remain pending until transaction boundary.
+ * Safe to call inside transactions.
+ */
+void
+pgstat_report_anytime_stat(bool force)
+{
+	bool		nowait = !force;
+
+	pgstat_assert_is_up();
+
+	/*
+	 * Exit if no pending stats at all. This avoids unnecessary work when
+	 * backends are idle or in sessions without stats accumulation.
+	 *
+	 * Note: This check isn't precise as there might be only transactional
+	 * stats pending, which we'll skip during the flush. However, maintaining
+	 * precise tracking would add complexity that does not seem worth it from
+	 * a performance point of view (no noticeable performance regression has
+	 * been observed with the current implementation).
+	 */
+	if (dlist_is_empty(&pgStatPending) && !pgstat_report_fixed)
+		return;
+
+	/* Flush stats outside of transaction boundary */
+	pgstat_flush_pending_entries(nowait, true);
+	pgstat_flush_fixed_stats(nowait, true);
+}
diff --git a/src/backend/utils/init/globals.c b/src/backend/utils/init/globals.c
index 36ad708b360..ad44826c39e 100644
--- a/src/backend/utils/init/globals.c
+++ b/src/backend/utils/init/globals.c
@@ -40,6 +40,7 @@ volatile sig_atomic_t IdleSessionTimeoutPending = false;
 volatile sig_atomic_t ProcSignalBarrierPending = false;
 volatile sig_atomic_t LogMemoryContextPending = false;
 volatile sig_atomic_t IdleStatsUpdateTimeoutPending = false;
+volatile sig_atomic_t AnytimeStatsUpdateTimeoutPending = false;
 volatile uint32 InterruptHoldoffCount = 0;
 volatile uint32 QueryCancelHoldoffCount = 0;
 volatile uint32 CritSectionCount = 0;
diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c
index 3f401faf3de..6076f531c4a 100644
--- a/src/backend/utils/init/postinit.c
+++ b/src/backend/utils/init/postinit.c
@@ -82,6 +82,7 @@ static void TransactionTimeoutHandler(void);
 static void IdleSessionTimeoutHandler(void);
 static void IdleStatsUpdateTimeoutHandler(void);
 static void ClientCheckTimeoutHandler(void);
+static void AnytimeStatsUpdateTimeoutHandler(void);
 static bool ThereIsAtLeastOneRole(void);
 static void process_startup_options(Port *port, bool am_superuser);
 static void process_settings(Oid databaseid, Oid roleid);
@@ -765,6 +766,9 @@ InitPostgres(const char *in_dbname, Oid dboid,
 		RegisterTimeout(CLIENT_CONNECTION_CHECK_TIMEOUT, ClientCheckTimeoutHandler);
 		RegisterTimeout(IDLE_STATS_UPDATE_TIMEOUT,
 						IdleStatsUpdateTimeoutHandler);
+		RegisterTimeout(ANYTIME_STATS_UPDATE_TIMEOUT,
+						AnytimeStatsUpdateTimeoutHandler);
+		enable_timeout_after(ANYTIME_STATS_UPDATE_TIMEOUT, PGSTAT_MIN_INTERVAL);
 	}
 
 	/*
@@ -1446,3 +1450,14 @@ ThereIsAtLeastOneRole(void)
 
 	return result;
 }
+
+/*
+ * Timeout handler for flushing non-transactional stats.
+ */
+static void
+AnytimeStatsUpdateTimeoutHandler(void)
+{
+	AnytimeStatsUpdateTimeoutPending = true;
+	InterruptPending = true;
+	SetLatch(MyLatch);
+}
diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h
index db559b39c4d..8aeb9628871 100644
--- a/src/include/miscadmin.h
+++ b/src/include/miscadmin.h
@@ -96,6 +96,7 @@ extern PGDLLIMPORT volatile sig_atomic_t IdleSessionTimeoutPending;
 extern PGDLLIMPORT volatile sig_atomic_t ProcSignalBarrierPending;
 extern PGDLLIMPORT volatile sig_atomic_t LogMemoryContextPending;
 extern PGDLLIMPORT volatile sig_atomic_t IdleStatsUpdateTimeoutPending;
+extern PGDLLIMPORT volatile sig_atomic_t AnytimeStatsUpdateTimeoutPending;
 
 extern PGDLLIMPORT volatile sig_atomic_t CheckClientConnectionPending;
 extern PGDLLIMPORT volatile sig_atomic_t ClientConnectionLost;
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index fff7ecc2533..1651f16f966 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -35,6 +35,9 @@
 /* Default directory to store temporary statistics data in */
 #define PG_STAT_TMP_DIR		"pg_stat_tmp"
 
+/* Minimum interval non-forced stats flushes */
+#define PGSTAT_MIN_INTERVAL	1000
+
 /* Values for track_functions GUC variable --- order is significant! */
 typedef enum TrackFunctionsLevel
 {
@@ -533,6 +536,7 @@ extern void pgstat_initialize(void);
 
 /* Functions called from backends */
 extern long pgstat_report_stat(bool force);
+extern void pgstat_report_anytime_stat(bool force);
 extern void pgstat_force_next_flush(void);
 
 extern void pgstat_reset_counters(void);
diff --git a/src/include/utils/pgstat_internal.h b/src/include/utils/pgstat_internal.h
index 9b8fbae00ed..9ca39ea9a9a 100644
--- a/src/include/utils/pgstat_internal.h
+++ b/src/include/utils/pgstat_internal.h
@@ -224,6 +224,19 @@ typedef struct PgStat_SubXactStatus
 	PgStat_TableXactStatus *first;	/* head of list for this subxact */
 } PgStat_SubXactStatus;
 
+/*
+ * Flush mode for statistics kinds.
+ *
+ * FLUSH_AT_TXN_BOUNDARY has to be the first because we want it to be the
+ * default value.
+ */
+typedef enum PgStat_FlushMode
+{
+	FLUSH_AT_TXN_BOUNDARY,		/* All fields can only be flushed at
+								 * transaction boundary */
+	FLUSH_ANYTIME,				/* All fields can be flushed anytime,
+								 * including within transactions */
+} PgStat_FlushMode;
 
 /*
  * Metadata for a specific kind of statistics.
@@ -251,6 +264,13 @@ typedef struct PgStat_KindInfo
 	 */
 	bool		track_entry_count:1;
 
+	/*
+	 * Some stats have to be updated only at transaction boundaries (such as
+	 * tuples_inserted updated, deleted), so it's very important to set the
+	 * right flush mode (FLUSH_AT_TXN_BOUNDARY being the default).
+	 */
+	PgStat_FlushMode flush_mode;
+
 	/*
 	 * The size of an entry in the shared stats hash table (pointed to by
 	 * PgStatShared_HashEntry->body).  For fixed-numbered statistics, this is
diff --git a/src/include/utils/timeout.h b/src/include/utils/timeout.h
index 0965b590b34..10723bb664c 100644
--- a/src/include/utils/timeout.h
+++ b/src/include/utils/timeout.h
@@ -35,6 +35,7 @@ typedef enum TimeoutId
 	IDLE_SESSION_TIMEOUT,
 	IDLE_STATS_UPDATE_TIMEOUT,
 	CLIENT_CONNECTION_CHECK_TIMEOUT,
+	ANYTIME_STATS_UPDATE_TIMEOUT,
 	STARTUP_PROGRESS_TIMEOUT,
 	/* First user-definable timeout reason */
 	USER_TIMEOUT,
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index ddbe4c64971..af21c87234a 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -2268,6 +2268,7 @@ PgStat_Counter
 PgStat_EntryRef
 PgStat_EntryRefHashEntry
 PgStat_FetchConsistency
+PgStat_FlushMode
 PgStat_FunctionCallUsage
 PgStat_FunctionCounts
 PgStat_HashKey
-- 
2.34.1

