From af71e6472727b4e18ca369e21e2b4667d4cd172b Mon Sep 17 00:00:00 2001
From: Bertrand Drouvot <bertranddrouvot.pg@gmail.com>
Date: Thu, 8 Jan 2026 09:17:38 +0000
Subject: [PATCH v1 3/3] Add FLUSH_MIXED support and implement it for RELATION
 stats

This commit extends the non transactional stats infrastructure to support statistics
kinds with mixed transaction behavior: some fields are transactional (e.g., tuple
inserts/updates/deletes) while others are non transactional (e.g., sequential scans
blocks read, ...).

It introduces FLUSH_MIXED as a third flush behavior type, alongside FLUSH_ANYTIME
and FLUSH_AT_TXN_BOUNDARY. For FLUSH_MIXED kinds, a new flush_anytime_cb callback
enables partial flushing of only the non transactional fields during running
transactions.

Some tests are also added.

Implementation details:

- Add FLUSH_MIXED to PgStat_FlushBehavior enum
- Add flush_anytime_cb to PgStat_KindInfo for partial flushing callback
- Update pgstat_flush_pending_entries() to call flush_anytime_cb for
  FLUSH_MIXED entries when in anytime_only mode
- Keep FLUSH_MIXED entries in the pending list after partial flush, as
  transactional fields still need to be flushed at transaction boundary

RELATION stats are making use of FLUSH_MIXED:

- Change RELATION from TXN_ALL to FLUSH_MIXED
- Implement pgstat_relation_flush_anytime_cb() to flush only read related
  stats: numscans, tuples_returned, tuples_fetched, blocks_fetched,
  blocks_hit
- Clear these fields after flushing to prevent double counting when
  pgstat_relation_flush_cb() runs at transaction commit
- Transactional stats (tuples_inserted, tuples_updated, tuples_deleted,
  live_tuples, dead_tuples) remain pending until transaction boundary

Remark:

We could also imagine adding a new flush_anytime_static_cb() callback for
future FLUSH_MIXED fixed amount stats.
---
 src/backend/utils/activity/pgstat.c          | 36 ++++++---
 src/backend/utils/activity/pgstat_relation.c | 82 ++++++++++++++++++++
 src/include/utils/pgstat_internal.h          |  8 ++
 src/test/isolation/expected/stats.out        | 40 ++++++++++
 src/test/isolation/expected/stats_1.out      | 40 ++++++++++
 src/test/isolation/specs/stats.spec          | 12 +++
 6 files changed, 209 insertions(+), 9 deletions(-)
  56.6% src/backend/utils/activity/
   4.7% src/include/utils/
  34.0% src/test/isolation/expected/
   4.6% src/test/isolation/specs/

diff --git a/src/backend/utils/activity/pgstat.c b/src/backend/utils/activity/pgstat.c
index f7942e47475..191e0ceac88 100644
--- a/src/backend/utils/activity/pgstat.c
+++ b/src/backend/utils/activity/pgstat.c
@@ -307,7 +307,7 @@ static const PgStat_KindInfo pgstat_kind_builtin_infos[PGSTAT_KIND_BUILTIN_SIZE]
 
 		.fixed_amount = false,
 		.write_to_file = true,
-		.flush_behavior = FLUSH_AT_TXN_BOUNDARY,
+		.flush_behavior = FLUSH_MIXED,
 
 		.shared_size = sizeof(PgStatShared_Relation),
 		.shared_data_off = offsetof(PgStatShared_Relation, stats),
@@ -315,6 +315,7 @@ static const PgStat_KindInfo pgstat_kind_builtin_infos[PGSTAT_KIND_BUILTIN_SIZE]
 		.pending_size = sizeof(PgStat_TableStatus),
 
 		.flush_pending_cb = pgstat_relation_flush_cb,
+		.flush_anytime_cb = pgstat_relation_flush_anytime_cb,
 		.delete_pending_cb = pgstat_relation_delete_pending_cb,
 		.reset_timestamp_cb = pgstat_relation_reset_timestamp_cb,
 	},
@@ -1347,10 +1348,11 @@ pgstat_delete_pending_entry(PgStat_EntryRef *entry_ref)
 /*
  * Flush out pending variable-numbered stats.
  *
- * If anytime_only is true, only flushes FLUSH_ANYTIME entries.
+ * If anytime_only is true, only flushes FLUSH_ANYTIME and FLUSH_MIXED entries,
+ * using flush_anytime_cb for FLUSH_MIXED.
  * This is safe to call inside transactions.
  *
- * If anytime_only is false, flushes all entries.
+ * If anytime_only is false, flushes all entries using flush_pending_cb.
  */
 static bool
 pgstat_flush_pending_entries(bool nowait, bool anytime_only)
@@ -1378,6 +1380,7 @@ pgstat_flush_pending_entries(bool nowait, bool anytime_only)
 		PgStat_Kind kind = key.kind;
 		const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
 		bool		did_flush;
+		bool		is_partial_flush = false;
 		dlist_node *next;
 
 		Assert(!kind_info->fixed_amount);
@@ -1397,8 +1400,21 @@ pgstat_flush_pending_entries(bool nowait, bool anytime_only)
 			continue;
 		}
 
-		/* flush the stats, if possible */
-		did_flush = kind_info->flush_pending_cb(entry_ref, nowait);
+		/* flush the stats (with the appropriate callback), if possible */
+		if (anytime_only &&
+			kind_info->flush_behavior == FLUSH_MIXED &&
+			kind_info->flush_anytime_cb != NULL)
+		{
+			/* Partial flush of non-transactional fields only */
+			did_flush = kind_info->flush_anytime_cb(entry_ref, nowait);
+			is_partial_flush = true;
+		}
+		else
+		{
+			/* Full flush */
+			did_flush = kind_info->flush_pending_cb(entry_ref, nowait);
+			is_partial_flush = false;
+		}
 
 		Assert(did_flush || nowait);
 
@@ -1408,8 +1424,8 @@ pgstat_flush_pending_entries(bool nowait, bool anytime_only)
 		else
 			next = NULL;
 
-		/* if successfully flushed, remove entry */
-		if (did_flush)
+		/* if successfull non partial flush, remove entry */
+		if (did_flush && !is_partial_flush)
 			pgstat_delete_pending_entry(entry_ref);
 		else
 			have_pending = true;
@@ -1418,8 +1434,10 @@ pgstat_flush_pending_entries(bool nowait, bool anytime_only)
 	}
 
 	/*
-	 * When in anytime_only mode, the list may not be empty because
-	 * FLUSH_AT_TXN_BOUNDARY entries were skipped.
+	 * When in anytime_only mode, the list may not be empty even after
+	 * successful flushes because FLUSH_AT_TXN_BOUNDARY entries were skipped
+	 * or FLUSH_MIXED entries had partial flushes and remain for transaction
+	 * boundary.
 	 */
 	Assert(!anytime_only || dlist_is_empty(&pgStatPending) == !have_pending);
 
diff --git a/src/backend/utils/activity/pgstat_relation.c b/src/backend/utils/activity/pgstat_relation.c
index feae2ae5f44..6d6f333039e 100644
--- a/src/backend/utils/activity/pgstat_relation.c
+++ b/src/backend/utils/activity/pgstat_relation.c
@@ -887,6 +887,88 @@ pgstat_relation_flush_cb(PgStat_EntryRef *entry_ref, bool nowait)
 	return true;
 }
 
+/*
+ * Flush only non-transactional relation stats.
+ *
+ * This is called periodically during running transactions to make some
+ * statistics visible without waiting for the transaction to finish.
+ *
+ * Transactional stats (inserts/updates/deletes and their effects on live/dead
+ * tuple counts) remain in pending until the transaction ends, at which point
+ * pgstat_relation_flush_cb() will flush them.
+ *
+ * If nowait is true and the lock could not be immediately acquired, returns
+ * false without flushing the entry. Otherwise returns true.
+ */
+bool
+pgstat_relation_flush_anytime_cb(PgStat_EntryRef *entry_ref, bool nowait)
+{
+	Oid			dboid;
+	PgStat_TableStatus *lstats; /* pending stats entry */
+	PgStatShared_Relation *shtabstats;
+	PgStat_StatTabEntry *tabentry;	/* table entry of shared stats */
+	PgStat_StatDBEntry *dbentry;	/* pending database entry */
+	bool		has_nontxn_stats = false;
+
+	dboid = entry_ref->shared_entry->key.dboid;
+	lstats = (PgStat_TableStatus *) entry_ref->pending;
+	shtabstats = (PgStatShared_Relation *) entry_ref->shared_stats;
+
+	/*
+	 * Check if there are any non-transactional stats to flush. Avoid
+	 * unnecessarily locking the entry if nothing accumulated.
+	 */
+	if (lstats->counts.numscans > 0 ||
+		lstats->counts.tuples_returned > 0 ||
+		lstats->counts.tuples_fetched > 0 ||
+		lstats->counts.blocks_fetched > 0 ||
+		lstats->counts.blocks_hit > 0)
+		has_nontxn_stats = true;
+
+	if (!has_nontxn_stats)
+		return true;
+
+	if (!pgstat_lock_entry(entry_ref, nowait))
+		return false;
+
+	/* Add only the non-transactional values to the shared entry */
+	tabentry = &shtabstats->stats;
+
+	tabentry->numscans += lstats->counts.numscans;
+	if (lstats->counts.numscans)
+	{
+		TimestampTz t = GetCurrentTimestamp();
+
+		if (t > tabentry->lastscan)
+			tabentry->lastscan = t;
+	}
+	tabentry->tuples_returned += lstats->counts.tuples_returned;
+	tabentry->tuples_fetched += lstats->counts.tuples_fetched;
+	tabentry->blocks_fetched += lstats->counts.blocks_fetched;
+	tabentry->blocks_hit += lstats->counts.blocks_hit;
+
+	pgstat_unlock_entry(entry_ref);
+
+	/* Also update the corresponding fields in database stats */
+	dbentry = pgstat_prep_database_pending(dboid);
+	dbentry->tuples_returned += lstats->counts.tuples_returned;
+	dbentry->tuples_fetched += lstats->counts.tuples_fetched;
+	dbentry->blocks_fetched += lstats->counts.blocks_fetched;
+	dbentry->blocks_hit += lstats->counts.blocks_hit;
+
+	/*
+	 * Clear the flushed fields from pending stats to prevent double-counting
+	 * when pgstat_relation_flush_cb() runs at transaction boundary.
+	 */
+	lstats->counts.numscans = 0;
+	lstats->counts.tuples_returned = 0;
+	lstats->counts.tuples_fetched = 0;
+	lstats->counts.blocks_fetched = 0;
+	lstats->counts.blocks_hit = 0;
+
+	return true;
+}
+
 void
 pgstat_relation_delete_pending_cb(PgStat_EntryRef *entry_ref)
 {
diff --git a/src/include/utils/pgstat_internal.h b/src/include/utils/pgstat_internal.h
index 02f4f13fc0f..85d92f4c945 100644
--- a/src/include/utils/pgstat_internal.h
+++ b/src/include/utils/pgstat_internal.h
@@ -231,6 +231,7 @@ typedef enum PgStat_FlushBehavior
 {
 	FLUSH_ANYTIME,				/* All fields can flush anytime */
 	FLUSH_AT_TXN_BOUNDARY,		/* All fields need transaction boundary */
+	FLUSH_MIXED,				/* MIXED so needs callbacks */
 } PgStat_FlushBehavior;
 
 /*
@@ -262,6 +263,12 @@ typedef struct PgStat_KindInfo
 	/* Flush behavior */
 	PgStat_FlushBehavior flush_behavior;
 
+	/*
+	 * For PGSTAT_FLUSH_MIXED kinds: callback to flush only some fields. If
+	 * NULL for a MIXED kind, treated as PGSTAT_FLUSH_AT_TXN_BOUNDARY.
+	 */
+	bool		(*flush_anytime_cb) (PgStat_EntryRef *entry_ref, bool nowait);
+
 	/*
 	 * The size of an entry in the shared stats hash table (pointed to by
 	 * PgStatShared_HashEntry->body).  For fixed-numbered statistics, this is
@@ -774,6 +781,7 @@ extern void AtPrepare_PgStat_Relations(PgStat_SubXactStatus *xact_state);
 extern void PostPrepare_PgStat_Relations(PgStat_SubXactStatus *xact_state);
 
 extern bool pgstat_relation_flush_cb(PgStat_EntryRef *entry_ref, bool nowait);
+extern bool pgstat_relation_flush_anytime_cb(PgStat_EntryRef *entry_ref, bool nowait);
 extern void pgstat_relation_delete_pending_cb(PgStat_EntryRef *entry_ref);
 extern void pgstat_relation_reset_timestamp_cb(PgStatShared_Common *header, TimestampTz ts);
 
diff --git a/src/test/isolation/expected/stats.out b/src/test/isolation/expected/stats.out
index cfad309ccf3..6d62b30e4a7 100644
--- a/src/test/isolation/expected/stats.out
+++ b/src/test/isolation/expected/stats.out
@@ -2245,6 +2245,46 @@ seq_scan|seq_tup_read|n_tup_ins|n_tup_upd|n_tup_del|n_live_tup|n_dead_tup|vacuum
 (1 row)
 
 
+starting permutation: s2_begin s2_table_select s1_sleep s1_table_stats s2_table_drop s2_commit
+pg_stat_force_next_flush
+------------------------
+                        
+(1 row)
+
+step s2_begin: BEGIN;
+step s2_table_select: SELECT * FROM test_stat_tab ORDER BY key, value;
+key|value
+---+-----
+k0 |    1
+(1 row)
+
+step s1_sleep: SELECT pg_sleep(1.5);
+pg_sleep
+--------
+        
+(1 row)
+
+step s1_table_stats: 
+    SELECT
+        pg_stat_get_numscans(tso.oid) AS seq_scan,
+        pg_stat_get_tuples_returned(tso.oid) AS seq_tup_read,
+        pg_stat_get_tuples_inserted(tso.oid) AS n_tup_ins,
+        pg_stat_get_tuples_updated(tso.oid) AS n_tup_upd,
+        pg_stat_get_tuples_deleted(tso.oid) AS n_tup_del,
+        pg_stat_get_live_tuples(tso.oid) AS n_live_tup,
+        pg_stat_get_dead_tuples(tso.oid) AS n_dead_tup,
+        pg_stat_get_vacuum_count(tso.oid) AS vacuum_count
+    FROM test_stat_oid AS tso
+    WHERE tso.name = 'test_stat_tab'
+
+seq_scan|seq_tup_read|n_tup_ins|n_tup_upd|n_tup_del|n_live_tup|n_dead_tup|vacuum_count
+--------+------------+---------+---------+---------+----------+----------+------------
+       1|           1|        1|        0|        0|         1|         0|           0
+(1 row)
+
+step s2_table_drop: DROP TABLE test_stat_tab;
+step s2_commit: COMMIT;
+
 starting permutation: s1_track_counts_off s1_table_stats s1_track_counts_on
 pg_stat_force_next_flush
 ------------------------
diff --git a/src/test/isolation/expected/stats_1.out b/src/test/isolation/expected/stats_1.out
index e1d937784cb..2fade10e817 100644
--- a/src/test/isolation/expected/stats_1.out
+++ b/src/test/isolation/expected/stats_1.out
@@ -2253,6 +2253,46 @@ seq_scan|seq_tup_read|n_tup_ins|n_tup_upd|n_tup_del|n_live_tup|n_dead_tup|vacuum
 (1 row)
 
 
+starting permutation: s2_begin s2_table_select s1_sleep s1_table_stats s2_table_drop s2_commit
+pg_stat_force_next_flush
+------------------------
+                        
+(1 row)
+
+step s2_begin: BEGIN;
+step s2_table_select: SELECT * FROM test_stat_tab ORDER BY key, value;
+key|value
+---+-----
+k0 |    1
+(1 row)
+
+step s1_sleep: SELECT pg_sleep(1.5);
+pg_sleep
+--------
+        
+(1 row)
+
+step s1_table_stats: 
+    SELECT
+        pg_stat_get_numscans(tso.oid) AS seq_scan,
+        pg_stat_get_tuples_returned(tso.oid) AS seq_tup_read,
+        pg_stat_get_tuples_inserted(tso.oid) AS n_tup_ins,
+        pg_stat_get_tuples_updated(tso.oid) AS n_tup_upd,
+        pg_stat_get_tuples_deleted(tso.oid) AS n_tup_del,
+        pg_stat_get_live_tuples(tso.oid) AS n_live_tup,
+        pg_stat_get_dead_tuples(tso.oid) AS n_dead_tup,
+        pg_stat_get_vacuum_count(tso.oid) AS vacuum_count
+    FROM test_stat_oid AS tso
+    WHERE tso.name = 'test_stat_tab'
+
+seq_scan|seq_tup_read|n_tup_ins|n_tup_upd|n_tup_del|n_live_tup|n_dead_tup|vacuum_count
+--------+------------+---------+---------+---------+----------+----------+------------
+       0|           0|        1|        0|        0|         1|         0|           0
+(1 row)
+
+step s2_table_drop: DROP TABLE test_stat_tab;
+step s2_commit: COMMIT;
+
 starting permutation: s1_track_counts_off s1_table_stats s1_track_counts_on
 pg_stat_force_next_flush
 ------------------------
diff --git a/src/test/isolation/specs/stats.spec b/src/test/isolation/specs/stats.spec
index da16710da0f..1b0168e6176 100644
--- a/src/test/isolation/specs/stats.spec
+++ b/src/test/isolation/specs/stats.spec
@@ -50,6 +50,8 @@ step s1_rollback { ROLLBACK; }
 step s1_prepare_a { PREPARE TRANSACTION 'a'; }
 step s1_commit_prepared_a { COMMIT PREPARED 'a'; }
 step s1_rollback_prepared_a { ROLLBACK PREPARED 'a'; }
+# Has to be greater than PGSTAT_ANYTIME_FLUSH_INTERVAL
+step s1_sleep { SELECT pg_sleep(1.5); }
 
 # Function stats steps
 step s1_ff { SELECT pg_stat_force_next_flush(); }
@@ -138,6 +140,7 @@ step s2_commit { COMMIT; }
 step s2_commit_prepared_a { COMMIT PREPARED 'a'; }
 step s2_rollback_prepared_a { ROLLBACK PREPARED 'a'; }
 step s2_ff { SELECT pg_stat_force_next_flush(); }
+step s2_table_drop { DROP TABLE test_stat_tab; }
 
 # Function stats steps
 step s2_track_funcs_all { SET track_functions = 'all'; }
@@ -435,6 +438,15 @@ permutation
   s1_table_drop
   s1_table_stats
 
+### Check that some stats are updated (seq_scan and seq_tup_read)
+### while the transaction is still running
+permutation
+  s2_begin
+  s2_table_select
+  s1_sleep
+  s1_table_stats
+  s2_table_drop
+  s2_commit
 
 ### Check that we don't count changes with track counts off, but allow access
 ### to prior stats
-- 
2.34.1

