From 6bb9405297030a891acc0d5397de5fcd73af9ea6 Mon Sep 17 00:00:00 2001 From: Sami Imseih Date: Sun, 10 May 2026 07:06:04 -0500 Subject: [PATCH v4 1/4] pgstat: Allow pg_stat_force_next_flush() to work in-transaction Previously, pg_stat_force_next_flush() deferred the actual flush until after the transaction ended. Extend it to also flush immediately when called in-transaction. Non-transactional counters (numscans, tuples_returned, tuples_fetched, blocks_fetched, blocks_hit) are flushed right away since they reflect completed work that does not depend on transaction outcome. Transactional counters (tuples_inserted/updated/deleted and the derived live/dead tuple counts) are deferred until transaction end, since their final values depend on commit/abort. Also remove a test query that checked last_seq_scan/last_idx_scan inside a transaction; it only previously appeared to work because pg_stat_force_next_flush() previously did not flush in-transaction, and would now be unstable. --- src/backend/utils/activity/pgstat.c | 13 ++- src/backend/utils/activity/pgstat_relation.c | 99 +++++++++++++------- src/test/regress/expected/stats.out | 92 ++++++++++++++++-- src/test/regress/sql/stats.sql | 60 +++++++++++- 4 files changed, 218 insertions(+), 46 deletions(-) diff --git a/src/backend/utils/activity/pgstat.c b/src/backend/utils/activity/pgstat.c index c4fa14f138f..83f7cc819c9 100644 --- a/src/backend/utils/activity/pgstat.c +++ b/src/backend/utils/activity/pgstat.c @@ -729,7 +729,6 @@ pgstat_report_stat(bool force) bool nowait; pgstat_assert_is_up(); - Assert(!IsTransactionOrTransactionBlock()); /* "absorb" the forced flush even if there's nothing to flush */ if (pgStatForceNextFlush) @@ -815,12 +814,13 @@ pgstat_report_stat(bool force) /* * If some of the pending stats could not be flushed due to lock - * contention, let the caller know when to retry. + * contention, or only partially flushed due to in-transaction counters + * being deferred, let the caller know when to retry. */ if (partial_flush) { - /* force should have prevented us from getting here */ - Assert(!force); + /* with force, only active transaction state can cause a partial flush */ + Assert(!force || IsTransactionOrTransactionBlock()); /* remember since when stats have been pending */ if (pending_since == 0) @@ -842,6 +842,9 @@ pgstat_report_stat(bool force) void pgstat_force_next_flush(void) { + if (IsTransactionOrTransactionBlock()) + pgstat_report_stat(true); + pgStatForceNextFlush = true; } @@ -1414,7 +1417,7 @@ pgstat_flush_pending_entries(bool nowait) /* flush the stats, if possible */ did_flush = kind_info->flush_pending_cb(entry_ref, nowait); - Assert(did_flush || nowait); + Assert(did_flush || nowait || IsTransactionOrTransactionBlock()); /* determine next entry, before deleting the pending entry */ if (dlist_has_next(&pgStatPending, cur)) diff --git a/src/backend/utils/activity/pgstat_relation.c b/src/backend/utils/activity/pgstat_relation.c index 04f2eb21d0b..32ee6731eb7 100644 --- a/src/backend/utils/activity/pgstat_relation.c +++ b/src/backend/utils/activity/pgstat_relation.c @@ -828,64 +828,78 @@ pgstat_relation_flush_cb(PgStat_EntryRef *entry_ref, bool nowait) /* * Ignore entries that didn't accumulate any actual counts, such as - * indexes that were opened by the planner but not used. + * indexes that were opened by the planner but not used. With + * in-transaction flushing an entry may be flushed multiple times, so keep + * it pending if it has active transaction state and commit will merge + * counters into it. */ if (pg_memory_is_all_zeros(&lstats->counts, sizeof(struct PgStat_TableCounts))) - return true; + return (lstats->trans == NULL); if (!pgstat_lock_entry(entry_ref, nowait)) return false; - /* add the values to the shared entry. */ + /* Flush non-transactional counters. */ tabentry = &shtabstats->stats; tabentry->numscans += lstats->counts.numscans; if (lstats->counts.numscans) { - TimestampTz t = GetCurrentTransactionStopTimestamp(); + TimestampTz t = IsTransactionOrTransactionBlock() ? + GetCurrentStatementStartTimestamp() : + GetCurrentTransactionStopTimestamp(); if (t > tabentry->lastscan) tabentry->lastscan = t; } tabentry->tuples_returned += lstats->counts.tuples_returned; tabentry->tuples_fetched += lstats->counts.tuples_fetched; - tabentry->tuples_inserted += lstats->counts.tuples_inserted; - tabentry->tuples_updated += lstats->counts.tuples_updated; - tabentry->tuples_deleted += lstats->counts.tuples_deleted; tabentry->tuples_hot_updated += lstats->counts.tuples_hot_updated; tabentry->tuples_newpage_updated += lstats->counts.tuples_newpage_updated; + tabentry->blocks_fetched += lstats->counts.blocks_fetched; + tabentry->blocks_hit += lstats->counts.blocks_hit; /* - * If table was truncated/dropped, first reset the live/dead counters. + * Flush transactional counters only when this relation has no active + * transaction state (i.e. no pending DML whose outcome depends on + * commit/abort). */ - if (lstats->counts.truncdropped) + if (lstats->trans == NULL) { - tabentry->live_tuples = 0; - tabentry->dead_tuples = 0; - tabentry->ins_since_vacuum = 0; - } + tabentry->tuples_inserted += lstats->counts.tuples_inserted; + tabentry->tuples_updated += lstats->counts.tuples_updated; + tabentry->tuples_deleted += lstats->counts.tuples_deleted; - tabentry->live_tuples += lstats->counts.delta_live_tuples; - tabentry->dead_tuples += lstats->counts.delta_dead_tuples; - tabentry->mod_since_analyze += lstats->counts.changed_tuples; + /* + * If table was truncated/dropped, first reset the live/dead counters. + */ + if (lstats->counts.truncdropped) + { + tabentry->live_tuples = 0; + tabentry->dead_tuples = 0; + tabentry->ins_since_vacuum = 0; + } - /* - * Using tuples_inserted to update ins_since_vacuum does mean that we'll - * track aborted inserts too. This isn't ideal, but otherwise probably - * not worth adding an extra field for. It may just amount to autovacuums - * triggering for inserts more often than they maybe should, which is - * probably not going to be common enough to be too concerned about here. - */ - tabentry->ins_since_vacuum += lstats->counts.tuples_inserted; + tabentry->live_tuples += lstats->counts.delta_live_tuples; + tabentry->dead_tuples += lstats->counts.delta_dead_tuples; + tabentry->mod_since_analyze += lstats->counts.changed_tuples; - tabentry->blocks_fetched += lstats->counts.blocks_fetched; - tabentry->blocks_hit += lstats->counts.blocks_hit; + /* + * Using tuples_inserted to update ins_since_vacuum does mean that + * we'll track aborted inserts too. This isn't ideal, but otherwise + * probably not worth adding an extra field for. It may just amount + * to autovacuums triggering for inserts more often than they maybe + * should, which is probably not going to be common enough to be too + * concerned about here. + */ + tabentry->ins_since_vacuum += lstats->counts.tuples_inserted; - /* Clamp live_tuples in case of negative delta_live_tuples */ - tabentry->live_tuples = Max(tabentry->live_tuples, 0); - /* Likewise for dead_tuples */ - tabentry->dead_tuples = Max(tabentry->dead_tuples, 0); + /* Clamp live_tuples in case of negative delta_live_tuples */ + tabentry->live_tuples = Max(tabentry->live_tuples, 0); + /* Likewise for dead_tuples */ + tabentry->dead_tuples = Max(tabentry->dead_tuples, 0); + } pgstat_unlock_entry(entry_ref); @@ -893,13 +907,30 @@ pgstat_relation_flush_cb(PgStat_EntryRef *entry_ref, bool nowait) dbentry = pgstat_prep_database_pending(dboid); dbentry->tuples_returned += lstats->counts.tuples_returned; dbentry->tuples_fetched += lstats->counts.tuples_fetched; - dbentry->tuples_inserted += lstats->counts.tuples_inserted; - dbentry->tuples_updated += lstats->counts.tuples_updated; - dbentry->tuples_deleted += lstats->counts.tuples_deleted; dbentry->blocks_fetched += lstats->counts.blocks_fetched; dbentry->blocks_hit += lstats->counts.blocks_hit; - return true; + if (lstats->trans == NULL) + { + dbentry->tuples_inserted += lstats->counts.tuples_inserted; + dbentry->tuples_updated += lstats->counts.tuples_updated; + dbentry->tuples_deleted += lstats->counts.tuples_deleted; + return true; + } + + /* + * This is a partial, in-transaction flush. Zero out the counters we + * already flushed so they aren't double-counted on the next flush. + */ + lstats->counts.numscans = 0; + lstats->counts.tuples_returned = 0; + lstats->counts.tuples_fetched = 0; + lstats->counts.tuples_hot_updated = 0; + lstats->counts.tuples_newpage_updated = 0; + lstats->counts.blocks_fetched = 0; + lstats->counts.blocks_hit = 0; + + return false; } void diff --git a/src/test/regress/expected/stats.out b/src/test/regress/expected/stats.out index bbb1db3c433..fc682818097 100644 --- a/src/test/regress/expected/stats.out +++ b/src/test/regress/expected/stats.out @@ -675,12 +675,6 @@ SELECT pg_stat_force_next_flush(); (1 row) -SELECT last_seq_scan, last_idx_scan FROM pg_stat_all_tables WHERE relid = 'test_last_scan'::regclass; - last_seq_scan | last_idx_scan ----------------+--------------- - | -(1 row) - COMMIT; SELECT stats_reset IS NOT NULL AS has_stats_reset FROM pg_stat_all_tables WHERE relid = 'test_last_scan'::regclass; @@ -2040,4 +2034,90 @@ SELECT fastpath_exceeded > :fastpath_exceeded_before FROM pg_stat_lock WHERE loc (1 row) DROP TABLE part_test; +-- +-- Test in-transaction flushes +-- +CREATE TABLE partial_flush(id int); +INSERT INTO partial_flush VALUES (1), (2), (3); +SELECT pg_stat_force_next_flush(); + pg_stat_force_next_flush +-------------------------- + +(1 row) + +-- Record counters before the explicit transaction +SELECT seq_scan AS seq_scan_before, + seq_tup_read AS seq_tup_read_before, + n_tup_ins AS n_tup_ins_before, + n_tup_upd AS n_tup_upd_before + FROM pg_stat_user_tables WHERE relname = 'partial_flush' \gset +BEGIN; +SET LOCAL stats_fetch_consistency = none; +-- Generate both transaction-safe and transaction-unsafe counters. +SELECT count(*) FROM partial_flush; + count +------- + 3 +(1 row) + +INSERT INTO partial_flush VALUES (4), (5); +UPDATE partial_flush SET id = id WHERE id = 1; +-- Flush in-transaction +SELECT pg_stat_force_next_flush(); + pg_stat_force_next_flush +-------------------------- + +(1 row) + +-- Transaction-safe counters should be visible in-transaction. +-- Transaction-unsafe counters (ins, upd) should NOT be flushed yet, +-- since their final values depend on whether the transaction commits. +SELECT seq_scan - :seq_scan_before AS seq_scan_delta, + seq_tup_read - :seq_tup_read_before AS seq_tup_read_delta, + n_tup_ins - :n_tup_ins_before AS n_tup_ins_delta, + n_tup_upd - :n_tup_upd_before AS n_tup_upd_delta + FROM pg_stat_user_tables WHERE relname = 'partial_flush'; + seq_scan_delta | seq_tup_read_delta | n_tup_ins_delta | n_tup_upd_delta +----------------+--------------------+-----------------+----------------- + 2 | 8 | 0 | 0 +(1 row) + +-- Generate more transaction-safe activity to verify no double counting. +SELECT count(*) FROM partial_flush; + count +------- + 5 +(1 row) + +-- Flush again in-transaction +SELECT pg_stat_force_next_flush(); + pg_stat_force_next_flush +-------------------------- + +(1 row) + +-- Should show cumulative totals, not double-counted. +SELECT seq_scan - :seq_scan_before AS seq_scan_delta, + seq_tup_read - :seq_tup_read_before AS seq_tup_read_delta, + n_tup_ins - :n_tup_ins_before AS n_tup_ins_delta, + n_tup_upd - :n_tup_upd_before AS n_tup_upd_delta + FROM pg_stat_user_tables WHERE relname = 'partial_flush'; + seq_scan_delta | seq_tup_read_delta | n_tup_ins_delta | n_tup_upd_delta +----------------+--------------------+-----------------+----------------- + 3 | 13 | 0 | 0 +(1 row) + +COMMIT; +-- After commit, the remaining counters should be flushed. +SELECT seq_scan - :seq_scan_before AS seq_scan_delta, + seq_tup_read - :seq_tup_read_before AS seq_tup_read_delta, + n_tup_ins - :n_tup_ins_before AS n_tup_ins_delta, + n_tup_upd - :n_tup_upd_before AS n_tup_upd_delta + FROM pg_stat_user_tables WHERE relname = 'partial_flush'; + seq_scan_delta | seq_tup_read_delta | n_tup_ins_delta | n_tup_upd_delta +----------------+--------------------+-----------------+----------------- + 3 | 13 | 2 | 1 +(1 row) + +DROP TABLE partial_flush; -- End of Stats Test diff --git a/src/test/regress/sql/stats.sql b/src/test/regress/sql/stats.sql index 610fd21fae4..74436ab211b 100644 --- a/src/test/regress/sql/stats.sql +++ b/src/test/regress/sql/stats.sql @@ -309,7 +309,6 @@ BEGIN; CREATE TEMPORARY TABLE test_last_scan(idx_col int primary key, noidx_col int); INSERT INTO test_last_scan(idx_col, noidx_col) VALUES(1, 1); SELECT pg_stat_force_next_flush(); -SELECT last_seq_scan, last_idx_scan FROM pg_stat_all_tables WHERE relid = 'test_last_scan'::regclass; COMMIT; SELECT stats_reset IS NOT NULL AS has_stats_reset @@ -1008,4 +1007,63 @@ SELECT fastpath_exceeded > :fastpath_exceeded_before FROM pg_stat_lock WHERE loc DROP TABLE part_test; +-- +-- Test in-transaction flushes +-- +CREATE TABLE partial_flush(id int); +INSERT INTO partial_flush VALUES (1), (2), (3); +SELECT pg_stat_force_next_flush(); + +-- Record counters before the explicit transaction +SELECT seq_scan AS seq_scan_before, + seq_tup_read AS seq_tup_read_before, + n_tup_ins AS n_tup_ins_before, + n_tup_upd AS n_tup_upd_before + FROM pg_stat_user_tables WHERE relname = 'partial_flush' \gset + +BEGIN; +SET LOCAL stats_fetch_consistency = none; + +-- Generate both transaction-safe and transaction-unsafe counters. +SELECT count(*) FROM partial_flush; +INSERT INTO partial_flush VALUES (4), (5); +UPDATE partial_flush SET id = id WHERE id = 1; + +-- Flush in-transaction +SELECT pg_stat_force_next_flush(); + +-- Transaction-safe counters should be visible in-transaction. +-- Transaction-unsafe counters (ins, upd) should NOT be flushed yet, +-- since their final values depend on whether the transaction commits. +SELECT seq_scan - :seq_scan_before AS seq_scan_delta, + seq_tup_read - :seq_tup_read_before AS seq_tup_read_delta, + n_tup_ins - :n_tup_ins_before AS n_tup_ins_delta, + n_tup_upd - :n_tup_upd_before AS n_tup_upd_delta + FROM pg_stat_user_tables WHERE relname = 'partial_flush'; + +-- Generate more transaction-safe activity to verify no double counting. +SELECT count(*) FROM partial_flush; + +-- Flush again in-transaction +SELECT pg_stat_force_next_flush(); + +-- Should show cumulative totals, not double-counted. +SELECT seq_scan - :seq_scan_before AS seq_scan_delta, + seq_tup_read - :seq_tup_read_before AS seq_tup_read_delta, + n_tup_ins - :n_tup_ins_before AS n_tup_ins_delta, + n_tup_upd - :n_tup_upd_before AS n_tup_upd_delta + FROM pg_stat_user_tables WHERE relname = 'partial_flush'; + +COMMIT; + +-- After commit, the remaining counters should be flushed. + +SELECT seq_scan - :seq_scan_before AS seq_scan_delta, + seq_tup_read - :seq_tup_read_before AS seq_tup_read_delta, + n_tup_ins - :n_tup_ins_before AS n_tup_ins_delta, + n_tup_upd - :n_tup_upd_before AS n_tup_upd_delta + FROM pg_stat_user_tables WHERE relname = 'partial_flush'; + +DROP TABLE partial_flush; + -- End of Stats Test -- 2.50.1 (Apple Git-155)