From 103d83c69ff0870f812e91b7f6491719bf7f39c6 Mon Sep 17 00:00:00 2001 From: Matheus Alcantara Date: Sat, 6 Sep 2025 11:29:02 -0300 Subject: [PATCH v8] Make AsyncQueueEntry's self contained Previously, asyncQueueProcessPageEntries() used TransactionIdDidCommit() to check whether the transaction that a notification belongs to had committed. Although this works in almost all scenarios, if a notification is kept in the queue for too long and VACUUM FREEZE is executed in the meantime, the clog files needed to check the transaction status of that notification may be removed, which causes errors in listener backends when they read the async queue. This commit fixes the issue by making AsyncQueueEntry self-contained: it adds a "rollbacked" boolean field that asyncQueueProcessPageEntries() uses to check whether the notification's transaction was rolled back. The field is set to false when the entry is added to the SLRU page buffer during PreCommit_Notify(); if an error occurs before AtCommit_Notify(), AtAbort_Notify() is called and sets the field to true. To do this, we remember the QUEUE_HEAD position before PreCommit_Notify() starts adding entries to the shared queue, and if the transaction aborts we iterate from this saved position up to the new QUEUE_HEAD position, marking the entries as rolled back. This commit also includes TAP tests that exercise the VACUUM FREEZE issue and the scenario of an error occurring between the PreCommit_Notify() and AtCommit_Notify() calls. 
Author: Matheus Alcantara Co-authored-by: Arseniy Mukhin --- src/backend/commands/async.c | 186 +++++++++++++++++- src/test/modules/Makefile | 1 + src/test/modules/meson.build | 1 + src/test/modules/test_listen_notify/Makefile | 19 ++ .../modules/test_listen_notify/meson.build | 14 ++ .../test_listen_notify/t/001_xid_freeze.pl | 74 +++++++ .../t/002_aborted_tx_notifies.pl | 79 ++++++++ src/tools/pgindent/typedefs.list | 1 + 8 files changed, 374 insertions(+), 1 deletion(-) create mode 100644 src/test/modules/test_listen_notify/Makefile create mode 100644 src/test/modules/test_listen_notify/meson.build create mode 100644 src/test/modules/test_listen_notify/t/001_xid_freeze.pl create mode 100644 src/test/modules/test_listen_notify/t/002_aborted_tx_notifies.pl diff --git a/src/backend/commands/async.c b/src/backend/commands/async.c index 4bd37d5beb5..7e4aa3a6487 100644 --- a/src/backend/commands/async.c +++ b/src/backend/commands/async.c @@ -79,6 +79,19 @@ * are way behind and should be kicked to make them advance their * pointers). * + * The notification entries added to the queue are self-contained and + * include a "rollbacked" field to inform listener backends whether the + * sending transaction aborted. We could use TransactionIdDidCommit(), but + * if a notification remains in the queue long enough for VACUUM FREEZE to + * remove the necessary pg_xact/ file, the listener backend would get errors + * when looking up the transaction status. To prevent this, "rollbacked" is + * set to false during PreCommit_Notify(), and if the transaction aborts + * between PreCommit_Notify() and AtCommit_Notify(), AtAbort_Notify() marks + * these entries as rolled back. To enable this, we save the queue's head + * position before adding new entries from the about-to-commit + * transaction. If an abort occurs, AtAbort_Notify() uses this saved + * position to find the entries and mark them as rolled back. 
+ * * Finally, after we are out of the transaction altogether and about to go * idle, we scan the queue for messages that need to be sent to our * frontend (which might be notifies from other backends, or self-notifies @@ -142,6 +155,7 @@ #include "miscadmin.h" #include "storage/ipc.h" #include "storage/lmgr.h" +#include "storage/procarray.h" #include "storage/procsignal.h" #include "tcop/tcopprot.h" #include "utils/builtins.h" @@ -180,6 +194,8 @@ typedef struct AsyncQueueEntry Oid dboid; /* sender's database OID */ TransactionId xid; /* sender's XID */ int32 srcPid; /* sender's PID */ + bool rollbacked; /* Was the sending transaction rolled + * back? */ char data[NAMEDATALEN + NOTIFY_PAYLOAD_MAX_LENGTH]; } AsyncQueueEntry; @@ -401,8 +417,27 @@ struct NotificationHash Notification *event; /* => the actual Notification struct */ }; +/* Information needed by AtAbort_Notify() to mark queue entries of an aborted transaction as rolled back. */ +typedef struct AtAbortNotifyInfo +{ + /* + * head position before the transaction started adding entries to the shared + * queue + */ + QueuePosition previousHead; + + /* + * head position after the entries from the about-to-commit + * transaction were added. + */ + QueuePosition head; + +} AtAbortNotifyInfo; + static NotificationList *pendingNotifies = NULL; +static AtAbortNotifyInfo *atAbortInfo = NULL; + /* * Inbound notifications are initially processed by HandleNotifyInterrupt(), * called from inside a signal handler. That just sets the @@ -457,6 +492,7 @@ static void AddEventToPendingNotifies(Notification *n); static uint32 notification_hash(const void *key, Size keysize); static int notification_match(const void *key1, const void *key2, Size keysize); static void ClearPendingActionsAndNotifies(void); +static void asyncQueueRollbackNotifications(void); /* * Compute the difference between two queue page numbers. 
@@ -922,6 +958,18 @@ PreCommit_Notify(void) LockSharedObject(DatabaseRelationId, InvalidOid, 0, AccessExclusiveLock); + /* + * Before adding entries to the shared queue, save the current + * QUEUE_HEAD so AtAbort_Notify() can mark our entries as rolled back + * if we abort. Set head too, so the range to roll back is well + * defined even if we fail before recording the final head below. + */ + Assert(atAbortInfo == NULL); + atAbortInfo = palloc(sizeof(AtAbortNotifyInfo)); + LWLockAcquire(NotifyQueueLock, LW_SHARED); + atAbortInfo->previousHead = atAbortInfo->head = QUEUE_HEAD; + LWLockRelease(NotifyQueueLock); + /* Now push the notifications into the queue */ nextNotify = list_head(pendingNotifies->events); while (nextNotify != NULL) @@ -948,6 +996,17 @@ PreCommit_Notify(void) LWLockRelease(NotifyQueueLock); } + /* + * Save the new QUEUE_HEAD position so that, if another backend adds + * entries to the shared queue and commits successfully, we don't + * change the status of its notifications while marking the + * notifications from our aborted transaction as rolled + * back. + */ + LWLockAcquire(NotifyQueueLock, LW_SHARED); + atAbortInfo->head = QUEUE_HEAD; + LWLockRelease(NotifyQueueLock); + /* Note that we don't clear pendingNotifies; AtCommit_Notify will. */ } } @@ -1402,6 +1461,13 @@ asyncQueueAddEntries(ListCell *nextNotify) /* Construct a valid queue entry in local variable qe */ asyncQueueNotificationToEntry(n, &qe); + /* + * Mark the entry as not rolled back. If the transaction that this + * notification belongs to fails to commit, AtAbort_Notify() will mark + * this entry as rolled back. 
+ */ + qe.rollbacked = false; + offset = QUEUE_POS_OFFSET(queue_head); /* Check whether the entry really fits on the current page */ @@ -1678,6 +1744,16 @@ AtAbort_Notify(void) if (amRegisteredListener && listenChannels == NIL) asyncQueueUnregister(); + /* + * atAbortInfo is set while PreCommit_Notify() is adding entries to the + * global shared queue, so if the transaction aborts between + * PreCommit_Notify() and AtCommit_Notify() we use this information to + * mark the entries from the aborted transaction as rolled + * back. + */ + if (atAbortInfo != NULL) + asyncQueueRollbackNotifications(); + /* And clean up */ ClearPendingActionsAndNotifies(); } @@ -2062,11 +2138,12 @@ asyncQueueProcessPageEntries(volatile QueuePosition *current, * because our transaction cannot (yet) have queued any * messages. */ + *current = thisentry; reachedStop = true; break; } - else if (TransactionIdDidCommit(qe->xid)) + else if (!qe->rollbacked) { /* qe->data is the null-terminated channel name */ char *channel = qe->data; @@ -2385,6 +2462,7 @@ ClearPendingActionsAndNotifies(void) */ pendingActions = NULL; pendingNotifies = NULL; + atAbortInfo = NULL; } /* @@ -2395,3 +2473,109 @@ check_notify_buffers(int *newval, void **extra, GucSource source) { return check_slru_buffers("notify_buffers", newval); } + + +/* + * Mark notifications added by an about-to-commit transaction as rolled back. + * + * Notifications are added to the shared global queue with rollbacked = + * false during PreCommit_Notify(). If an error occurs between + * PreCommit_Notify() and AtCommit_Notify(), AtAbort_Notify() will be called + * and we need to mark the notifications added to the shared queue by the + * aborted transaction as rolled back so that listener backends can skip + * these notifications when reading the queue. 
+ * + * We previously relied on TransactionIdDidCommit() to check this, but if a + * notification is kept in the queue for too long and VACUUM FREEZE is + * executed during this period, it can remove clog files that are needed to + * check the transaction status of this notification, so we make the + * notification entries self-contained to avoid this problem. + * + */ +static void +asyncQueueRollbackNotifications(void) +{ + QueuePosition current = atAbortInfo->previousHead; + QueuePosition head = atAbortInfo->head; + + /* + * Iterate from the position saved before the transaction started adding + * entries (previousHead) to the position saved after it added them + * (head). All entries within this range belong to the aborted + * transaction and are marked as rolled back. + */ + for (;;) + { + int64 curpage = QUEUE_POS_PAGE(current); + int curoffset = QUEUE_POS_OFFSET(current); + LWLock *lock = SimpleLruGetBankLock(NotifyCtl, curpage); + int slotno; + + /* + * If we have reached the head, all entries from this transaction have + * been marked as rolled back, so break out of the loop. + */ + if (QUEUE_POS_EQUAL(current, head)) + break; + + /* + * Take the SLRU bank lock for the current page exclusively so no + * other process can read or write it while we mark the entries. + */ + LWLockAcquire(lock, LW_EXCLUSIVE); + + /* Fetch the page and flag it dirty so our changes get written out. */ + slotno = SimpleLruReadPage(NotifyCtl, curpage, true, InvalidTransactionId); + NotifyCtl->shared->page_dirty[slotno] = true; + + /* + * Loop through all entries on the current page. The loop will + * continue until we reach the end of the page or the saved head. + */ + for (;;) + { + AsyncQueueEntry *qe; + bool reachedEndOfPage; + + /* + * Stop as soon as we reach the saved head on the current page. + */ + if (QUEUE_POS_EQUAL(current, head)) + break; + + /* + * Get a pointer to the current entry within the shared page + * buffer. 
+ */ + qe = (AsyncQueueEntry *) (NotifyCtl->shared->page_buffer[slotno] + curoffset); + + /* + * Just for sanity: no entry in this range should have been + * marked as rolled back yet. + */ + Assert(!qe->rollbacked); + + /* Ensure that listener backends cannot have seen these entries yet */ + Assert(TransactionIdIsInProgress(qe->xid)); + + /* + * Mark the entry as rolled back so listener backends can skip this + * notification. + */ + qe->rollbacked = true; + + /* Advance our position. */ + reachedEndOfPage = asyncQueueAdvance(&current, qe->length); + if (reachedEndOfPage) + break; + + /* + * Update the offset for the next iteration within the same page. + */ + curoffset = QUEUE_POS_OFFSET(current); + } + + /* Release the exclusive lock on the page. */ + LWLockRelease(lock); + } +} diff --git a/src/test/modules/Makefile b/src/test/modules/Makefile index 902a7954101..a015c961d35 100644 --- a/src/test/modules/Makefile +++ b/src/test/modules/Makefile @@ -29,6 +29,7 @@ SUBDIRS = \ test_int128 \ test_integerset \ test_json_parser \ + test_listen_notify \ test_lfind \ test_lwlock_tranches \ test_misc \ diff --git a/src/test/modules/meson.build b/src/test/modules/meson.build index 14fc761c4cf..6af33448d7b 100644 --- a/src/test/modules/meson.build +++ b/src/test/modules/meson.build @@ -28,6 +28,7 @@ subdir('test_ginpostinglist') subdir('test_int128') subdir('test_integerset') subdir('test_json_parser') +subdir('test_listen_notify') subdir('test_lfind') subdir('test_lwlock_tranches') subdir('test_misc') diff --git a/src/test/modules/test_listen_notify/Makefile b/src/test/modules/test_listen_notify/Makefile new file mode 100644 index 00000000000..c1eb4fde370 --- /dev/null +++ b/src/test/modules/test_listen_notify/Makefile @@ -0,0 +1,19 @@ +# src/test/modules/test_listen_notify/Makefile + +MODULE = test_listen_notify +PGFILEDESC = "test_listen_notify - regression testing for LISTEN/NOTIFY support" + +TAP_TESTS = 1 + +EXTRA_INSTALL=src/test/modules/xid_wraparound + +ifdef USE_PGXS +PG_CONFIG 
= pg_config +PGXS := $(shell $(PG_CONFIG) --pgxs) +include $(PGXS) +else +subdir = src/test/modules/test_listen_notify +top_builddir = ../../../.. +include $(top_builddir)/src/Makefile.global +include $(top_srcdir)/contrib/contrib-global.mk +endif diff --git a/src/test/modules/test_listen_notify/meson.build b/src/test/modules/test_listen_notify/meson.build new file mode 100644 index 00000000000..a68052cd353 --- /dev/null +++ b/src/test/modules/test_listen_notify/meson.build @@ -0,0 +1,14 @@ +# Copyright (c) 2022-2025, PostgreSQL Global Development Group + +tests += { + 'name': 'test_listen_notify', + 'sd': meson.current_source_dir(), + 'bd': meson.current_build_dir(), + 'tap': { + 'tests': [ + 't/001_xid_freeze.pl', + 't/002_aborted_tx_notifies.pl' + ], + }, +} + diff --git a/src/test/modules/test_listen_notify/t/001_xid_freeze.pl b/src/test/modules/test_listen_notify/t/001_xid_freeze.pl new file mode 100644 index 00000000000..a8bbd268c0f --- /dev/null +++ b/src/test/modules/test_listen_notify/t/001_xid_freeze.pl @@ -0,0 +1,74 @@ +# Copyright (c) 2024-2025, PostgreSQL Global Development Group + +use strict; +use warnings FATAL => 'all'; +use File::Path qw(mkpath); +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; + +my $node = PostgreSQL::Test::Cluster->new('node'); +$node->init; +$node->start; + +# Skip the test if the xid_wraparound extension is not available. This +# can happen when the script is run with installcheck, where the module +# would not be installed by default. 
+if (!$node->check_extension('xid_wraparound')) +{ + plan skip_all => 'Extension xid_wraparound not installed'; +} + +# Setup +$node->safe_psql('postgres', 'CREATE EXTENSION xid_wraparound'); +$node->safe_psql('postgres', + 'CREATE TABLE t AS SELECT g AS a, g+2 AS b from generate_series(1,100000) g;' +); +$node->safe_psql('postgres', + 'ALTER DATABASE template0 WITH ALLOW_CONNECTIONS true'); + +# --- Session 1: listen on 's', then sit idle in a transaction --- +my $psql_session1 = $node->background_psql('postgres'); +$psql_session1->query_safe('listen s;', "Session 1 listens to 's'"); +$psql_session1->query_safe('begin;', "Session 1 starts a transaction"); + +# --- Session 2: send ten notifications, each in its own committed transaction --- +for my $i (1 .. 10) +{ + $node->safe_psql( + 'postgres', " + BEGIN; + NOTIFY s, '$i'; + COMMIT;"); +} + +# Consume enough XIDs that VACUUM FREEZE can truncate old pg_xact files +$node->safe_psql('postgres', 'select consume_xids(10000000);'); + +# Update every row so that table "t" carries an XID newer than the ones +# consumed by consume_xids() +$node->safe_psql('postgres', 'UPDATE t SET a = a+b;'); + +# Remember the current datfrozenxid so we can verify that vacuum freeze advances it. 
+my $datfrozenxid = $node->safe_psql('postgres', "select datfrozenxid from pg_database where datname = 'postgres'"); + +# Execute vacuum freeze on all databases +$node->command_ok([ 'vacuumdb', '--all', '--freeze', '--port', $node->port ], + "vacuumdb --all --freeze"); + +# Get the new datfrozenxid after vacuum freeze to ensure that it has advanced; +# the notifications queued before the freeze must still be deliverable +my $datfrozenxid_freeze = $node->safe_psql('postgres', "select datfrozenxid from pg_database where datname = 'postgres'"); +cmp_ok($datfrozenxid_freeze, '>', $datfrozenxid, 'datfrozenxid is advanced'); + +# On Session 1, commit and ensure that all notifications are received +my $res = $psql_session1->query_safe('commit;', "commit listen s;"); +my $notifications_count = 0; +foreach my $i (split(/\n/, $res)) +{ + $notifications_count++; + like($i, qr/Asynchronous notification "s" with payload "$notifications_count" received/, "notification $notifications_count has the expected payload"); +} +is($notifications_count, 10, 'received all committed notifications'); +$psql_session1->quit; +done_testing(); diff --git a/src/test/modules/test_listen_notify/t/002_aborted_tx_notifies.pl b/src/test/modules/test_listen_notify/t/002_aborted_tx_notifies.pl new file mode 100644 index 00000000000..dae7a24f5b2 --- /dev/null +++ b/src/test/modules/test_listen_notify/t/002_aborted_tx_notifies.pl @@ -0,0 +1,79 @@ +# Copyright (c) 2024-2025, PostgreSQL Global Development Group + +use strict; +use warnings FATAL => 'all'; +use File::Path qw(mkpath); +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; + +# Check that listeners do not receive notifications from an aborted +# transaction even if its notifications were already added to the +# listen/notify queue. To reproduce this we use the fact that serializable +# conflicts are checked after the tx adds notifications to the queue. 
+ +my $node = PostgreSQL::Test::Cluster->new('node'); +$node->init; +$node->start; + +# Setup +$node->safe_psql('postgres', 'CREATE TABLE t1 (a bigserial);'); + +# Listener +my $psql_listener = $node->background_psql('postgres'); +$psql_listener->query_safe('LISTEN ch;'); + +# Session1. Start a SERIALIZABLE tx and add two notifications. +my $psql_session1 = $node->background_psql('postgres'); +$psql_session1->query_safe(" + BEGIN ISOLATION LEVEL SERIALIZABLE; + SELECT * FROM t1; + INSERT INTO t1 DEFAULT VALUES; + NOTIFY ch,'committed_0'; + NOTIFY ch,'committed_1'; +"); + +# Session2. Start a SERIALIZABLE tx that reads and writes the same table, +# which introduces a conflict with session1. +my $psql_session2 = $node->background_psql('postgres', on_error_stop => 0); +$psql_session2->query_safe(" + BEGIN ISOLATION LEVEL SERIALIZABLE; + SELECT * FROM t1; + INSERT INTO t1 DEFAULT VALUES; +"); + +# Queue notifications that must never be delivered, as session2's +# transaction will be aborted. +my $message = 'aborted_' . 'a' x 1000; +for my $i (0 .. 9) { + $psql_session2->query_safe("NOTIFY ch, '$i$message'"); +} + +# Session1 should be committed successfully. Listeners must receive session1 +# notifications. +$psql_session1->query_safe("COMMIT;"); + +# Session2 should be aborted due to the conflict with session1. Transaction +# is aborted after adding notifications to the listen/notify queue, but +# listeners should not receive session2 notifications. 
+$psql_session2->query("COMMIT;"); + +# Send more notifications after the aborted transaction +$node->safe_psql('postgres', "NOTIFY ch, 'committed_2';"); +$node->safe_psql('postgres', "NOTIFY ch, 'committed_3';"); + +# Fetch notifications +my $res = $psql_listener->query_safe('begin; commit;'); + +# Check received notifications; every received line must be a committed one +my @lines = split(/\n/, $res); +is(scalar(@lines), 4, 'received all committed notifications'); +for my $i (0 .. $#lines) { + like($lines[$i], qr/Asynchronous notification "ch" with payload "committed_$i" received/, "notification $i has the expected payload"); +} + +ok($psql_listener->quit, 'listener session quits cleanly'); +ok($psql_session1->quit, 'session1 quits cleanly'); +ok($psql_session2->quit, 'session2 quits cleanly'); + +done_testing(); diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 377a7946585..385bbb16d64 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -160,6 +160,7 @@ ArrayType AsyncQueueControl AsyncQueueEntry AsyncRequest +AtAbortNotifyInfo AttInMetadata AttStatsSlot AttoptCacheEntry -- 2.51.0