From 15f90c7761680d8eab7b1315b4a37c17a2f68ccc Mon Sep 17 00:00:00 2001 From: alterego655 <824662526@qq.com> Date: Wed, 6 May 2026 16:40:55 +0800 Subject: [PATCH v2] Fix WAIT FOR LSN cleanup on subtransaction abort WAIT FOR LSN registers the current backend in shared memory before entering an interruptible wait loop. Top-level abort and backend exit already call WaitLSNCleanup(), but subtransaction abort did not. If an interrupt such as statement_timeout occurred while waiting inside a savepoint, rolling back to the savepoint left the backend marked as present in the WAIT FOR LSN heap. Clean up WAIT FOR LSN state from AbortSubTransaction() as well, and add a TAP test covering reuse of WAIT FOR LSN after a savepoint rollback. --- src/backend/access/transam/xact.c | 5 +++ src/backend/access/transam/xlogwait.c | 2 +- src/test/recovery/t/049_wait_for_lsn.pl | 48 +++++++++++++++++++++++++ 3 files changed, 54 insertions(+), 1 deletion(-) diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c index 48bc90c9673..5586fbe5b07 100644 --- a/src/backend/access/transam/xact.c +++ b/src/backend/access/transam/xact.c @@ -5289,6 +5289,11 @@ AbortSubTransaction(void) */ LWLockReleaseAll(); + /* + * Clean up any WAIT FOR LSN state left behind by an interrupted wait. + */ + WaitLSNCleanup(); + pgstat_report_wait_end(); pgstat_progress_end_command(); diff --git a/src/backend/access/transam/xlogwait.c b/src/backend/access/transam/xlogwait.c index 18f78338330..582dde3b061 100644 --- a/src/backend/access/transam/xlogwait.c +++ b/src/backend/access/transam/xlogwait.c @@ -360,7 +360,7 @@ WaitLSNWakeup(WaitLSNType lsnType, XLogRecPtr currentLSN) } /* - * Clean up LSN waiters for exiting process + * Clean up any LSN wait state for the current process. 
*/ void WaitLSNCleanup(void) diff --git a/src/test/recovery/t/049_wait_for_lsn.pl b/src/test/recovery/t/049_wait_for_lsn.pl index 9f8af351ba8..a51e31926f5 100644 --- a/src/test/recovery/t/049_wait_for_lsn.pl +++ b/src/test/recovery/t/049_wait_for_lsn.pl @@ -213,6 +213,54 @@ $output = $node_standby->safe_psql( WAIT FOR LSN '${lsn3}' WITH (timeout '10ms', no_throw);]); ok($output eq "timeout", "WAIT FOR returns correct status after timeout"); +# 4a. Check that aborting a subtransaction during WAIT FOR LSN cleans up +# shared wait-state. Poll pg_stat_activity before canceling the first WAIT +# FOR to ensure that the backend has registered itself in the waiters heap. +# After rolling back to the savepoint, a second WAIT FOR in the same backend +# must be able to register itself again. +my $subxact_lsn = $node_primary->safe_psql('postgres', + "SELECT pg_current_wal_insert_lsn() + 10000000000"); +my $subxact_appname = 'wait_for_lsn_subxact_cleanup'; +my $subxact_session = + $node_primary->background_psql('postgres', on_error_stop => 0); +$subxact_session->query_until( + qr/start/, qq[ + SET application_name = '$subxact_appname'; + BEGIN; + SAVEPOINT wait_cleanup; + \\echo start + WAIT FOR LSN '${subxact_lsn}' WITH (MODE 'primary_flush'); + ROLLBACK TO wait_cleanup; + WAIT FOR LSN '${subxact_lsn}' + WITH (MODE 'primary_flush', timeout '10ms', no_throw); + COMMIT; +]); +$node_primary->poll_query_until( + 'postgres', + "SELECT count(*) = 1 FROM pg_stat_activity + WHERE application_name = '$subxact_appname' + AND wait_event = 'WaitForWalFlush'" +) or die "WAIT FOR LSN did not enter the primary_flush wait path"; +my $subxact_cancelled = $node_primary->safe_psql( + 'postgres', + "SELECT pg_cancel_backend(pid) FROM pg_stat_activity + WHERE application_name = '$subxact_appname' + AND wait_event = 'WaitForWalFlush'" +); +is($subxact_cancelled, 't', "canceled WAIT FOR LSN in subtransaction"); +$subxact_session->quit; +chomp($subxact_session->{stdout}); +like( + 
$subxact_session->{stderr}, + qr/canceling statement due to user request/, + "query cancel interrupted WAIT FOR LSN in subtransaction"); +is($subxact_session->{stdout}, + "timeout", "second WAIT FOR LSN timed out after savepoint rollback"); +unlike( + $subxact_session->{stderr}, + qr/server closed the connection unexpectedly/, + "WAIT FOR LSN after savepoint rollback did not disconnect"); + # 5. Check mode validation: standby modes error on primary, primary mode errors # on standby, and primary_flush works on primary. Also check that WAIT FOR # triggers an error if called within a function, procedure, anonymous DO block, -- 2.51.0