From 7ba4d92d461bb7e5d7cd8c0a9b0b4eebeb30f5f1 Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Wed, 20 May 2026 14:20:52 +0300 Subject: [PATCH v2] Skip pg_database.dathasloginevt cleanup on standby EventTriggerOnLogin() tries to clear pg_database.dathasloginevt when the database no longer has any login event triggers but the flag is still set. To make that safe against concurrent flag setters, it takes a conditional AccessExclusiveLock on the database object. On a hot standby, that lock acquisition fails outright with FATAL: cannot acquire lock mode AccessExclusiveLock on database objects while recovery is in progress because LockAcquireExtended() refuses locks stronger than RowExclusiveLock on database objects during recovery. The standby already replays the flag's value from the primary, so the dangling flag is the result of replaying a state in which the primary had already dropped its login event triggers but not yet run a login event trigger pass to clear the flag. Any session connecting to the standby in that window therefore fails to connect. Skip the cleanup on a standby. The flag will be cleared via WAL replay once the primary clears it on its side. Add a recovery TAP test that reproduces the original report: create and drop a login event trigger on the primary in one session, wait for the standby to replay, then verify that a fresh connection to the standby succeeds. Backpatch to v17, where the login event triggers were introduced. Author: Ayush Tiwari Reported-by: Egor Chindyaskin Reviewed-by: Fujii Masao Reviewed-by: Alexander Korotkov Discussion: https://postgr.es/m/19488-d7ccfca2bf6b74b0%40postgresql.org Backpatch-through: 17 --- src/backend/commands/event_trigger.c | 10 ++- src/test/recovery/meson.build | 1 + .../t/053_standby_login_event_trigger.pl | 78 +++++++++++++++++++ 3 files changed, 88 insertions(+), 1 deletion(-) create mode 100644 src/test/recovery/t/053_standby_login_event_trigger.pl diff --git a/src/backend/commands/event_trigger.c b/src/backend/commands/event_trigger.c index dcd2f5a09bb..adc6eabc0f4 100644 --- a/src/backend/commands/event_trigger.c +++ b/src/backend/commands/event_trigger.c @@ -937,8 +937,16 @@ EventTriggerOnLogin(void) * lock to prevent concurrent SetDatabaseHasLoginEventTriggers(), but we * don't want to hang the connection waiting on the lock. Thus, we are * just trying to acquire the lock conditionally. + * + * Skip this on a hot standby: the conditional AccessExclusiveLock on the + * database object would fail with "cannot acquire lock mode ... while + * recovery is in progress", which the caller would surface as a FATAL + * connection error. On a standby, we cannot (and must not) clear the + * pg_database flag ourselves; it will be cleared via WAL replay once the + * primary's next login event trigger run clears it on the primary. */ - else if (ConditionalLockSharedObject(DatabaseRelationId, MyDatabaseId, + else if (!RecoveryInProgress() && + ConditionalLockSharedObject(DatabaseRelationId, MyDatabaseId, 0, AccessExclusiveLock)) { /* diff --git a/src/test/recovery/meson.build b/src/test/recovery/meson.build index 36d789720a3..9eb8ed11425 100644 --- a/src/test/recovery/meson.build +++ b/src/test/recovery/meson.build @@ -61,6 +61,7 @@ tests += { 't/050_redo_segment_missing.pl', 't/051_effective_wal_level.pl', 't/052_checkpoint_segment_missing.pl', + 't/053_standby_login_event_trigger.pl', ], }, } diff --git a/src/test/recovery/t/053_standby_login_event_trigger.pl b/src/test/recovery/t/053_standby_login_event_trigger.pl new file mode 100644 index 00000000000..ebd8cc278d3 --- /dev/null +++ b/src/test/recovery/t/053_standby_login_event_trigger.pl @@ -0,0 +1,78 @@ +# Copyright (c) 2026, PostgreSQL Global Development Group +# +# Verify that connecting to a standby still works after a login event +# trigger has been created and dropped on the primary. +# +# CREATE EVENT TRIGGER ... ON login sets pg_database.dathasloginevt to +# true on the primary, but DROP EVENT TRIGGER does not clear it -- the +# next login event trigger pass clears the flag lazily on the primary. +# That dangling flag replicates to the standby. Before the +# RecoveryInProgress() guard in EventTriggerOnLogin(), the standby +# tried to clear the flag itself, which requires AccessExclusiveLock +# on the database object; that lock mode is forbidden during recovery, +# so the new connection died with FATAL. + +use strict; +use warnings FATAL => 'all'; +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; + +# Set up primary and a streaming standby. +my $primary = PostgreSQL::Test::Cluster->new('primary'); +$primary->init(allows_streaming => 1); +$primary->start; + +my $backup_name = 'login_evt_backup'; +$primary->backup($backup_name); + +my $standby = PostgreSQL::Test::Cluster->new('standby'); +$standby->init_from_backup($primary, $backup_name, has_streaming => 1); +$standby->start; + +# Sanity check: a fresh standby with dathasloginevt = false accepts +# connections without exercising the cleanup branch. +$standby->safe_psql('postgres', 'SELECT 1'); + +# Create and drop a login event trigger on the primary in a single +# session. CREATE EVENT TRIGGER sets pg_database.dathasloginevt = +# true; mark it ENABLE ALWAYS so the scenario matches the original +# bug report. After DROP the flag remains set on disk until a +# subsequent login on the primary clears it -- so we must not open +# another connection to the primary that would clear it before the +# WAL containing dathasloginevt = true is shipped to the standby. +$primary->safe_psql( + 'postgres', q{ +CREATE FUNCTION init_session() RETURNS event_trigger +LANGUAGE plpgsql AS $$ BEGIN RAISE NOTICE 'init_session'; END $$; +CREATE EVENT TRIGGER init_session ON login + EXECUTE FUNCTION init_session(); +ALTER EVENT TRIGGER init_session ENABLE ALWAYS; +DROP EVENT TRIGGER init_session; +DROP FUNCTION init_session(); +}); + +# Wait for the standby to replay the CREATE/DROP catalog state. At +# this point the standby's pg_database.dathasloginevt is still true. +$primary->wait_for_replay_catchup($standby); + +# A new connection to the standby exercises EventTriggerOnLogin()'s +# cleanup branch. With the RecoveryInProgress() guard, that branch is +# skipped on the standby and the connection succeeds. Without it the +# session aborts with a FATAL about AccessExclusiveLock. Probing the +# flag itself via safe_psql is what triggers the cleanup path. +is( $standby->safe_psql( + 'postgres', + "SELECT dathasloginevt FROM pg_database WHERE datname = 'postgres'"), + 't', + 'standby accepts connection and reports dangling dathasloginevt'); + +# Repeat with a non-die psql to make sure no FATAL is emitted. +my ($ret, $stdout, $stderr) = $standby->psql('postgres', 'SELECT 1'); +is($ret, 0, 'standby accepts second connection with dangling dathasloginevt'); +unlike( + $stderr, + qr/cannot acquire lock mode AccessExclusiveLock/, + 'no AccessExclusiveLock FATAL on standby login'); + +done_testing(); -- 2.39.5 (Apple Git-154)