From dfdb8e0e37ecbce515bbff52bf742e47e0d063a6 Mon Sep 17 00:00:00 2001
From: Vitaly Davydov <v.davydov@postgrespro.ru>
Date: Tue, 9 Jun 2026 18:23:28 +0300
Subject: [PATCH 2/2] Add new deadlock conflict test in
 031_recovery_conflict.pl

It checks that the deadlock detector is triggered in the hot standby's
client backend process when a deadlock with the startup process occurs
while the log_startup_progress_interval and deadlock_timeout timeouts
interfere with each other.

An infinite deadlock occurs between the standby's startup process and a
backend process when handling an XLOG_PRUNE_PAGE record. The issue arises
because the startup process fails to trigger the deadlock detector in the
conflicting backend.
---
 src/test/recovery/t/031_recovery_conflict.pl | 100 +++++++++++++++++++
 1 file changed, 100 insertions(+)

diff --git a/src/test/recovery/t/031_recovery_conflict.pl b/src/test/recovery/t/031_recovery_conflict.pl
index 7a740f69806..cac78443d0c 100644
--- a/src/test/recovery/t/031_recovery_conflict.pl
+++ b/src/test/recovery/t/031_recovery_conflict.pl
@@ -307,6 +307,106 @@ $psql_standby->quit;
 $node_standby->stop();
 $node_primary->stop();
 
+## RECOVERY CONFLICT 6: Startup and backend infinite deadlock on hot standby
+#
+# An infinite deadlock occurs between the standby's startup process and a
+# backend process when handling an XLOG_PRUNE_PAGE record. The issue arises
+# because the startup process fails to trigger the deadlock detector in the
+# conflicting backend.
+#
+# The root cause is interference between two timeouts: deadlock_timeout and
+# log_startup_progress_interval. Due to this interference, the
+# deadlock_timeout event never fires for the startup process.
+#
+# This behavior is a side effect of the timeout optimization logic implemented
+# in timeout.c. While this optimization improves performance by coalescing
+# timer events, it introduces a requirement that all timeout handlers must
+# verify if their specific timeout was indeed triggered before proceeding. In
+# this case, the handler responsible for invoking the deadlock detector does
+# not correctly account for this interference, leading to the failure.
+#
+# Discussion: https://www.postgresql.org/message-id/flat/44c24dcf-5710-410f-b1b6-d10b315f3d51%40postgrespro.ru
+#
+
+$test_db = 'postgres';
+$table1 = "test_startup_backend_deadlock_table_1";
+$table2 = "test_startup_backend_deadlock_table_2";
+
+# Both nodes get identical settings. max_standby_streaming_delay = -1 makes
+# the standby wait indefinitely on conflicts; log_startup_progress_interval
+# is shorter than deadlock_timeout (presumably so the two timers interfere).
+my $deadlock_conf = qq[
+	max_standby_streaming_delay = -1
+	log_startup_progress_interval = 2000
+	deadlock_timeout = 3000
+	autovacuum = off
+];
+
+$node_primary->append_conf('postgresql.conf', $deadlock_conf);
+$node_primary->start();
+
+$node_standby->append_conf('postgresql.conf', $deadlock_conf);
+$node_standby->start();
+
+# Remember the current size of the standby log so that only output produced
+# from here on is relevant (presumably consumed by check_conflict_log()).
+$log_location = -s $node_standby->logfile;
+
+$node_primary->safe_psql($test_db, qq[
+	CREATE TABLE $table1(a int, b int);
+	CREATE TABLE $table2(a int, b int);
+	INSERT INTO $table1 VALUES (1);
+]);
+
+# Generate a few dead rows, to later be cleaned up by vacuum. Then acquire a
+# lock on another relation in a prepared xact, so it's held continuously by
+# the startup process. The standby psql will block acquiring that lock while
+# holding a pin that vacuum needs, triggering the deadlock.
+$node_primary->safe_psql($test_db, qq[
+	BEGIN;
+	INSERT INTO $table1(a) SELECT generate_series(1, 100) i;
+	ROLLBACK;
+]);
+
+$node_primary->safe_psql($test_db, qq[
+	BEGIN;
+	LOCK TABLE $table2;
+	PREPARE TRANSACTION 'lock';
+	INSERT INTO $table1(a) VALUES (170);
+	SELECT txid_current();
+]);
+
+$node_primary->wait_for_catchup($node_standby, 'replay', $node_primary->lsn('write'));
+
+$psql_standby = $node_standby->background_psql($test_db, on_error_stop => 0);
+
+$psql_standby->query_until(qr/^1$/m, qq[
+	BEGIN;
+	-- hold pin
+	DECLARE test_recovery_conflict_cursor CURSOR FOR SELECT a FROM $table1;
+	FETCH FORWARD FROM test_recovery_conflict_cursor;
+	-- wait for lock held by prepared transaction
+	SELECT * FROM $table2;
+]);
+
+ok(1, "cursor holding conflicting pin, also waiting for lock, established");
+
+# VACUUM will prune away rows, causing a buffer pin conflict, while standby
+# psql is waiting on lock.
+$node_primary->safe_psql($test_db, qq[VACUUM $table1;]);
+
+# Wait and check that the deadlock detector was triggered and found a deadlock
+# in the backend process (not in startup process).
+note("Waiting for deadlock detector to launch...");
+check_conflict_log("User transaction caused buffer deadlock with recovery.");
+
+# clean up for next tests
+$node_primary->safe_psql($test_db, qq[ROLLBACK PREPARED 'lock';]);
+
+$psql_standby->quit();
+
+$node_standby->stop();
+$node_primary->stop();
 
 done_testing();
 
-- 
2.43.0

