From a2287174d5c595752f7e5c447807af219c78582e Mon Sep 17 00:00:00 2001
From: Vitaly Davydov <v.davydov@postgrespro.ru>
Date: Wed, 20 May 2026 11:48:34 +0300
Subject: [PATCH 1/2] Fix deadlock detector activation in a recovery conflict

When the startup process is in a deadlock with a backend, it sends a
signal to the backend to trigger the deadlock detector once the
deadlock timeout (the deadlock_timeout GUC) has elapsed. Due to an
optimization in timeout.c, spontaneous SIGALRM signals that do not
relate to any enabled timeout are possible. When they occur, the
function ResolveRecoveryConflictWithBufferPin can never send the signal
to the conflicting backend, because the deadlock timeout will never be
triggered.

The patch fixes ResolveRecoveryConflictWithBufferPin by ignoring
spontaneous SIGALRM signals, which are possible in the current
implementation of timeout.c.
---
 src/backend/storage/buffer/bufmgr.c | 23 ++++++++
 src/backend/storage/ipc/standby.c   | 81 ++++++++++++++++++++---------
 src/include/storage/bufmgr.h        |  1 +
 3 files changed, 80 insertions(+), 25 deletions(-)

diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index cc398db124d..7502b461786 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -4749,6 +4749,29 @@ BufferGetLSNAtomic(Buffer buffer)
 #endif
 }
 
+/*
+ * BufferGetRefCount
+ *		Return the current reference count of a shared buffer.
+ */
+uint32
+BufferGetRefCount(Buffer buffer)
+{
+	BufferDesc *bufHdr;
+	uint64		buf_state;
+	uint32		buf_refcount;
+
+	Assert(BufferIsValid(buffer));
+	Assert(!BufferIsLocal(buffer));
+
+	bufHdr = GetBufferDescriptor(buffer - 1);
+
+	buf_state = LockBufHdr(bufHdr);
+	buf_refcount = BUF_STATE_GET_REFCOUNT(buf_state);
+	UnlockBufHdr(bufHdr);
+
+	return buf_refcount;
+}
+
 /* ---------------------------------------------------------------------
  *		DropRelationBuffers
  *
diff --git a/src/backend/storage/ipc/standby.c b/src/backend/storage/ipc/standby.c
index de9092fdf5b..8cea201df5f 100644
--- a/src/backend/storage/ipc/standby.c
+++ b/src/backend/storage/ipc/standby.c
@@ -790,14 +790,21 @@ cleanup:
  * Deadlocks are extremely rare, and relatively expensive to check for,
  * so we don't do a deadlock check right away ... only if we have had to wait
  * at least deadlock_timeout.
+ *
+ * The current process should be the waiter process and should have
+ * published the waited buffer via SetStartupBufferPinWaitBufId().
  */
 void
 ResolveRecoveryConflictWithBufferPin(void)
 {
 	TimestampTz ltime;
+	int			bufid;
 
 	Assert(InHotStandby);
 
+	bufid = GetStartupBufferPinWaitBufId();
+	Assert(bufid >= 0);
+
 	ltime = GetStandbyLimitTime();
 
 	if (GetCurrentTimestamp() >= ltime && ltime != 0)
@@ -833,35 +840,59 @@ ResolveRecoveryConflictWithBufferPin(void)
 		enable_timeouts(timeouts, cnt);
 	}
 
-	/*
-	 * Wait to be signaled by UnpinBuffer() or for the wait to be interrupted
-	 * by one of the timeouts established above.
-	 *
-	 * We assume that only UnpinBuffer() and the timeout requests established
-	 * above can wake us up here. WakeupRecovery() called by walreceiver or
-	 * SIGHUP signal handler, etc cannot do that because it uses the different
-	 * latch from that ProcWaitForSignal() waits on.
-	 */
-	ProcWaitForSignal(WAIT_EVENT_BUFFER_CLEANUP);
-
-	if (got_standby_delay_timeout)
-		SendRecoveryConflictWithBufferPin(RECOVERY_CONFLICT_BUFFERPIN);
-	else if (got_standby_deadlock_timeout)
+	for (;;)
 	{
+		uint32		refcount;
+
 		/*
-		 * Send out a request for hot-standby backends to check themselves for
-		 * deadlocks.
+		 * Wait to be signaled by UnpinBuffer() or for the wait to be
+		 * interrupted by one of the timeouts established above.
 		 *
-		 * XXX The subsequent ResolveRecoveryConflictWithBufferPin() will wait
-		 * to be signaled by UnpinBuffer() again and send a request for
-		 * deadlocks check if deadlock_timeout happens. This causes the
-		 * request to continue to be sent every deadlock_timeout until the
-		 * buffer is unpinned or ltime is reached. This would increase the
-		 * workload in the startup process and backends. In practice it may
-		 * not be so harmful because the period that the buffer is kept pinned
-		 * is basically no so long. But we should fix this?
+		 * ProcWaitForSignal() can also wake up for unrelated reasons, so
+		 * recheck whether the buffer is now pinned only by the current
+		 * waiter process (reference count of 1). Continue waiting if no
+		 * registered timeout has fired and the buffer is still pinned by
+		 * other processes as well.
+		 */
+		ProcWaitForSignal(WAIT_EVENT_BUFFER_CLEANUP);
+
+		/*
+		 * Once the reference count is 1, the waiter process itself is the
+		 * only backend pinning the buffer at the moment. There is a chance to
+		 * lock the buffer exclusively.
 		 */
-		SendRecoveryConflictWithBufferPin(RECOVERY_CONFLICT_BUFFERPIN_DEADLOCK);
+		refcount = BufferGetRefCount(bufid + 1);
+		Assert(refcount > 0);
+
+		if (refcount == 0)
+			elog(ERROR,
+				 "buffer refcount dropped to zero while waiting for cleanup lock");
+		if (refcount == 1)
+			break;
+
+		if (got_standby_delay_timeout)
+		{
+			SendRecoveryConflictWithBufferPin(RECOVERY_CONFLICT_BUFFERPIN);
+			break;
+		}
+		else if (got_standby_deadlock_timeout)
+		{
+			/*
+			 * Send out a request for hot-standby backends to check themselves
+			 * for deadlocks.
+			 *
+			 * XXX The subsequent ResolveRecoveryConflictWithBufferPin() will
+			 * wait to be signaled by UnpinBuffer() again and send a request
+			 * for deadlocks check if deadlock_timeout happens. This causes
+			 * the request to continue to be sent every deadlock_timeout until
+			 * the buffer is unpinned or ltime is reached. This would increase
+			 * the workload in the startup process and backends. In practice
+			 * it may not be so harmful because the period that the buffer is
+			 * kept pinned is basically not so long. But we should fix this?
+			 */
+			SendRecoveryConflictWithBufferPin(RECOVERY_CONFLICT_BUFFERPIN_DEADLOCK);
+			break;
+		}
 	}
 
 	/*
diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h
index 6837b35fc6d..c38b620a3a1 100644
--- a/src/include/storage/bufmgr.h
+++ b/src/include/storage/bufmgr.h
@@ -311,6 +311,7 @@ extern void DropDatabaseBuffers(Oid dbid);
 
 extern bool BufferIsPermanent(Buffer buffer);
 extern XLogRecPtr BufferGetLSNAtomic(Buffer buffer);
+extern uint32 BufferGetRefCount(Buffer buffer);
 extern void BufferGetTag(Buffer buffer, RelFileLocator *rlocator,
 						 ForkNumber *forknum, BlockNumber *blknum);
 
-- 
2.43.0

