From 4777798b041f6543b4e568d5c00d74d139b7f738 Mon Sep 17 00:00:00 2001
From: Vitaly Davydov <v.davydov@postgrespro.ru>
Date: Wed, 10 Jun 2026 12:12:54 +0900
Subject: [PATCH v6-REL_15_STABLE] Fix deadlock detector activation in a
 recovery conflict

When the startup process is in a deadlock with a backend, it sends a
signal to the backend to trigger the deadlock detector once the
deadlock timeout (the deadlock_timeout GUC) has elapsed. Due to an
optimization in timeout.c, spontaneous SIGALRM signals that do not
relate to any enabled timeout are possible. When they occur, the
function ResolveRecoveryConflictWithBufferPin can never send the signal
to the conflicting backend, because the deadlock timeout will never be
triggered.

This patch fixes ResolveRecoveryConflictWithBufferPin by making it
ignore spontaneous SIGALRM signals, which are possible with the
current implementation of timeout.c.
---
 src/backend/storage/buffer/bufmgr.c | 81 +++++++++++++++++++++++++++--
 src/backend/storage/ipc/standby.c   | 70 ++++++++++++++++---------
 src/include/storage/bufmgr.h        |  1 +
 3 files changed, 123 insertions(+), 29 deletions(-)

diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index a02814a8f51..224cb677626 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -1847,6 +1847,32 @@ PinBuffer_Locked(BufferDesc *buf)
 	ResourceOwnerRememberBuffer(CurrentResourceOwner, b);
 }
 
+/*
+ * Mark the current process as the pincount waiter of a shared buffer.
+ *
+ * Takes the buf_state obtained from LockBufHdr() and releases the header
+ * lock.  Only the current process may already be registered as the waiter.
+ */
+static void
+RegisterPinCountWaiter(BufferDesc *bufHdr, uint64 buf_state)
+{
+	bool		other_waiter;
+
+	other_waiter = (buf_state & BM_PIN_COUNT_WAITER) != 0 &&
+		bufHdr->wait_backend_pgprocno != MyProc->pgprocno;
+	Assert(!other_waiter);
+	if (other_waiter)
+	{
+		UnlockBufHdr(bufHdr, buf_state);
+		elog(ERROR, "multiple processes attempting to wait for pincount 1");
+	}
+
+	buf_state |= BM_PIN_COUNT_WAITER;
+	bufHdr->wait_backend_pgprocno = MyProc->pgprocno;
+	PinCountWaitBuf = bufHdr;
+	UnlockBufHdr(bufHdr, buf_state);
+}
+
 /*
  * UnpinBuffer -- make buffer available for replacement.
  *
@@ -3102,6 +3128,56 @@ BufferGetLSNAtomic(Buffer buffer)
 	return lsn;
 }
 
+/*
+ * BufferIsReadyForCleanup
+ *		Check whether it is worth retrying a cleanup lock on a shared buffer.
+ *
+ * For the hot-standby path of LockBufferForCleanup() only: called from
+ * ResolveRecoveryConflictWithBufferPin() once ProcWaitForSignal() returns.
+ * The calling process must already be the buffer's BM_PIN_COUNT_WAITER.
+ *
+ * Returns true, after clearing the waiter flag, if the reference count is 1,
+ * i.e. only the caller's own pin is left.  Otherwise returns false with the
+ * current process registered (again) as the pincount waiter, to be woken
+ * when the reference count drops to 1.
+ *
+ * NOTE(review): confirm uint64 matches LockBufHdr()'s type on this branch.
+ */
+bool
+BufferIsReadyForCleanup(Buffer buffer)
+{
+	BufferDesc *desc;
+	uint64		state;
+	uint32		pins;
+
+	Assert(BufferIsValid(buffer));
+	Assert(!BufferIsLocal(buffer));
+
+	desc = GetBufferDescriptor(buffer - 1);
+	Assert(PinCountWaitBuf == desc);
+
+	state = LockBufHdr(desc);
+	pins = BUF_STATE_GET_REFCOUNT(state);
+	Assert(pins > 0);
+	Assert((state & BM_PIN_COUNT_WAITER) == 0 ||
+		   desc->wait_backend_pgprocno == MyProc->pgprocno);
+
+	if (pins != 1)
+	{
+		/*
+		 * More than one pin remains.  Register this process as the
+		 * pincount waiter again; that call also releases the header lock.
+		 */
+		RegisterPinCountWaiter(desc, state);
+		return false;
+	}
+
+	/* A single pin remains: drop the waiter flag so the caller can retry. */
+	state &= ~BM_PIN_COUNT_WAITER;
+	UnlockBufHdr(desc, state);
+	return true;
+}
+
 /* ---------------------------------------------------------------------
  *		DropRelFileNodeBuffers
  *
@@ -4356,10 +4432,7 @@ LockBufferForCleanup(Buffer buffer)
 			LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
 			elog(ERROR, "multiple backends attempting to wait for pincount 1");
 		}
-		bufHdr->wait_backend_pgprocno = MyProc->pgprocno;
-		PinCountWaitBuf = bufHdr;
-		buf_state |= BM_PIN_COUNT_WAITER;
-		UnlockBufHdr(bufHdr, buf_state);
+		RegisterPinCountWaiter(bufHdr, buf_state);
 		LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
 
 		/* Wait to be signaled by UnpinBuffer() */
diff --git a/src/backend/storage/ipc/standby.c b/src/backend/storage/ipc/standby.c
index a36eb80e9ae..2b029cdcf10 100644
--- a/src/backend/storage/ipc/standby.c
+++ b/src/backend/storage/ipc/standby.c
@@ -750,14 +750,21 @@ cleanup:
  * Deadlocks are extremely rare, and relatively expensive to check for,
  * so we don't do a deadlock check right away ... only if we have had to wait
  * at least deadlock_timeout.
+ *
+ * The current process should be the waiter process and should have
+ * published the waited buffer via SetStartupBufferPinWaitBufId().
  */
 void
 ResolveRecoveryConflictWithBufferPin(void)
 {
 	TimestampTz ltime;
+	int			bufid;
 
 	Assert(InHotStandby);
 
+	bufid = GetStartupBufferPinWaitBufId();
+	Assert(bufid >= 0);
+
 	ltime = GetStandbyLimitTime();
 
 	if (GetCurrentTimestamp() >= ltime && ltime != 0)
@@ -793,35 +800,48 @@ ResolveRecoveryConflictWithBufferPin(void)
 		enable_timeouts(timeouts, cnt);
 	}
 
-	/*
-	 * Wait to be signaled by UnpinBuffer() or for the wait to be interrupted
-	 * by one of the timeouts established above.
-	 *
-	 * We assume that only UnpinBuffer() and the timeout requests established
-	 * above can wake us up here. WakeupRecovery() called by walreceiver or
-	 * SIGHUP signal handler, etc cannot do that because it uses the different
-	 * latch from that ProcWaitForSignal() waits on.
-	 */
-	ProcWaitForSignal(PG_WAIT_BUFFER_PIN);
-
-	if (got_standby_delay_timeout)
-		SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN);
-	else if (got_standby_deadlock_timeout)
+	for (;;)
 	{
 		/*
-		 * Send out a request for hot-standby backends to check themselves for
-		 * deadlocks.
+		 * Wait to be signaled by UnpinBuffer() or for the wait to be
+		 * interrupted by one of the timeouts established above.
 		 *
-		 * XXX The subsequent ResolveRecoveryConflictWithBufferPin() will wait
-		 * to be signaled by UnpinBuffer() again and send a request for
-		 * deadlocks check if deadlock_timeout happens. This causes the
-		 * request to continue to be sent every deadlock_timeout until the
-		 * buffer is unpinned or ltime is reached. This would increase the
-		 * workload in the startup process and backends. In practice it may
-		 * not be so harmful because the period that the buffer is kept pinned
-		 * is basically no so long. But we should fix this?
+		 * ProcWaitForSignal() can also wake up for unrelated reasons, so
+		 * recheck later whether cleanup can proceed.
+		 */
+		ProcWaitForSignal(PG_WAIT_BUFFER_PIN);
+
+		/*
+		 * Once the reference count is 1, the waiter process itself is the
+		 * only backend pinning the buffer at the moment. There is a chance to
+		 * lock the buffer exclusively.
 		 */
-		SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK);
+		if (BufferIsReadyForCleanup(bufid + 1))
+			break;
+
+		if (got_standby_delay_timeout)
+		{
+			SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN);
+			break;
+		}
+		else if (got_standby_deadlock_timeout)
+		{
+			/*
+			 * Send out a request for hot-standby backends to check themselves
+			 * for deadlocks.
+			 *
+			 * XXX The subsequent ResolveRecoveryConflictWithBufferPin() will
+			 * wait to be signaled by UnpinBuffer() again and send a request
+			 * for deadlocks check if deadlock_timeout happens. This causes
+			 * the request to continue to be sent every deadlock_timeout until
+			 * the buffer is unpinned or ltime is reached. This would increase
+			 * the workload in the startup process and backends. In practice
+			 * it may not be so harmful because the period that the buffer is
+			 * kept pinned is basically not so long. But we should fix this?
+			 */
+			SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK);
+			break;
+		}
 	}
 
 	/*
diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h
index 8e93d315a67..d0a92541ed8 100644
--- a/src/include/storage/bufmgr.h
+++ b/src/include/storage/bufmgr.h
@@ -226,6 +226,7 @@ extern XLogRecPtr BufferGetLSNAtomic(Buffer buffer);
 extern void PrintPinnedBufs(void);
 #endif
 extern Size BufferShmemSize(void);
+extern bool BufferIsReadyForCleanup(Buffer buffer);
 extern void BufferGetTag(Buffer buffer, RelFileNode *rnode,
 						 ForkNumber *forknum, BlockNumber *blknum);
 
-- 
2.43.0