From 385cbfe08d0e780c350bad78c9d1e4f283294a21 Mon Sep 17 00:00:00 2001 From: "masao.fujii" Date: Wed, 10 Jun 2026 12:12:54 +0900 Subject: [PATCH v4 1/2] Fix deadlock detector activation in a recovery conflict When the startup process in a deadlock with a backend, it sends the signal to the backend to trigger the deadlock detector when the deadlock timeout is elapsed (deadlock_timeout guc). Due to some optimization in timeout.c, when spontaneous SIGALRM signals are possible, which doesn't relate to any enabled timeout, the function ResolveRecoveryConflictWithBufferPin can never send the signal to the conflicting backend, becase the deadlock timeout will never be triggered. The patch fixes ResolveRecoveryConflictWithBufferPin by ignoring spontaneous SIGALRM signals, that are possible in the current implementation of timeout.c functionality. --- src/backend/storage/buffer/bufmgr.c | 75 +++++++++++++++++++++++++++-- src/backend/storage/ipc/standby.c | 71 +++++++++++++++++---------- src/include/storage/bufmgr.h | 1 + 3 files changed, 117 insertions(+), 30 deletions(-) diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index cc398db124d..55fe467315e 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -3455,6 +3455,28 @@ WakePinCountWaiter(BufferDesc *buf) UnlockBufHdr(buf); } +/* + * Register the current process as the pincount waiter for a shared buffer. + * + * The caller must hold the buffer header lock, pass the current buffer state + * returned by LockBufHdr(), and ensure that no other backend is already + * registered as the waiter. + */ +static void +RegisterPinCountWaiter(BufferDesc *bufHdr, uint64 buf_state) +{ + Assert((buf_state & BM_PIN_COUNT_WAITER) == 0 || + bufHdr->wait_backend_pgprocno == MyProcNumber); + + if ((buf_state & BM_PIN_COUNT_WAITER) != 0 && + bufHdr->wait_backend_pgprocno != MyProcNumber) + elog(ERROR, "multiple processes attempting to wait for pincount 1"); + + bufHdr->wait_backend_pgprocno = MyProcNumber; + PinCountWaitBuf = bufHdr; + UnlockBufHdrExt(bufHdr, buf_state, BM_PIN_COUNT_WAITER, 0, 0); +} + /* * UnpinBuffer -- make buffer available for replacement. * @@ -4749,6 +4771,53 @@ BufferGetLSNAtomic(Buffer buffer) #endif } +/* + * BufferShouldWaitForCleanupLockForStandby + * Recheck whether the startup process still needs to wait. + * + * This is only for the hot-standby path in ResolveRecoveryConflictWithBufferPin() + * after ProcWaitForSignal() returns. + * + * Returns true if other backends still pin the shared buffer. In that case, + * this function guarantees that the current backend remains registered as the + * pincount waiter to be woken when the buffer refcount drops to 1. Returns + * false when the caller itself is the only remaining pin holder, so the + * caller can retry taking the cleanup lock. + */ +bool +BufferShouldWaitForCleanupLockForStandby(Buffer buffer) +{ + BufferDesc *bufHdr; + uint64 buf_state; + uint32 buf_refcount; + + Assert(BufferIsValid(buffer)); + Assert(!BufferIsLocal(buffer)); + + bufHdr = GetBufferDescriptor(buffer - 1); + Assert(PinCountWaitBuf == bufHdr); + + buf_state = LockBufHdr(bufHdr); + buf_refcount = BUF_STATE_GET_REFCOUNT(buf_state); + Assert(buf_refcount > 0); + Assert((buf_state & BM_PIN_COUNT_WAITER) == 0 || + bufHdr->wait_backend_pgprocno == MyProcNumber); + + if (buf_refcount == 1) + { + UnlockBufHdr(bufHdr); + return false; + } + + /* + * If other processes still pin the buffer, register this process again as + * the pincount waiter to wait agan. + */ + RegisterPinCountWaiter(bufHdr, buf_state); + + return true; +} + /* --------------------------------------------------------------------- * DropRelationBuffers * @@ -6741,11 +6810,7 @@ LockBufferForCleanup(Buffer buffer) LockBuffer(buffer, BUFFER_LOCK_UNLOCK); elog(ERROR, "multiple backends attempting to wait for pincount 1"); } - bufHdr->wait_backend_pgprocno = MyProcNumber; - PinCountWaitBuf = bufHdr; - UnlockBufHdrExt(bufHdr, buf_state, - BM_PIN_COUNT_WAITER, 0, - 0); + RegisterPinCountWaiter(bufHdr, buf_state); LockBuffer(buffer, BUFFER_LOCK_UNLOCK); /* Wait to be signaled by UnpinBuffer() */ diff --git a/src/backend/storage/ipc/standby.c b/src/backend/storage/ipc/standby.c index de9092fdf5b..17e80f0d624 100644 --- a/src/backend/storage/ipc/standby.c +++ b/src/backend/storage/ipc/standby.c @@ -790,14 +790,21 @@ cleanup: * Deadlocks are extremely rare, and relatively expensive to check for, * so we don't do a deadlock check right away ... only if we have had to wait * at least deadlock_timeout. + * + * The current process should be the waiter process and should have + * published the waited buffer via SetStartupBufferPinWaitBufId(). */ void ResolveRecoveryConflictWithBufferPin(void) { TimestampTz ltime; + int bufid; Assert(InHotStandby); + bufid = GetStartupBufferPinWaitBufId(); + Assert(bufid >= 0); + ltime = GetStandbyLimitTime(); if (GetCurrentTimestamp() >= ltime && ltime != 0) @@ -833,35 +840,49 @@ ResolveRecoveryConflictWithBufferPin(void) enable_timeouts(timeouts, cnt); } - /* - * Wait to be signaled by UnpinBuffer() or for the wait to be interrupted - * by one of the timeouts established above. - * - * We assume that only UnpinBuffer() and the timeout requests established - * above can wake us up here. WakeupRecovery() called by walreceiver or - * SIGHUP signal handler, etc cannot do that because it uses the different - * latch from that ProcWaitForSignal() waits on. - */ - ProcWaitForSignal(WAIT_EVENT_BUFFER_CLEANUP); - - if (got_standby_delay_timeout) - SendRecoveryConflictWithBufferPin(RECOVERY_CONFLICT_BUFFERPIN); - else if (got_standby_deadlock_timeout) + for (;;) { /* - * Send out a request for hot-standby backends to check themselves for - * deadlocks. + * Wait to be signaled by UnpinBuffer() or for the wait to be + * interrupted by one of the timeouts established above. * - * XXX The subsequent ResolveRecoveryConflictWithBufferPin() will wait - * to be signaled by UnpinBuffer() again and send a request for - * deadlocks check if deadlock_timeout happens. This causes the - * request to continue to be sent every deadlock_timeout until the - * buffer is unpinned or ltime is reached. This would increase the - * workload in the startup process and backends. In practice it may - * not be so harmful because the period that the buffer is kept pinned - * is basically no so long. But we should fix this? + * ProcWaitForSignal() can also wake up for unrelated reasons, so + * recheck later that the buffer is pinned by the current waiter + * process only (reference counter should be 1). + */ + ProcWaitForSignal(WAIT_EVENT_BUFFER_CLEANUP); + + /* + * Once the reference count is 1, the waiter process itself is the + * only backend pinning the buffer at the moment. There is a chance to + * lock the buffer exclusively. */ - SendRecoveryConflictWithBufferPin(RECOVERY_CONFLICT_BUFFERPIN_DEADLOCK); + if (!BufferShouldWaitForCleanupLockForStandby(bufid + 1)) + break; + + if (got_standby_delay_timeout) + { + SendRecoveryConflictWithBufferPin(RECOVERY_CONFLICT_BUFFERPIN); + break; + } + else if (got_standby_deadlock_timeout) + { + /* + * Send out a request for hot-standby backends to check themselves + * for deadlocks. + * + * XXX The subsequent ResolveRecoveryConflictWithBufferPin() will + * wait to be signaled by UnpinBuffer() again and send a request + * for deadlocks check if deadlock_timeout happens. This causes + * the request to continue to be sent every deadlock_timeout until + * the buffer is unpinned or ltime is reached. This would increase + * the workload in the startup process and backends. In practice + * it may not be so harmful because the period that the buffer is + * kept pinned is basically no so long. But we should fix this? + */ + SendRecoveryConflictWithBufferPin(RECOVERY_CONFLICT_BUFFERPIN_DEADLOCK); + break; + } } /* diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h index 6837b35fc6d..999862825ae 100644 --- a/src/include/storage/bufmgr.h +++ b/src/include/storage/bufmgr.h @@ -311,6 +311,7 @@ extern void DropDatabaseBuffers(Oid dbid); extern bool BufferIsPermanent(Buffer buffer); extern XLogRecPtr BufferGetLSNAtomic(Buffer buffer); +extern bool BufferShouldWaitForCleanupLockForStandby(Buffer buffer); extern void BufferGetTag(Buffer buffer, RelFileLocator *rlocator, ForkNumber *forknum, BlockNumber *blknum); -- 2.53.0