From 481aa2bd5b5ada647ffa57575c7b8436c3d17c40 Mon Sep 17 00:00:00 2001 From: Vitaly Davydov Date: Wed, 10 Jun 2026 12:12:54 +0900 Subject: [PATCH v5] Fix deadlock detector activation in a recovery conflict When the startup process is in a deadlock with a backend, it sends a signal to the backend to trigger the deadlock detector once the deadlock timeout (deadlock_timeout GUC) has elapsed. Due to an optimization in timeout.c, spontaneous SIGALRM signals that do not relate to any enabled timeout are possible. In that case, the function ResolveRecoveryConflictWithBufferPin can never send the signal to the conflicting backend, because the deadlock timeout will never be triggered. The patch fixes ResolveRecoveryConflictWithBufferPin by ignoring spontaneous SIGALRM signals, which are possible in the current implementation of timeout.c. --- src/backend/storage/buffer/bufmgr.c | 77 +++++++++++++++++++++++++++-- src/backend/storage/ipc/standby.c | 70 ++++++++++++++++---------- src/include/storage/bufmgr.h | 1 + 3 files changed, 118 insertions(+), 30 deletions(-) diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index d6c0cc1f6d4..2c3260e904a 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -3455,6 +3455,28 @@ WakePinCountWaiter(BufferDesc *buf) UnlockBufHdr(buf); } +/* + * Register the current process as the pincount waiter for a shared buffer. + * + * The caller must hold the buffer header lock, pass the current buffer state + * returned by LockBufHdr(), and ensure that no other backend is already + * registered as the waiter. 
+ */ +static void +RegisterPinCountWaiter(BufferDesc *bufHdr, uint64 buf_state) +{ + Assert((buf_state & BM_PIN_COUNT_WAITER) == 0 || + bufHdr->wait_backend_pgprocno == MyProcNumber); + + if ((buf_state & BM_PIN_COUNT_WAITER) != 0 && + bufHdr->wait_backend_pgprocno != MyProcNumber) + elog(ERROR, "multiple processes attempting to wait for pincount 1"); + + bufHdr->wait_backend_pgprocno = MyProcNumber; + PinCountWaitBuf = bufHdr; + UnlockBufHdrExt(bufHdr, buf_state, BM_PIN_COUNT_WAITER, 0, 0); +} + /* * UnpinBuffer -- make buffer available for replacement. * @@ -4749,6 +4771,55 @@ BufferGetLSNAtomic(Buffer buffer) #endif } +/* + * BufferIsReadyForCleanup + * Recheck whether the startup process can retry cleanup lock acquisition. + * + * This is only for the hot-standby path in LockBufferForCleanup(), via + * ResolveRecoveryConflictWithBufferPin(), after ProcWaitForSignal() returns. + * The caller must already be registered as the shared buffer's + * BM_PIN_COUNT_WAITER. + * + * Returns true when the caller itself is the only remaining pin holder, so it + * can retry taking the cleanup lock. Returns false if other backends still + * pin the shared buffer. In that case, this function guarantees that the + * current backend remains registered as the pincount waiter to be woken when + * the buffer refcount drops to 1. 
+ */ +bool +BufferIsReadyForCleanup(Buffer buffer) +{ + BufferDesc *bufHdr; + uint64 buf_state; + uint32 buf_refcount; + + Assert(BufferIsValid(buffer)); + Assert(!BufferIsLocal(buffer)); + + bufHdr = GetBufferDescriptor(buffer - 1); + Assert(PinCountWaitBuf == bufHdr); + + buf_state = LockBufHdr(bufHdr); + buf_refcount = BUF_STATE_GET_REFCOUNT(buf_state); + Assert(buf_refcount > 0); + Assert((buf_state & BM_PIN_COUNT_WAITER) == 0 || + bufHdr->wait_backend_pgprocno == MyProcNumber); + + if (buf_refcount == 1) + { + UnlockBufHdr(bufHdr); + return true; + } + + /* + * If other processes still pin the buffer, register this process again as + * the pincount waiter to wait again. + */ + RegisterPinCountWaiter(bufHdr, buf_state); + + return false; +} + /* --------------------------------------------------------------------- * DropRelationBuffers * @@ -6741,11 +6812,7 @@ LockBufferForCleanup(Buffer buffer) LockBuffer(buffer, BUFFER_LOCK_UNLOCK); elog(ERROR, "multiple backends attempting to wait for pincount 1"); } - bufHdr->wait_backend_pgprocno = MyProcNumber; - PinCountWaitBuf = bufHdr; - UnlockBufHdrExt(bufHdr, buf_state, - BM_PIN_COUNT_WAITER, 0, - 0); + RegisterPinCountWaiter(bufHdr, buf_state); LockBuffer(buffer, BUFFER_LOCK_UNLOCK); /* Wait to be signaled by UnpinBuffer() */ diff --git a/src/backend/storage/ipc/standby.c b/src/backend/storage/ipc/standby.c index de9092fdf5b..f802870392a 100644 --- a/src/backend/storage/ipc/standby.c +++ b/src/backend/storage/ipc/standby.c @@ -790,14 +790,21 @@ cleanup: * Deadlocks are extremely rare, and relatively expensive to check for, * so we don't do a deadlock check right away ... only if we have had to wait * at least deadlock_timeout. + * + * The current process should be the waiter process and should have + * published the waited buffer via SetStartupBufferPinWaitBufId(). 
*/ void ResolveRecoveryConflictWithBufferPin(void) { TimestampTz ltime; + int bufid; Assert(InHotStandby); + bufid = GetStartupBufferPinWaitBufId(); + Assert(bufid >= 0); + ltime = GetStandbyLimitTime(); if (GetCurrentTimestamp() >= ltime && ltime != 0) @@ -833,35 +840,48 @@ ResolveRecoveryConflictWithBufferPin(void) enable_timeouts(timeouts, cnt); } - /* - * Wait to be signaled by UnpinBuffer() or for the wait to be interrupted - * by one of the timeouts established above. - * - * We assume that only UnpinBuffer() and the timeout requests established - * above can wake us up here. WakeupRecovery() called by walreceiver or - * SIGHUP signal handler, etc cannot do that because it uses the different - * latch from that ProcWaitForSignal() waits on. - */ - ProcWaitForSignal(WAIT_EVENT_BUFFER_CLEANUP); - - if (got_standby_delay_timeout) - SendRecoveryConflictWithBufferPin(RECOVERY_CONFLICT_BUFFERPIN); - else if (got_standby_deadlock_timeout) + for (;;) { /* - * Send out a request for hot-standby backends to check themselves for - * deadlocks. + * Wait to be signaled by UnpinBuffer() or for the wait to be + * interrupted by one of the timeouts established above. * - * XXX The subsequent ResolveRecoveryConflictWithBufferPin() will wait - * to be signaled by UnpinBuffer() again and send a request for - * deadlocks check if deadlock_timeout happens. This causes the - * request to continue to be sent every deadlock_timeout until the - * buffer is unpinned or ltime is reached. This would increase the - * workload in the startup process and backends. In practice it may - * not be so harmful because the period that the buffer is kept pinned - * is basically no so long. But we should fix this? + * ProcWaitForSignal() can also wake up for unrelated reasons, so + * recheck later whether cleanup can proceed. 
+ */ + ProcWaitForSignal(WAIT_EVENT_BUFFER_CLEANUP); + + /* + * Once the reference count is 1, the waiter process itself is the + * only backend pinning the buffer at the moment. There is a chance to + * lock the buffer exclusively. */ - SendRecoveryConflictWithBufferPin(RECOVERY_CONFLICT_BUFFERPIN_DEADLOCK); + if (BufferIsReadyForCleanup(bufid + 1)) + break; + + if (got_standby_delay_timeout) + { + SendRecoveryConflictWithBufferPin(RECOVERY_CONFLICT_BUFFERPIN); + break; + } + else if (got_standby_deadlock_timeout) + { + /* + * Send out a request for hot-standby backends to check themselves + * for deadlocks. + * + * XXX The subsequent ResolveRecoveryConflictWithBufferPin() will + * wait to be signaled by UnpinBuffer() again and send a request + * for deadlocks check if deadlock_timeout happens. This causes + * the request to continue to be sent every deadlock_timeout until + * the buffer is unpinned or ltime is reached. This would increase + * the workload in the startup process and backends. In practice + * it may not be so harmful because the period that the buffer is + * kept pinned is basically not so long. But we should fix this? + */ + SendRecoveryConflictWithBufferPin(RECOVERY_CONFLICT_BUFFERPIN_DEADLOCK); + break; + } } /* diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h index 6837b35fc6d..10519342194 100644 --- a/src/include/storage/bufmgr.h +++ b/src/include/storage/bufmgr.h @@ -311,6 +311,7 @@ extern void DropDatabaseBuffers(Oid dbid); extern bool BufferIsPermanent(Buffer buffer); extern XLogRecPtr BufferGetLSNAtomic(Buffer buffer); +extern bool BufferIsReadyForCleanup(Buffer buffer); extern void BufferGetTag(Buffer buffer, RelFileLocator *rlocator, ForkNumber *forknum, BlockNumber *blknum); -- 2.53.0