From c1d7d0899b1ae2b038faa6ff0875159af1f1292e Mon Sep 17 00:00:00 2001 From: alterego655 <824662526@qq.com> Date: Tue, 23 Jun 2026 23:10:19 +0800 Subject: [PATCH v2] bufmgr: avoid waiting without a guaranteed pin-count wakeup Since 5310fac6e0f and c75ebc657ffc, shared buffer pins can be released while BM_LOCKED is set. That allows the last conflicting pin to disappear after LockBufferForCleanup() observes a refcount greater than one, but before BM_PIN_COUNT_WAITER becomes visible to unpinners. In that ordering, the unpin does not request a wakeup, and LockBufferForCleanup() can go to sleep even though the cleanup-lock condition has already been satisfied. Publish BM_PIN_COUNT_WAITER while retaining the buffer header lock, then recheck the refcount before releasing the content lock. If only our pin remains, clear the waiter state and return with the cleanup lock already acquired. Otherwise, release the locks and wait as before. --- src/backend/storage/buffer/bufmgr.c | 67 ++++++++++++++++++++--------- 1 file changed, 46 insertions(+), 21 deletions(-) diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index d6c0cc1f6d4..8c7ce012a67 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -6715,24 +6715,7 @@ LockBufferForCleanup(Buffer buffer) { /* Successfully acquired exclusive lock with pincount 1 */ UnlockBufHdr(bufHdr); - - /* - * Emit the log message if recovery conflict on buffer pin was - * resolved but the startup process waited longer than - * deadlock_timeout for it. - */ - if (logged_recovery_conflict) - LogRecoveryConflict(RECOVERY_CONFLICT_BUFFERPIN, - waitStart, GetCurrentTimestamp(), - NULL, false); - - if (waiting) - { - /* reset ps display to remove the suffix if we added one */ - set_ps_display_remove_suffix(); - waiting = false; - } - return; + goto cleanup_lock_acquired; } /* Failed, so mark myself as waiting for pincount 1 */ if (buf_state & BM_PIN_COUNT_WAITER) @@ -6743,9 +6726,32 @@ LockBufferForCleanup(Buffer buffer) } bufHdr->wait_backend_pgprocno = MyProcNumber; PinCountWaitBuf = bufHdr; - UnlockBufHdrExt(bufHdr, buf_state, - BM_PIN_COUNT_WAITER, 0, - 0); + + /* + * Publish BM_PIN_COUNT_WAITER while retaining the buffer header lock. + * The shared refcount can be decremented while BM_LOCKED is set, so + * use an atomic operation that preserves concurrent refcount changes. + */ + pg_atomic_fetch_or_u64(&bufHdr->state, BM_PIN_COUNT_WAITER); + + /* + * Recheck the refcount after publishing the waiter flag, while shared + * refcount increments are still prevented by BM_LOCKED. If only our + * pin remains, the cleanup-lock condition has already been satisfied, + * so remove the waiter state and return without sleeping. + */ + buf_state = pg_atomic_read_u64(&bufHdr->state); + + if (BUF_STATE_GET_REFCOUNT(buf_state) == 1) + { + UnlockBufHdrExt(bufHdr, buf_state, + 0, BM_PIN_COUNT_WAITER, + 0); + PinCountWaitBuf = NULL; + goto cleanup_lock_acquired; + } + + UnlockBufHdr(bufHdr); LockBuffer(buffer, BUFFER_LOCK_UNLOCK); /* Wait to be signaled by UnpinBuffer() */ @@ -6816,6 +6822,25 @@ LockBufferForCleanup(Buffer buffer) PinCountWaitBuf = NULL; /* Loop back and try again */ } + +cleanup_lock_acquired: + + /* + * Emit the log message if recovery conflict on buffer pin was resolved + * but the startup process waited longer than deadlock_timeout for it. + */ + if (logged_recovery_conflict) + LogRecoveryConflict(RECOVERY_CONFLICT_BUFFERPIN, + waitStart, GetCurrentTimestamp(), + NULL, false); + + if (waiting) + { + /* reset ps display to remove the suffix if we added one */ + set_ps_display_remove_suffix(); + waiting = false; + } + return; } /* -- 2.51.0