From b25d491a05a43fb7adf014b2580c71ec7adb75a2 Mon Sep 17 00:00:00 2001 From: Masahiko Sawada Date: Mon, 8 Aug 2016 16:43:35 -0700 Subject: [PATCH 1/2] Allow multiple backends to wait for cleanup lock. --- src/backend/storage/buffer/buf_init.c | 3 +- src/backend/storage/buffer/bufmgr.c | 57 +++++++++++++++++++++++------------ src/include/storage/buf_internals.h | 4 ++- src/include/storage/proc.h | 2 ++ 4 files changed, 45 insertions(+), 21 deletions(-) diff --git a/src/backend/storage/buffer/buf_init.c b/src/backend/storage/buffer/buf_init.c index a4163cf..2aad030 100644 --- a/src/backend/storage/buffer/buf_init.c +++ b/src/backend/storage/buffer/buf_init.c @@ -134,7 +134,8 @@ InitBufferPool(void) CLEAR_BUFFERTAG(buf->tag); pg_atomic_init_u32(&buf->state, 0); - buf->wait_backend_pid = 0; + dlist_init(&buf->pin_count_waiters); + pg_atomic_write_u32(&buf->nwaiters, 0); buf->buf_id = i; diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index 76ade37..f2f4ab9 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -38,6 +38,7 @@ #include "catalog/storage.h" #include "executor/instrument.h" #include "lib/binaryheap.h" +#include "lib/ilist.h" #include "miscadmin.h" #include "pg_trace.h" #include "pgstat.h" @@ -1730,15 +1731,19 @@ UnpinBuffer(BufferDesc *buf, bool fixOwner) */ buf_state = LockBufHdr(buf); - if ((buf_state & BM_PIN_COUNT_WAITER) && - BUF_STATE_GET_REFCOUNT(buf_state) == 1) + if (buf_state & BM_PIN_COUNT_WAITER) { - /* we just released the last pin other than the waiter's */ - int wait_backend_pid = buf->wait_backend_pid; + dlist_mutable_iter iter; - buf_state &= ~BM_PIN_COUNT_WAITER; + if (pg_atomic_read_u32(&buf->nwaiters) == 1) + buf_state &= ~BM_PIN_COUNT_WAITER; + + dlist_foreach_modify(iter, &buf->pin_count_waiters) + { + PGPROC *waiter = dlist_container(PGPROC, clWaitLink, iter.cur); + ProcSendSignal(waiter->pid); + } UnlockBufHdr(buf, buf_state); - 
ProcSendSignal(wait_backend_pid); } else UnlockBufHdr(buf, buf_state); @@ -3513,8 +3518,17 @@ UnlockBuffers(void) * got a cancel/die interrupt before getting the signal. */ if ((buf_state & BM_PIN_COUNT_WAITER) != 0 && - buf->wait_backend_pid == MyProcPid) - buf_state &= ~BM_PIN_COUNT_WAITER; + pg_atomic_read_u32(&buf->nwaiters) == 1) + { + dlist_mutable_iter iter; + + dlist_foreach_modify(iter, &buf->pin_count_waiters) + { + PGPROC *waiter = dlist_container(PGPROC, clWaitLink, iter.cur); + if (waiter->pid == MyProcPid) + buf_state &= ~BM_PIN_COUNT_WAITER; + } + } UnlockBufHdr(buf, buf_state); @@ -3616,20 +3630,24 @@ LockBufferForCleanup(Buffer buffer) buf_state = LockBufHdr(bufHdr); Assert(BUF_STATE_GET_REFCOUNT(buf_state) > 0); - if (BUF_STATE_GET_REFCOUNT(buf_state) == 1) + /* + * If refcount == 1 then we can return immediately. + * If refcount > 1, we can also return when refcount == (nwaiters + 1), + * because refcount includes other processes and ourselves, whereas nwaiters + * includes only other processes. + */ + if (BUF_STATE_GET_REFCOUNT(buf_state) == 1 || + ((BUF_STATE_GET_REFCOUNT(buf_state) - 1)== + pg_atomic_read_u32(&bufHdr->nwaiters))) { /* Successfully acquired exclusive lock with pincount 1 */ UnlockBufHdr(bufHdr, buf_state); return; } /* Failed, so mark myself as waiting for pincount 1 */ - if (buf_state & BM_PIN_COUNT_WAITER) - { - UnlockBufHdr(bufHdr, buf_state); - LockBuffer(buffer, BUFFER_LOCK_UNLOCK); - elog(ERROR, "multiple backends attempting to wait for pincount 1"); - } - bufHdr->wait_backend_pid = MyProcPid; + pg_atomic_fetch_add_u32(&bufHdr->nwaiters, 1); + dlist_push_tail(&bufHdr->pin_count_waiters, &MyProc->clWaitLink); + PinCountWaitBuf = bufHdr; buf_state |= BM_PIN_COUNT_WAITER; UnlockBufHdr(bufHdr, buf_state); @@ -3662,9 +3680,10 @@ LockBufferForCleanup(Buffer buffer) * better be safe. 
*/ buf_state = LockBufHdr(bufHdr); - if ((buf_state & BM_PIN_COUNT_WAITER) != 0 && - bufHdr->wait_backend_pid == MyProcPid) - buf_state &= ~BM_PIN_COUNT_WAITER; + + dlist_delete(&MyProc->clWaitLink); + pg_atomic_fetch_sub_u32(&bufHdr->nwaiters, 1); + UnlockBufHdr(bufHdr, buf_state); PinCountWaitBuf = NULL; diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h index e0dfb2f..90fcbd7 100644 --- a/src/include/storage/buf_internals.h +++ b/src/include/storage/buf_internals.h @@ -182,7 +182,9 @@ typedef struct BufferDesc /* state of the tag, containing flags, refcount and usagecount */ pg_atomic_uint32 state; - int wait_backend_pid; /* backend PID of pin-count waiter */ + dlist_head pin_count_waiters; /* list of PGPROCs waiting for pin count, linked via clWaitLink */ + pg_atomic_uint32 nwaiters; /* number of entries in pin_count_waiters */ + int freeNext; /* link in freelist chain */ LWLock content_lock; /* to lock access to buffer contents */ diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h index f576f05..4cd9416 100644 --- a/src/include/storage/proc.h +++ b/src/include/storage/proc.h @@ -123,6 +123,8 @@ struct PGPROC LOCKMASK heldLocks; /* bitmask for lock types already held on this * lock object by this backend */ + dlist_node clWaitLink; /* position in Cleanup Lock wait list */ + /* * Info to allow us to wait for synchronous replication, if needed. * waitLSN is InvalidXLogRecPtr if not waiting; set only by user backend. -- 2.8.1