From c77f2755dca4dec83a5f16df634b07d3e8b91a96 Mon Sep 17 00:00:00 2001
From: Zhiguo Zhou
Date: Thu, 29 May 2025 16:55:42 +0800
Subject: [PATCH v1] Optimize shared LWLock acquisition for high-core-count systems

This patch reduces LWLock acquisition overhead by merging the read and
update of the lock state into a single atomic operation when acquiring
a lock in LW_SHARED mode. Eliminating the separate atomic
read-modify-write cycle is critical for performance on high-core-count
systems.

Key changes:
- Extended LW_SHARED_MASK by 1 bit and shifted LW_VAL_EXCLUSIVE by 1 bit
  so that the shared-reference field can hold the upper bound of
  MAX_BACKENDS * 2.
- Added a `willwait` parameter to `LWLockAttemptLock` to disable the
  optimization when the caller is unwilling to wait, avoiding conflicts
  between the reference count and the LW_VAL_EXCLUSIVE flag.
- Updated `LWLockReleaseInternal` to clear all LW_LOCK_MASK bits
  atomically with `pg_atomic_fetch_and_u32` when releasing an exclusive
  lock.
- Adjusted the callers (`LWLockAcquire`, `LWLockConditionalAcquire`,
  `LWLockAcquireOrWait`) to pass the `willwait` parameter appropriately.

Key optimization idea:
The fast path is only taken when willwait=true, which ensures that the
reference count cannot grow unchecked and overflow into the
LW_VAL_EXCLUSIVE bit.

Three scenarios can occur when acquiring a shared lock:
1) Lock is free: atomically increment the reference count and acquire
2) Lock held in shared mode: atomically increment the reference count
   and acquire
3) Lock held exclusively: atomically increment the reference count but
   fail to acquire

Scenarios 1 and 2 work as expected: we successfully increment the count
and acquire the lock.

Scenario 3 is counterintuitive: we increment the reference count even
though we cannot acquire the lock because of the exclusive holder. This
creates a temporarily invalid reference count, which is acceptable
because:
- The LW_VAL_EXCLUSIVE flag takes precedence in determining lock state
- Each process retries at most twice before blocking on a semaphore
- This bounds the "overcounted" references to MAX_BACKENDS * 2
- That bound fits within the widened LW_SHARED_MASK
- lock->state, including any "overcounted" references, is reset when
  the exclusive lock is released

These changes improve scalability and reduce contention in workloads
with frequent LWLock operations on servers with many cores.
---
 src/backend/storage/lmgr/lwlock.c | 104 +++++++++++++++++++++++++-----
 1 file changed, 88 insertions(+), 16 deletions(-)

diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c
index 46f44bc4511..613a35021b8 100644
--- a/src/backend/storage/lmgr/lwlock.c
+++ b/src/backend/storage/lmgr/lwlock.c
@@ -97,20 +97,41 @@
 #define LW_FLAG_BITS				3
 #define LW_FLAG_MASK				(((1<<LW_FLAG_BITS)-1)<<(32-LW_FLAG_BITS))
 
-/* assumes MAX_BACKENDS is a (power of 2) - 1, checked below */
-#define LW_VAL_EXCLUSIVE			(MAX_BACKENDS + 1)
+/*
+ * assumes MAX_BACKENDS is a (power of 2) - 1, checked below
+ *
+ * The shared-reference field is one bit wider than MAX_BACKENDS, and
+ * LW_VAL_EXCLUSIVE is shifted up by one bit accordingly, so that the field
+ * can hold up to MAX_BACKENDS * 2 references.  Readers that attempt a shared
+ * acquisition while an exclusive holder is present still increment the count
+ * (see LWLockAttemptLock); those "overcounted" references are wiped when the
+ * exclusive holder releases the lock.
+ */
+#define LW_VAL_EXCLUSIVE			((MAX_BACKENDS + 1) << 1)
 #define LW_VAL_SHARED				1
 
-/* already (power of 2)-1, i.e. suitable for a mask */
-#define LW_SHARED_MASK				MAX_BACKENDS
-#define LW_LOCK_MASK				(MAX_BACKENDS | LW_VAL_EXCLUSIVE)
+/* one bit wider than MAX_BACKENDS, still (power of 2)-1, usable as a mask */
+#define LW_SHARED_MASK				((MAX_BACKENDS << 1) | 1)
+#define LW_LOCK_MASK				(LW_SHARED_MASK | LW_VAL_EXCLUSIVE)
@@ -277,6 +298,8 @@ PRINT_LWDEBUG(const char *where, LWLock *lock, LWLockMode mode)
 	if (Trace_lwlocks)
 	{
 		uint32		state = pg_atomic_read_u32(&lock->state);
+		uint32		excl = (state & LW_VAL_EXCLUSIVE) != 0;
+		uint32		shared = excl ? 0 : state & LW_SHARED_MASK;
 
 		ereport(LOG,
 				(errhidestmt(true),
@@ -284,8 +307,8 @@ PRINT_LWDEBUG(const char *where, LWLock *lock, LWLockMode mode)
 				 errmsg_internal("%d: %s(%s %p): excl %u shared %u haswaiters %u waiters %u rOK %d",
 								 MyProcPid,
 								 where, T_NAME(lock), lock,
-								 (state & LW_VAL_EXCLUSIVE) != 0,
-								 state & LW_SHARED_MASK,
+								 excl,
+								 shared,
 								 (state & LW_FLAG_HAS_WAITERS) != 0,
 								 pg_atomic_read_u32(&lock->nwaiters),
 								 (state & LW_FLAG_RELEASE_OK) != 0)));
@@ -790,14 +813,53 @@ GetLWLockIdentifier(uint32 classId, uint16 eventId)
  * This function will not block waiting for a lock to become free - that's the
  * caller's job.
  *
+ * willwait: true if the caller is willing to wait for the lock to become free
+ *           false if the caller is not willing to wait.
+ *
  * Returns true if the lock isn't free and we need to wait.
  */
 static bool
-LWLockAttemptLock(LWLock *lock, LWLockMode mode)
+LWLockAttemptLock(LWLock *lock, LWLockMode mode, bool willwait)
 {
 	uint32		old_state;
 
 	Assert(mode == LW_EXCLUSIVE || mode == LW_SHARED);
 
+	/*
+	 * Optimized shared lock acquisition using an atomic fetch-and-add.
+	 *
+	 * This lowers the cost of acquiring shared locks by reducing the number
+	 * of atomic operations, which can be expensive on systems with many CPU
+	 * cores.
+	 *
+	 * It is only used when willwait=true, ensuring that the reference count
+	 * does not grow unchecked and overflow into the LW_VAL_EXCLUSIVE bit.
+	 *
+	 * Three scenarios can occur when acquiring a shared lock:
+	 * 1) Lock is free: atomically increment the reference count and acquire
+	 * 2) Lock held in shared mode: atomically increment the reference count
+	 *    and acquire
+	 * 3) Lock held exclusively: atomically increment the reference count but
+	 *    fail to acquire
+	 *
+	 * Scenarios 1 and 2 work as expected: we successfully increment the
+	 * count and acquire the lock.
+	 *
+	 * Scenario 3 is counterintuitive: we increment the reference count even
+	 * though we cannot acquire the lock due to the exclusive holder.  This
+	 * creates a temporarily invalid reference count, but it is acceptable
+	 * because:
+	 * - The LW_VAL_EXCLUSIVE flag takes precedence in determining lock state
+	 * - Each process retries at most twice before blocking on a semaphore
+	 * - This bounds the "overcounted" references to MAX_BACKENDS * 2
+	 * - The bound fits within LW_SHARED_MASK capacity
+	 * - lock->state, including the "overcounted" references, is reset when
+	 *   the exclusive lock is released
+	 *
+	 * See the comments at the LW_SHARED_MASK definition for additional
+	 * details.
+	 */
+	if (willwait && mode == LW_SHARED)
+	{
+		old_state = pg_atomic_fetch_add_u32(&lock->state, LW_VAL_SHARED);
+		Assert((old_state & LW_LOCK_MASK) != LW_LOCK_MASK);
+		return (old_state & LW_VAL_EXCLUSIVE) != 0;
+	}
 
 	/*
 	 * Read once outside the loop, later iterations will get the newer value
@@ -1242,7 +1304,7 @@ LWLockAcquire(LWLock *lock, LWLockMode mode)
 	 * Try to grab the lock the first time, we're not in the waitqueue
 	 * yet/anymore.
 	 */
-	mustwait = LWLockAttemptLock(lock, mode);
+	mustwait = LWLockAttemptLock(lock, mode, true);
 
 	if (!mustwait)
 	{
@@ -1265,7 +1327,7 @@ LWLockAcquire(LWLock *lock, LWLockMode mode)
 		LWLockQueueSelf(lock, mode);
 
 		/* we're now guaranteed to be woken up if necessary */
-		mustwait = LWLockAttemptLock(lock, mode);
+		mustwait = LWLockAttemptLock(lock, mode, true);
 
 		/* ok, grabbed the lock the second time round, need to undo queueing */
 		if (!mustwait)
 		{
@@ -1296,6 +1358,7 @@ LWLockAcquire(LWLock *lock, LWLockMode mode)
 		for (;;)
 		{
+			/* When woken, the previous exclusive holder has already reset lock->state */
 			PGSemaphoreLock(proc->sem);
 			if (proc->lwWaiting == LW_WS_NOT_WAITING)
 				break;
@@ -1368,7 +1431,7 @@ LWLockConditionalAcquire(LWLock *lock, LWLockMode mode)
 	HOLD_INTERRUPTS();
 
 	/* Check for the lock */
-	mustwait = LWLockAttemptLock(lock, mode);
+	mustwait = LWLockAttemptLock(lock, mode, false);
 
 	if (mustwait)
 	{
@@ -1435,13 +1498,13 @@ LWLockAcquireOrWait(LWLock *lock, LWLockMode mode)
 	 * NB: We're using nearly the same twice-in-a-row lock acquisition
 	 * protocol as LWLockAcquire(). Check its comments for details.
 	 */
-	mustwait = LWLockAttemptLock(lock, mode);
+	mustwait = LWLockAttemptLock(lock, mode, true);
 
 	if (mustwait)
 	{
 		LWLockQueueSelf(lock, LW_WAIT_UNTIL_FREE);
 
-		mustwait = LWLockAttemptLock(lock, mode);
+		mustwait = LWLockAttemptLock(lock, mode, true);
 
 		if (mustwait)
 		{
@@ -1461,6 +1524,7 @@ LWLockAcquireOrWait(LWLock *lock, LWLockMode mode)
 			for (;;)
 			{
+				/* When woken, the previous exclusive holder has already reset lock->state */
 				PGSemaphoreLock(proc->sem);
 				if (proc->lwWaiting == LW_WS_NOT_WAITING)
 					break;
@@ -1843,7 +1907,15 @@ LWLockReleaseInternal(LWLock *lock, LWLockMode mode)
 	 * others, even if we still have to wakeup other waiters.
 	 */
 	if (mode == LW_EXCLUSIVE)
-		oldstate = pg_atomic_sub_fetch_u32(&lock->state, LW_VAL_EXCLUSIVE);
+	{
+		/*
+		 * To release the exclusive lock, clear all bits of LW_LOCK_MASK,
+		 * including any "overcounted" increments left behind by blocked
+		 * readers.
+		 */
+		oldstate = pg_atomic_fetch_and_u32(&lock->state, ~LW_LOCK_MASK);
+		oldstate &= ~LW_LOCK_MASK;
+	}
 	else
 		oldstate = pg_atomic_sub_fetch_u32(&lock->state, LW_VAL_SHARED);
 
-- 
2.43.0
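
For readers who want to see the overcount-and-reset scheme from the commit
message in isolation, below is a minimal standalone sketch using C11 atomics.
It is not the patched lwlock.c code: the toy_* names, the 2^18 - 1 backend
limit, and the bit layout are all invented for this example, and the whole
wait-queue/semaphore side of LWLocks is left out. It only models how a failed
shared acquisition leaves behind a bounded stray increment that the exclusive
release wipes together with the rest of the lock bits.

/*
 * Toy model of the patched fast path, using C11 atomics.  Illustrative
 * sketch only; none of these names exist in PostgreSQL.
 */
#include <inttypes.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define TOY_MAX_BACKENDS	((UINT32_C(1) << 18) - 1)		/* (power of 2) - 1 */
#define TOY_VAL_SHARED		UINT32_C(1)
#define TOY_VAL_EXCLUSIVE	((TOY_MAX_BACKENDS + 1) << 1)	/* above the widened field */
/*
 * One bit wider than TOY_MAX_BACKENDS: the shared field must hold up to
 * 2 * TOY_MAX_BACKENDS = 2^19 - 2 real-plus-stray references, which fits in
 * 19 bits (2^19 - 1).  That is the arithmetic behind widening LW_SHARED_MASK.
 */
#define TOY_SHARED_MASK		(TOY_VAL_EXCLUSIVE - 1)
#define TOY_LOCK_MASK		(TOY_SHARED_MASK | TOY_VAL_EXCLUSIVE)

typedef struct
{
	_Atomic uint32_t state;
} toy_lock;

/*
 * Optimistic shared acquisition: a single fetch-add instead of a
 * read + compare-and-swap loop.  Returns true on success.  On failure the
 * caller is expected to queue and retry (the "willwait" contract), so the
 * stray increment it leaves behind stays bounded and is wiped by the
 * exclusive holder at release time.
 */
static bool
toy_acquire_shared(toy_lock *lock)
{
	uint32_t	old = atomic_fetch_add(&lock->state, TOY_VAL_SHARED);

	return (old & TOY_VAL_EXCLUSIVE) == 0;
}

/* Exclusive acquisition succeeds only if nobody holds the lock at all. */
static bool
toy_acquire_exclusive(toy_lock *lock)
{
	uint32_t	expected = 0;

	return atomic_compare_exchange_strong(&lock->state, &expected,
										  TOY_VAL_EXCLUSIVE);
}

static void
toy_release_shared(toy_lock *lock)
{
	atomic_fetch_sub(&lock->state, TOY_VAL_SHARED);
}

/*
 * Exclusive release clears all lock bits, which also discards any reference
 * counts that blocked readers accumulated while the exclusive lock was held;
 * this mirrors the pg_atomic_fetch_and_u32() call the patch adds to
 * LWLockReleaseInternal().
 */
static void
toy_release_exclusive(toy_lock *lock)
{
	atomic_fetch_and(&lock->state, ~TOY_LOCK_MASK);
}

int
main(void)
{
	toy_lock	lock = {0};
	bool		got;

	toy_acquire_exclusive(&lock);		/* a writer holds the lock */
	got = toy_acquire_shared(&lock);	/* fails, but leaves a +1 behind */
	printf("reader acquired: %d, state: 0x%" PRIx32 "\n",
		   got, atomic_load(&lock.state));

	toy_release_exclusive(&lock);		/* wipes the stray +1 as well */
	printf("state after exclusive release: 0x%" PRIx32 "\n",
		   atomic_load(&lock.state));

	got = toy_acquire_shared(&lock);	/* now succeeds */
	printf("reader acquired: %d, state: 0x%" PRIx32 "\n",
		   got, atomic_load(&lock.state));
	toy_release_shared(&lock);
	return 0;
}

Run single-threaded, the sketch shows a failed shared attempt that leaves a
stray reference behind (state 0x80001 in this toy layout), a fully cleared
state after the exclusive release, and a successful shared attempt afterwards.
The same bound argument as in the commit message (at most two stray increments
per backend before it sleeps, hence at most MAX_BACKENDS * 2, which fits in
the one-bit-wider mask) is what makes the unconditional fetch-add safe in the
real code.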