diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c
index 4f88d3f..0d92a30 100644
--- a/src/backend/storage/lmgr/lwlock.c
+++ b/src/backend/storage/lmgr/lwlock.c
@@ -36,13 +36,27 @@
 /* We use the ShmemLock spinlock to protect LWLockAssign */
 extern slock_t *ShmemLock;
 
+/*
+ * The part of LWLock struct that needs to be swappable with an atomic
+ * compare-and-swap instruction. Assuming we have an atomic 64-bit CAS
+ * instruction, sizeof(cas_t) must be <= 8.
+ */
+typedef union
+{
+	struct
+	{
+		bool	mutex;			/* Protects LWLock and queue of PGPROCs */
+		bool	releaseOK;		/* T if ok to release waiters */
+		bool	waiters;		/* T if wait queue is not empty (head != NULL) */
+		char	exclusive;		/* # of exclusive holders (0 or 1) */
+		uint32	shared;			/* # of shared holders (0..MaxBackends) */
+	} f;
+	uint64 val;
+} LWLock_cas_t;
 
 typedef struct LWLock
 {
-	slock_t		mutex;			/* Protects LWLock and queue of PGPROCs */
-	bool		releaseOK;		/* T if ok to release waiters */
-	char		exclusive;		/* # of exclusive holders (0 or 1) */
-	int			shared;			/* # of shared holders (0..MaxBackends) */
+	LWLock_cas_t		casval;
 	PGPROC	   *head;			/* head of list of waiting PGPROCs */
 	PGPROC	   *tail;			/* tail of list of waiting PGPROCs */
 	/* tail is undefined when head is NULL */
@@ -267,6 +281,9 @@ CreateLWLocks(void)
 	char	   *ptr;
 	int			id;
 
+	StaticAssertStmt(sizeof(LWLock_cas_t) == sizeof(uint64),
+					 "unexpected size of LWLock_cas_t");
+
 	/* Allocate space */
 	ptr = (char *) ShmemAlloc(spaceLocks);
 
@@ -283,10 +300,10 @@ CreateLWLocks(void)
 	 */
 	for (id = 0, lock = LWLockArray; id < numLocks; id++, lock++)
 	{
-		SpinLockInit(&lock->lock.mutex);
-		lock->lock.releaseOK = true;
-		lock->lock.exclusive = 0;
-		lock->lock.shared = 0;
+		lock->lock.casval.val = 0; /* clear possible padding */
+		lock->lock.casval.f.releaseOK = true;
+		lock->lock.casval.f.exclusive = 0;
+		lock->lock.casval.f.shared = 0;
 		lock->lock.head = NULL;
 		lock->lock.tail = NULL;
 	}
@@ -344,6 +361,7 @@ LWLockAcquire(LWLockId lockid, LWLockMode mode)
 	PGPROC	   *proc = MyProc;
 	bool		retry = false;
 	int			extraWaits = 0;
+	LWLock_cas_t oldval;
 
 	PRINT_LWDEBUG("LWLockAcquire", lockid, lock);
 
@@ -392,27 +410,30 @@ LWLockAcquire(LWLockId lockid, LWLockMode mode)
 	 * cycle because the lock is not free when a released waiter finally gets
 	 * to run.	See pgsql-hackers archives for 29-Dec-01.
 	 */
+	oldval.val = 0;
+	oldval.f.mutex = false;
+	oldval.f.releaseOK = true;
+	oldval.f.exclusive = 0;
+	oldval.f.shared = 0;
 	for (;;)
 	{
 		bool		mustwait;
+		LWLock_cas_t newval;
+		LWLock_cas_t x;
 
 		/* Acquire mutex.  Time spent holding mutex should be short! */
-#ifdef LWLOCK_STATS
-		spin_delay_counts[lockid] += SpinLockAcquire(&lock->mutex);
-#else
-		SpinLockAcquire(&lock->mutex);
-#endif
+		newval = oldval;
 
 		/* If retrying, allow LWLockRelease to release waiters again */
 		if (retry)
-			lock->releaseOK = true;
+			newval.f.releaseOK = true;
 
 		/* If I can get the lock, do so quickly. */
 		if (mode == LW_EXCLUSIVE)
 		{
-			if (lock->exclusive == 0 && lock->shared == 0)
+			if (oldval.f.exclusive == 0 && oldval.f.shared == 0)
 			{
-				lock->exclusive++;
+				newval.f.exclusive++;
 				mustwait = false;
 			}
 			else
@@ -420,14 +441,29 @@ LWLockAcquire(LWLockId lockid, LWLockMode mode)
 		}
 		else
 		{
-			if (lock->exclusive == 0)
+			if (oldval.f.exclusive == 0)
 			{
-				lock->shared++;
+				newval.f.shared++;
 				mustwait = false;
 			}
 			else
 				mustwait = true;
 		}
+		newval.f.mutex = mustwait;
+		oldval.f.mutex = false;
+		x.val = __sync_val_compare_and_swap(&lock->casval.val, oldval.val, newval.val);
+		if (x.val != oldval.val)
+		{
+			/* failed to acquire lock or mutex, retry */
+			oldval = x;
+			continue;
+		}
+
+#ifdef LWLOCK_STATS
+		spin_delay_counts[lockid] += SpinLockAcquire(&lock->mutex);
+#else
+		//SpinLockAcquire(&lock->mutex);
+#endif
 
 		if (!mustwait)
 			break;				/* got the lock */
@@ -452,7 +488,11 @@ LWLockAcquire(LWLockId lockid, LWLockMode mode)
 		lock->tail = proc;
 
 		/* Can release the mutex now */
-		SpinLockRelease(&lock->mutex);
+		oldval = newval;
+		newval.f.waiters = true;
+		newval.f.mutex = false;
+		x.val = __sync_val_compare_and_swap(&lock->casval.val, oldval.val, newval.val);
+		Assert(x.val == oldval.val);
 
 		/*
 		 * Wait until awakened.
@@ -492,7 +532,6 @@ LWLockAcquire(LWLockId lockid, LWLockMode mode)
 	}
 
 	/* We are done updating shared state of the lock itself. */
-	SpinLockRelease(&lock->mutex);
 
 	TRACE_POSTGRESQL_LWLOCK_ACQUIRE(lockid, mode);
 
@@ -518,6 +557,9 @@ LWLockConditionalAcquire(LWLockId lockid, LWLockMode mode)
 {
 	volatile LWLock *lock = &(LWLockArray[lockid].lock);
 	bool		mustwait;
+	LWLock_cas_t oldval;
+	LWLock_cas_t newval;
+	LWLock_cas_t x;
 
 	PRINT_LWDEBUG("LWLockConditionalAcquire", lockid, lock);
 
@@ -533,14 +575,20 @@ LWLockConditionalAcquire(LWLockId lockid, LWLockMode mode)
 	HOLD_INTERRUPTS();
 
 	/* Acquire mutex.  Time spent holding mutex should be short! */
-	SpinLockAcquire(&lock->mutex);
-
+	oldval.val = 0;
+	oldval.f.mutex = false;
+	oldval.f.releaseOK = true;
+	oldval.f.exclusive = 0;
+	oldval.f.shared = 0;
+
+retry:
+	newval = oldval;
 	/* If I can get the lock, do so quickly. */
 	if (mode == LW_EXCLUSIVE)
 	{
-		if (lock->exclusive == 0 && lock->shared == 0)
+		if (oldval.f.exclusive == 0 && oldval.f.shared == 0)
 		{
-			lock->exclusive++;
+			newval.f.exclusive++;
 			mustwait = false;
 		}
 		else
@@ -548,17 +596,29 @@ LWLockConditionalAcquire(LWLockId lockid, LWLockMode mode)
 	}
 	else
 	{
-		if (lock->exclusive == 0)
+		if (oldval.f.exclusive == 0)
 		{
-			lock->shared++;
+			newval.f.shared++;
 			mustwait = false;
 		}
 		else
 			mustwait = true;
 	}
 
+	if (!mustwait)
+	{
+		newval.f.mutex = false;
+		oldval.f.mutex = false;
+		x.val = __sync_val_compare_and_swap(&lock->casval.val, oldval.val, newval.val);
+		if (x.val != oldval.val)
+		{
+			/* XXX would it be better to give up on CAS race? */
+			oldval = x;
+			goto retry;
+		}
+	}
+
 	/* We are done updating shared state of the lock itself. */
-	SpinLockRelease(&lock->mutex);
 
 	if (mustwait)
 	{
@@ -598,6 +658,9 @@ LWLockAcquireOrWait(LWLockId lockid, LWLockMode mode)
 	PGPROC	   *proc = MyProc;
 	bool		mustwait;
 	int			extraWaits = 0;
+	LWLock_cas_t oldval;
+	LWLock_cas_t newval;
+	LWLock_cas_t x;
 
 	PRINT_LWDEBUG("LWLockAcquireOrWait", lockid, lock);
 
@@ -619,14 +682,21 @@ LWLockAcquireOrWait(LWLockId lockid, LWLockMode mode)
 	HOLD_INTERRUPTS();
 
 	/* Acquire mutex.  Time spent holding mutex should be short! */
-	SpinLockAcquire(&lock->mutex);
+	oldval.val = 0;
+	oldval.f.mutex = false;
+	oldval.f.releaseOK = true;
+	oldval.f.exclusive = 0;
+	oldval.f.shared = 0;
+
+retry:
+	newval = oldval;
 
 	/* If I can get the lock, do so quickly. */
 	if (mode == LW_EXCLUSIVE)
 	{
-		if (lock->exclusive == 0 && lock->shared == 0)
+		if (oldval.f.exclusive == 0 && oldval.f.shared == 0)
 		{
-			lock->exclusive++;
+			newval.f.exclusive++;
 			mustwait = false;
 		}
 		else
@@ -634,15 +704,24 @@ LWLockAcquireOrWait(LWLockId lockid, LWLockMode mode)
 	}
 	else
 	{
-		if (lock->exclusive == 0)
+		if (oldval.f.exclusive == 0)
 		{
-			lock->shared++;
+			newval.f.shared++;
 			mustwait = false;
 		}
 		else
 			mustwait = true;
 	}
 
+	newval.f.mutex = mustwait;
+	oldval.f.mutex = false;
+	x.val = __sync_val_compare_and_swap(&lock->casval.val, oldval.val, newval.val);
+	if (x.val != oldval.val)
+	{
+		oldval = x;
+		goto retry;
+	}
+
 	if (mustwait)
 	{
 		/*
@@ -665,7 +744,11 @@ LWLockAcquireOrWait(LWLockId lockid, LWLockMode mode)
 		lock->tail = proc;
 
 		/* Can release the mutex now */
-		SpinLockRelease(&lock->mutex);
+		oldval = newval;
+		newval.f.mutex = false;
+		newval.f.waiters = true;
+		x.val = __sync_val_compare_and_swap(&lock->casval.val, oldval.val, newval.val);
+		Assert(x.val == oldval.val);
 
 		/*
 		 * Wait until awakened.  Like in LWLockAcquire, be prepared for bogus
@@ -694,8 +777,7 @@ LWLockAcquireOrWait(LWLockId lockid, LWLockMode mode)
 	}
 	else
 	{
-		/* We are done updating shared state of the lock itself. */
-		SpinLockRelease(&lock->mutex);
+		/* No need to update the lock */
 	}
 
 	/*
@@ -731,6 +813,9 @@ LWLockRelease(LWLockId lockid)
 	PGPROC	   *head;
 	PGPROC	   *proc;
 	int			i;
+	LWLock_cas_t oldval;
+	LWLock_cas_t newval;
+	LWLock_cas_t x;
 
 	PRINT_LWDEBUG("LWLockRelease", lockid, lock);
 
@@ -749,16 +834,38 @@ LWLockRelease(LWLockId lockid)
 	for (; i < num_held_lwlocks; i++)
 		held_lwlocks[i] = held_lwlocks[i + 1];
 
-	/* Acquire mutex.  Time spent holding mutex should be short! */
-	SpinLockAcquire(&lock->mutex);
+	oldval.val = 0;
+	oldval.f.mutex = false;
+	oldval.f.releaseOK = true;
+	/* guess that we're holding the lock in exclusive mode, and no waiters */
+	oldval.f.waiters = 0;
+	oldval.f.exclusive = 1;
+	oldval.f.shared = 0;
+
+retry:
+	newval = oldval;
 
 	/* Release my hold on lock */
-	if (lock->exclusive > 0)
-		lock->exclusive--;
+	if (oldval.f.exclusive > 0)
+		newval.f.exclusive--;
 	else
 	{
-		Assert(lock->shared > 0);
-		lock->shared--;
+		Assert(oldval.f.shared > 0);
+		newval.f.shared--;
+	}
+
+	/*
+	 * If there are waiters, need to acquire mutex to manipulate the waiter
+	 * list.
+	 */
+	newval.f.mutex = oldval.f.waiters;
+	oldval.f.mutex = false;
+	x.val = __sync_val_compare_and_swap(&lock->casval.val, oldval.val, newval.val);
+	if (x.val != oldval.val)
+	{
+		/* someone else was holding mutex, retry */
+		oldval = x;
+		goto retry;
 	}
 
 	/*
@@ -767,10 +874,17 @@ LWLockRelease(LWLockId lockid)
 	 * if someone has already awakened waiters that haven't yet acquired the
 	 * lock.
 	 */
-	head = lock->head;
+	if (oldval.f.waiters)
+	{
+		head = lock->head;
+		Assert(head != NULL);
+	}
+	else
+		head = NULL;
 	if (head != NULL)
 	{
-		if (lock->exclusive == 0 && lock->shared == 0 && lock->releaseOK)
+		oldval = newval;
+		if (newval.f.exclusive == 0 && newval.f.shared == 0 && newval.f.releaseOK)
 		{
 			/*
 			 * Remove the to-be-awakened PGPROCs from the queue.
@@ -812,17 +926,23 @@ LWLockRelease(LWLockId lockid)
 			if (proc->lwWaitMode != LW_WAIT_UNTIL_FREE)
 				releaseOK = false;
 
-			lock->releaseOK = releaseOK;
+			newval.f.releaseOK = releaseOK;
 		}
 		else
 		{
 			/* lock is still held, can't awaken anything */
 			head = NULL;
 		}
+
+		/* release mutex */
+		newval.f.mutex = false;
+		if (lock->head == NULL)
+			newval.f.waiters = false;
+		x.val = __sync_val_compare_and_swap(&lock->casval.val, oldval.val, newval.val);
+		Assert(x.val == oldval.val);
 	}
 
 	/* We are done updating shared state of the lock itself. */
-	SpinLockRelease(&lock->mutex);
 
 	TRACE_POSTGRESQL_LWLOCK_RELEASE(lockid);
 
