From 5553f4211e9d6159a99f3de5a21d1a29baeee73a Mon Sep 17 00:00:00 2001
From: Alvaro Herrera <alvherre@alvh.no-ip.org>
Date: Mon, 23 Dec 2013 16:07:51 -0300
Subject: [PATCH 1/6] Optimize locking a tuple already locked by another
 subxact

The original coding tried to avoid locking tuples twice when other
subtransactions of the current top transaction already held the
requested lock, but the coding was bogus and some cases were slow, as
reported by Oskari Saarenmaa in bug #8470.

In order to make it work, HeapTupleSatisfiesUpdate is changed so that it
always returns HeapTupleBeingUpdated when the tuple is locked, even if
the locker is the current transaction or another subtransaction of the
current transaction; that case previously returned HeapTupleMayBeUpdated
and was handled specially by callers, but it turns out to be less
error-prone to instead have them check whether the reported lock is held
by the current transaction.  That way, more optimizations are enabled in
compute_new_xmax_infomask.

The difference in performance between 9.2 and patched 9.3 is now more
reasonable -- in my measurements it is indistinguishable from noise in
the test case supplied in bug #8470 (instead of 17x worse), and up to 2x
worse in a tweaked version (instead of 56x worse).
---
 contrib/pgrowlocks/pgrowlocks.c        |    9 +--
 src/backend/access/heap/heapam.c       |  105 +++++++++++++++++++++-----------
 src/backend/access/transam/multixact.c |   33 ----------
 src/backend/utils/time/tqual.c         |   20 +++---
 src/include/access/multixact.h         |    1 -
 5 files changed, 82 insertions(+), 86 deletions(-)

diff --git a/contrib/pgrowlocks/pgrowlocks.c b/contrib/pgrowlocks/pgrowlocks.c
index 075d781..9ef90cd 100644
--- a/contrib/pgrowlocks/pgrowlocks.c
+++ b/contrib/pgrowlocks/pgrowlocks.c
@@ -138,14 +138,9 @@ pgrowlocks(PG_FUNCTION_ARGS)
 		infomask = tuple->t_data->t_infomask;
 
 		/*
-		 * a tuple is locked if HTSU returns BeingUpdated, and if it returns
-		 * MayBeUpdated but the Xmax is valid and pointing at us.
+		 * A tuple is locked if HTSU returns BeingUpdated.
 		 */
-		if (htsu == HeapTupleBeingUpdated ||
-			(htsu == HeapTupleMayBeUpdated &&
-			 !(infomask & HEAP_XMAX_INVALID) &&
-			 !(infomask & HEAP_XMAX_IS_MULTI) &&
-			 (xmax == GetCurrentTransactionIdIfAny())))
+		if (htsu == HeapTupleBeingUpdated)
 		{
 			char	  **values;
 
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index b5977da..8523d08 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -2596,6 +2596,28 @@ l1:
 		TransactionId xwait;
 		uint16		infomask;
 
+		/*
+		 * If any subtransaction of the current top transaction already holds
+		 * a lock on this tuple, (and no one else does,) we must skip sleeping
+		 * on the xwait; that would lead to us sleeping on our own transaction,
+		 * which is unlikely to end up well.
+		 *
+		 * Note we don't need to do anything about this when the xwait is a
+		 * multixact, because that code is already prepared to ignore
+		 * subtransactions of the current top transaction.
+		 *
+		 * This must be done before acquiring our tuple lock, to avoid
+		 * deadlocks with other transaction that are already waiting on the
+		 * lock we hold.
+		 */
+		if (!(tp.t_data->t_infomask & HEAP_XMAX_IS_MULTI) &&
+			TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tp.t_data)))
+		{
+			Assert(HEAP_XMAX_IS_LOCKED_ONLY(tp.t_data->t_infomask));
+
+			goto perform_deletion;
+		}
+
 		/* must copy state data before unlocking buffer */
 		xwait = HeapTupleHeaderGetRawXmax(tp.t_data);
 		infomask = tp.t_data->t_infomask;
@@ -2669,6 +2691,7 @@ l1:
 			UpdateXmaxHintBits(tp.t_data, buffer, xwait);
 		}
 
+perform_deletion:
 		/*
 		 * We may overwrite if previous xmax aborted, or if it committed but
 		 * only locked the tuple without updating it.
@@ -3089,6 +3112,30 @@ l2:
 		 * heap_update directly.
 		 */
 
+		/*
+		 * If any subtransaction of the current top transaction already holds
+		 * a lock on this tuple, (and no one else does,) we must skip sleeping
+		 * on the xwait; that would lead to us sleeping on our own transaction,
+		 * which is unlikely to end up well.
+		 *
+		 * Note we don't need to do anything about this when the xwait is a
+		 * multixact, because that code is already prepared to ignore
+		 * subtransactions of the current top transaction.
+		 *
+		 * This must be done before acquiring our tuple lock, to avoid
+		 * deadlocks with other transaction that are already waiting on the
+		 * lock we hold.
+		 */
+		if (!(oldtup.t_data->t_infomask & HEAP_XMAX_IS_MULTI) &&
+			TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(oldtup.t_data)))
+		{
+			Assert(HEAP_XMAX_IS_LOCKED_ONLY(oldtup.t_data->t_infomask));
+
+			can_continue = true;
+			locker_remains = true;
+			goto perform_update;
+		}
+
 		/* must copy state data before unlocking buffer */
 		xwait = HeapTupleHeaderGetRawXmax(oldtup.t_data);
 		infomask = oldtup.t_data->t_infomask;
@@ -3223,6 +3270,7 @@ l2:
 			}
 		}
 
+perform_update:
 		result = can_continue ? HeapTupleMayBeUpdated : HeapTupleUpdated;
 	}
 
@@ -3945,7 +3993,7 @@ l3:
 		TransactionId xwait;
 		uint16		infomask;
 		uint16		infomask2;
-		bool		require_sleep;
+		bool		require_sleep = true;
 		ItemPointerData t_ctid;
 
 		/* must copy state data before unlocking buffer */
@@ -3997,6 +4045,22 @@ l3:
 
 			pfree(members);
 		}
+		else if (HEAP_XMAX_IS_LOCKED_ONLY(infomask) &&
+				 TransactionIdIsCurrentTransactionId(xwait))
+		{
+			/*
+			 * If we already hold a lock on this tuple, we can fall through to
+			 * either create a new MultiXact including the previous lock and
+			 * ours, or just keep the old lock.
+			 */
+			LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+			/* If xmax changed while we weren't looking, start over */
+			if ((tuple->t_data->t_infomask & HEAP_XMAX_IS_MULTI) ||
+				!TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data),
+									 xwait))
+				goto l3;
+			goto perform_locking;
+		}
 
 		/*
 		 * Acquire tuple lock to establish our priority for the tuple.
@@ -4330,6 +4394,8 @@ l3:
 
 		/* By here, we're certain that we hold buffer exclusive lock again */
 
+perform_locking:
+
 		/*
 		 * We may lock if previous xmax aborted, or if it committed but only
 		 * locked the tuple without updating it; or if we didn't have to wait
@@ -4365,37 +4431,6 @@ failed:
 	old_infomask = tuple->t_data->t_infomask;
 
 	/*
-	 * We might already hold the desired lock (or stronger), possibly under a
-	 * different subtransaction of the current top transaction.  If so, there
-	 * is no need to change state or issue a WAL record.  We already handled
-	 * the case where this is true for xmax being a MultiXactId, so now check
-	 * for cases where it is a plain TransactionId.
-	 *
-	 * Note in particular that this covers the case where we already hold
-	 * exclusive lock on the tuple and the caller only wants key share or
-	 * share lock. It would certainly not do to give up the exclusive lock.
-	 */
-	if (!(old_infomask & (HEAP_XMAX_INVALID |
-						  HEAP_XMAX_COMMITTED |
-						  HEAP_XMAX_IS_MULTI)) &&
-		(mode == LockTupleKeyShare ?
-		 (HEAP_XMAX_IS_KEYSHR_LOCKED(old_infomask) ||
-		  HEAP_XMAX_IS_SHR_LOCKED(old_infomask) ||
-		  HEAP_XMAX_IS_EXCL_LOCKED(old_infomask)) :
-		 mode == LockTupleShare ?
-		 (HEAP_XMAX_IS_SHR_LOCKED(old_infomask) ||
-		  HEAP_XMAX_IS_EXCL_LOCKED(old_infomask)) :
-		 (HEAP_XMAX_IS_EXCL_LOCKED(old_infomask))) &&
-		TransactionIdIsCurrentTransactionId(xmax))
-	{
-		LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
-		/* Probably can't hold tuple lock here, but may as well check */
-		if (have_tuple_lock)
-			UnlockTupleTuplock(relation, tid, mode);
-		return HeapTupleMayBeUpdated;
-	}
-
-	/*
 	 * If this is the first possibly-multixact-able operation in the current
 	 * transaction, set my per-backend OldestMemberMXactId setting. We can be
 	 * certain that the transaction will never become a member of any older
@@ -4622,9 +4657,9 @@ l5:
 		 */
 		if (!MultiXactIdIsRunning(xmax))
 		{
-			if (HEAP_XMAX_IS_LOCKED_ONLY(old_infomask) ||
-				TransactionIdDidAbort(MultiXactIdGetUpdateXid(xmax,
-															  old_infomask)))
+			if ((HEAP_XMAX_IS_LOCKED_ONLY(old_infomask) ||
+				 !TransactionIdDidCommit(MultiXactIdGetUpdateXid(xmax,
+																 old_infomask))))
 			{
 				/*
 				 * Reset these bits and restart; otherwise fall through to
diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c
index 03581be..55a8ca7 100644
--- a/src/backend/access/transam/multixact.c
+++ b/src/backend/access/transam/multixact.c
@@ -1275,39 +1275,6 @@ retry:
 }
 
 /*
- * MultiXactHasRunningRemoteMembers
- * 		Does the given multixact have still-live members from
- * 		transactions other than our own?
- */
-bool
-MultiXactHasRunningRemoteMembers(MultiXactId multi)
-{
-	MultiXactMember *members;
-	int			nmembers;
-	int			i;
-
-	nmembers = GetMultiXactIdMembers(multi, &members, true);
-	if (nmembers <= 0)
-		return false;
-
-	for (i = 0; i < nmembers; i++)
-	{
-		/* not interested in our own members */
-		if (TransactionIdIsCurrentTransactionId(members[i].xid))
-			continue;
-
-		if (TransactionIdIsInProgress(members[i].xid))
-		{
-			pfree(members);
-			return true;
-		}
-	}
-
-	pfree(members);
-	return false;
-}
-
-/*
  * mxactMemberComparator
  *		qsort comparison function for MultiXactMember
  *
diff --git a/src/backend/utils/time/tqual.c b/src/backend/utils/time/tqual.c
index 44b4ddc..ad8c7a0 100644
--- a/src/backend/utils/time/tqual.c
+++ b/src/backend/utils/time/tqual.c
@@ -701,20 +701,20 @@ HeapTupleSatisfiesUpdate(HeapTupleHeader tuple, CommandId curcid,
 
 				if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
 				{
-					if (MultiXactHasRunningRemoteMembers(xmax))
+					if (MultiXactIdIsRunning(xmax))
 						return HeapTupleBeingUpdated;
 					else
 						return HeapTupleMayBeUpdated;
 				}
 
-				/* if locker is gone, all's well */
+				/*
+				 * If the locker is gone, then there is nothing of interest
+				 * left in this Xmax; otherwise, report the tuple as
+				 * locked/updated.
+				 */
 				if (!TransactionIdIsInProgress(xmax))
 					return HeapTupleMayBeUpdated;
-
-				if (!TransactionIdIsCurrentTransactionId(xmax))
-					return HeapTupleBeingUpdated;
-				else
-					return HeapTupleMayBeUpdated;
+				return HeapTupleBeingUpdated;
 			}
 
 			if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
@@ -726,10 +726,10 @@ HeapTupleSatisfiesUpdate(HeapTupleHeader tuple, CommandId curcid,
 				/* not LOCKED_ONLY, so it has to have an xmax */
 				Assert(TransactionIdIsValid(xmax));
 
-				/* updating subtransaction must have aborted */
+				/* deleting subtransaction must have aborted */
 				if (!TransactionIdIsCurrentTransactionId(xmax))
 				{
-					if (MultiXactHasRunningRemoteMembers(HeapTupleHeaderGetRawXmax(tuple)))
+					if (MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple)))
 						return HeapTupleBeingUpdated;
 					return HeapTupleMayBeUpdated;
 				}
@@ -846,7 +846,7 @@ HeapTupleSatisfiesUpdate(HeapTupleHeader tuple, CommandId curcid,
 	if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple)))
 	{
 		if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
-			return HeapTupleMayBeUpdated;
+			return HeapTupleBeingUpdated;
 		if (HeapTupleHeaderGetCmax(tuple) >= curcid)
 			return HeapTupleSelfUpdated;		/* updated after scan started */
 		else
diff --git a/src/include/access/multixact.h b/src/include/access/multixact.h
index 5f82907..0e3b273 100644
--- a/src/include/access/multixact.h
+++ b/src/include/access/multixact.h
@@ -89,7 +89,6 @@ extern bool MultiXactIdIsRunning(MultiXactId multi);
 extern void MultiXactIdSetOldestMember(void);
 extern int GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **xids,
 					  bool allow_old);
-extern bool MultiXactHasRunningRemoteMembers(MultiXactId multi);
 extern bool MultiXactIdPrecedes(MultiXactId multi1, MultiXactId multi2);
 extern bool MultiXactIdPrecedesOrEquals(MultiXactId multi1,
 							MultiXactId multi2);
-- 
1.7.10.4

