From 3ddeac5eb01da1642a6c7eb9a290a3ded08045dd Mon Sep 17 00:00:00 2001
From: Petr Jelinek <pjmodos@pjmodos.net>
Date: Thu, 15 Dec 2016 09:43:17 +0100
Subject: [PATCH 2/2] Improve behavior of ReplicationSlotCleanup()

Make sure we have slot locked properly for modification everywhere and
also cleanup MyReplicationSlot so to reduce code duplication.
---
 src/backend/replication/slot.c      | 58 ++++++++++++++++++++++++++++---------
 src/backend/replication/walsender.c |  3 --
 src/backend/storage/lmgr/proc.c     |  6 +---
 src/backend/tcop/postgres.c         |  4 ---
 4 files changed, 45 insertions(+), 26 deletions(-)

diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c
index d8ed005..ed50e49 100644
--- a/src/backend/replication/slot.c
+++ b/src/backend/replication/slot.c
@@ -412,30 +412,60 @@ ReplicationSlotRelease(void)
 }
 
 /*
- * Cleanup all temporary slots created in current session.
+ * Search the replication slot list for temporary slot owned by current
+ * session and return it. Returns NULL if not found.
  */
-void
-ReplicationSlotCleanup()
+static ReplicationSlot *
+FindMyNextTempSlot(void)
 {
-	int			i;
-
-	Assert(MyReplicationSlot == NULL);
+	int					i;
+	ReplicationSlot	   *slot;
 
-	/*
-	 * No need for locking as we are only interested in slots active in
-	 * current process and those are not touched by other processes.
-	 */
+	LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
 	for (i = 0; i < max_replication_slots; i++)
 	{
-		ReplicationSlot *s = &ReplicationSlotCtl->replication_slots[i];
+		slot = &ReplicationSlotCtl->replication_slots[i];
 
-		if (s->active_pid == MyProcPid)
+		SpinLockAcquire(&slot->mutex);
+		if (slot->active_pid == MyProcPid)
 		{
-			Assert(s->in_use && s->data.persistency == RS_TEMPORARY);
+			Assert(slot->in_use && slot->data.persistency == RS_TEMPORARY);
 
-			ReplicationSlotDropPtr(s);
+			SpinLockRelease(&slot->mutex);
+			LWLockRelease(ReplicationSlotControlLock);
+			return slot;
 		}
+		else
+			SpinLockRelease(&slot->mutex);
 	}
+	LWLockRelease(ReplicationSlotControlLock);
+
+	return NULL;
+}
+
+/*
+ * Cleanup all any slot state we might have. This includes releasing any
+ * active replication slot in current session and dropping all temporary
+ * slots created in current session.
+ */
+void
+ReplicationSlotCleanup(void)
+{
+	ReplicationSlot	   *slot;
+
+	if (MyReplicationSlot != NULL)
+		ReplicationSlotRelease();
+
+	/*
+	 * Find all temp slots and drop them. This does repeated scans of the
+	 * array so it's theoretically quadratic algorithm, but in practice
+	 * single session does not have reason to create many temporary slots
+	 * so the negative performance effects should be minimal.
+	 * If this turns out to be problematic in terms of performance we'll
+	 * need to rethink locking guarantees around the replication slots.
+	 */
+	while ((slot = FindMyNextTempSlot()) != NULL)
+		ReplicationSlotDropPtr(slot);
 }
 
 /*
diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c
index b14d821..c90ca92 100644
--- a/src/backend/replication/walsender.c
+++ b/src/backend/replication/walsender.c
@@ -263,9 +263,6 @@ WalSndErrorCleanup(void)
 		sendFile = -1;
 	}
 
-	if (MyReplicationSlot != NULL)
-		ReplicationSlotRelease();
-
 	ReplicationSlotCleanup();
 
 	replication_active = false;
diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c
index e9555f2..d84a4b4 100644
--- a/src/backend/storage/lmgr/proc.c
+++ b/src/backend/storage/lmgr/proc.c
@@ -806,11 +806,7 @@ ProcKill(int code, Datum arg)
 	/* Cancel any pending condition variable sleep, too */
 	ConditionVariableCancelSleep();
 
-	/* Make sure active replication slots are released */
-	if (MyReplicationSlot != NULL)
-		ReplicationSlotRelease();
-
-	/* Also cleanup all the temporary slots. */
+	/* Release replication slots held by current session. */
 	ReplicationSlotCleanup();
 
 	/*
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index b179231..7213ffd 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -3875,10 +3875,6 @@ PostgresMain(int argc, char *argv[],
 		 * so releasing here is fine. There's another cleanup in ProcKill()
 		 * ensuring we'll correctly cleanup on FATAL errors as well.
 		 */
-		if (MyReplicationSlot != NULL)
-			ReplicationSlotRelease();
-
-		/* We also want to cleanup temporary slots on error. */
 		ReplicationSlotCleanup();
 
 		/*
-- 
2.7.4

