From 0e816454c57b851f4aa2743ea776b1e604a27d77 Mon Sep 17 00:00:00 2001
From: Michael Paquier <michael@paquier.xyz>
Date: Tue, 20 Dec 2016 16:47:35 +0900
Subject: [PATCH 2/2] Minimize window between history file and end-of-recovery
 record

Once a standby node is promoted, this makes the assignment of the new
timeline number booked earlier as the history file gets archived immediately.
This way the other nodes are aware that this new timeline number is taken
and should not be assigned to other nodes.

The window between which the history file is archived and the end-of-recovery
record is written cannot be zeroed, but this way it is minimized as much as
possible. The new order of actions prevents as well a corrupted data directory
on failure.
---
 src/backend/access/transam/xlog.c | 27 ++++++++++++++++++---------
 1 file changed, 18 insertions(+), 9 deletions(-)

diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index ea2c523e6e..f23feb47e8 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -7210,6 +7210,24 @@ StartupXLOG(void)
 		else
 			snprintf(reason, sizeof(reason), "no recovery target specified");
 
+		/*
+		 * We are now done reading the old WAL.  Turn off archive fetching if it
+		 * was active, and make a writable copy of the last WAL segment. (Note
+		 * that we also have a copy of the last block of the old WAL in readBuf;
+		 * we will use that below.)
+		 */
+		exitArchiveRecovery(EndOfLogTLI, EndOfLog);
+
+		/*
+		 * Write the timeline history file, and have it archived. After this
+		 * point (or rather, as soon as the file is archived), the timeline
+		 * will appear as "taken" in the WAL archive and to any standby servers.
+		 * If we crash before actually switching to the new timeline, standby
+		 * servers will nevertheless think that we switched to the new timeline,
+		 * and will try to connect to the new timeline. To minimize the window
+		 * for that, try to do as little as possible between here and writing the
+		 * end-of-recovery record.
+		 */
 		writeTimeLineHistory(ThisTimeLineID, recoveryTargetTLI,
 							 EndRecPtr, reason);
 	}
@@ -7219,15 +7237,6 @@ StartupXLOG(void)
 	XLogCtl->PrevTimeLineID = PrevTimeLineID;
 
 	/*
-	 * We are now done reading the old WAL.  Turn off archive fetching if it
-	 * was active, and make a writable copy of the last WAL segment. (Note
-	 * that we also have a copy of the last block of the old WAL in readBuf;
-	 * we will use that below.)
-	 */
-	if (ArchiveRecoveryRequested)
-		exitArchiveRecovery(EndOfLogTLI, EndOfLog);
-
-	/*
 	 * Prepare to write WAL starting at EndOfLog position, and init xlog
 	 * buffer cache using the block containing the last record from the
 	 * previous incarnation.
-- 
2.11.0

