From b2862294f52b7262d7ec0cb2379688b2176169ea Mon Sep 17 00:00:00 2001
From: Michael Paquier <michael@paquier.xyz>
Date: Thu, 28 Jun 2018 14:38:50 +0900
Subject: [PATCH 2/2] Minimize window between history file and end-of-recovery
 record

Once a standby node is promoted, this books the new timeline number
earlier, as the history file gets archived immediately.  This way the
other nodes are aware that this new timeline number is taken and should
not be assigned to other nodes.

The window between the archiving of the history file and the writing of
the end-of-recovery record cannot be reduced to zero, but this way it is
minimized as much as possible.  The new order of actions also prevents a
corrupted data directory on failure.

Reported-by: Magnus Hagander
Author: Heikki Linnakangas
Reviewed-by: Michael Paquier
Discussion: https://postgr.es/m/CABUevEz09XY2EevA2dLjPCY-C5UO4Hq=XxmXLmF6ipNFecbShQ@mail.gmail.com
---
 src/backend/access/transam/xlog.c | 27 ++++++++++++++++++---------
 1 file changed, 18 insertions(+), 9 deletions(-)

diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 3695258e6f..f36d8049bb 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -7523,6 +7523,24 @@ StartupXLOG(void)
 		else
 			snprintf(reason, sizeof(reason), "no recovery target specified");
 
+		/*
+		 * We are now done reading the old WAL.  Turn off archive fetching if
+		 * it was active, and make a writable copy of the last WAL segment.
+		 * (Note that we also have a copy of the last block of the old WAL in
+		 * readBuf; we will use that below.)
+		 */
+		exitArchiveRecovery(EndOfLogTLI, EndOfLog);
+
+		/*
+		 * Write the timeline history file, and have it archived. After this
+		 * point (or rather, as soon as the file is archived), the timeline
+		 * will appear as "taken" in the WAL archive and to any standby
+		 * servers. If we crash before actually switching to the new timeline,
+		 * standby servers will nevertheless think that we switched to the new
+		 * timeline, and will try to connect to the new timeline. To minimize
+		 * the window for that, try to do as little as possible between here
+		 * and writing the end-of-recovery record.
+		 */
 		writeTimeLineHistory(ThisTimeLineID, recoveryTargetTLI,
 							 EndRecPtr, reason);
 	}
@@ -7531,15 +7549,6 @@ StartupXLOG(void)
 	XLogCtl->ThisTimeLineID = ThisTimeLineID;
 	XLogCtl->PrevTimeLineID = PrevTimeLineID;
 
-	/*
-	 * We are now done reading the old WAL.  Turn off archive fetching if it
-	 * was active, and make a writable copy of the last WAL segment. (Note
-	 * that we also have a copy of the last block of the old WAL in readBuf;
-	 * we will use that below.)
-	 */
-	if (ArchiveRecoveryRequested)
-		exitArchiveRecovery(EndOfLogTLI, EndOfLog);
-
 	/*
 	 * Prepare to write WAL starting at EndOfLog location, and init xlog
 	 * buffer cache using the block containing the last record from the
-- 
2.18.0

