From 184f72ac34e7b787527dfa8ed76c1fbd2d970407 Mon Sep 17 00:00:00 2001
From: Kyotaro Horiguchi <horiguchi.kyotaro@lab.ntt.co.jp>
Date: Tue, 10 Feb 2015 14:56:23 +0900
Subject: [PATCH] Make walreceiver to keep regular reply message even on heavy
 load v2.

Wal receiver cannot send receiver reply message while it is receiving
continuous WAL stream caused by heavy load or something else. This
patch makes wal receiver to send reply message even on such a
situation.
---
 src/backend/replication/walreceiver.c | 59 ++++++++++++++++++++---------------
 1 file changed, 33 insertions(+), 26 deletions(-)

diff --git a/src/backend/replication/walreceiver.c b/src/backend/replication/walreceiver.c
index c2d4ed3..43d218d 100644
--- a/src/backend/replication/walreceiver.c
+++ b/src/backend/replication/walreceiver.c
@@ -91,6 +91,12 @@ static XLogSegNo recvSegNo = 0;
 static uint32 recvOff = 0;
 
 /*
+ * The time when the last wal receiver reply was sent. This is used to escape
+ * from receiving loop so that replay messages are kept regulary.
+ */
+static TimestampTz walRcvReplySendTime = 0;
+
+/*
  * Flags set by interrupt handlers of walreceiver for later service in the
  * main loop.
  */
@@ -423,32 +429,34 @@ WalReceiverMain(void)
 					 * Process the received data, and any subsequent data we
 					 * can read without blocking.
 					 */
-					for (;;)
+					while (len > 0)
 					{
-						if (len > 0)
-						{
-							/*
-							 * Something was received from master, so reset
-							 * timeout
-							 */
-							last_recv_timestamp = GetCurrentTimestamp();
-							ping_sent = false;
-							XLogWalRcvProcessMsg(buf[0], &buf[1], len - 1);
-						}
-						else if (len == 0)
-							break;
-						else if (len < 0)
-						{
-							ereport(LOG,
-									(errmsg("replication terminated by primary server"),
-									 errdetail("End of WAL reached on timeline %u at %X/%X.",
-											   startpointTLI,
-											   (uint32) (LogstreamResult.Write >> 32), (uint32) LogstreamResult.Write)));
-							endofwal = true;
-							break;
-						}
+						/*
+						 * Something was received from master, so reset
+						 * timeout
+						 */
+						last_recv_timestamp = GetCurrentTimestamp();
+						ping_sent = false;
+						XLogWalRcvProcessMsg(buf[0], &buf[1], len - 1);
+
+						/* Send reply if needed */
+						if (TimestampDifferenceExceeds(walRcvReplySendTime,
+											last_recv_timestamp,
+											wal_receiver_status_interval * 1000))
+							XLogWalRcvSendReply(true, false);
+
 						len = walrcv_receive(0, &buf);
 					}
+					if (len < 0)
+					{
+						ereport(LOG,
+								(errmsg("replication terminated by primary server"),
+								 errdetail("End of WAL reached on timeline %u at %X/%X.",
+										   startpointTLI,
+										   (uint32) (LogstreamResult.Write >> 32), (uint32) LogstreamResult.Write)));
+						endofwal = true;
+						break;
+					}
 
 					/* Let the master know that we received some data. */
 					XLogWalRcvSendReply(false, false);
@@ -1040,7 +1048,6 @@ XLogWalRcvSendReply(bool force, bool requestReply)
 	static XLogRecPtr writePtr = 0;
 	static XLogRecPtr flushPtr = 0;
 	XLogRecPtr	applyPtr;
-	static TimestampTz sendTime = 0;
 	TimestampTz now;
 
 	/*
@@ -1065,10 +1072,10 @@ XLogWalRcvSendReply(bool force, bool requestReply)
 	if (!force
 		&& writePtr == LogstreamResult.Write
 		&& flushPtr == LogstreamResult.Flush
-		&& !TimestampDifferenceExceeds(sendTime, now,
+		&& !TimestampDifferenceExceeds(walRcvReplySendTime, now,
 									   wal_receiver_status_interval * 1000))
 		return;
-	sendTime = now;
+	walRcvReplySendTime = now;
 
 	/* Construct a new message */
 	writePtr = LogstreamResult.Write;
-- 
2.1.0.GIT

