From 078e81ce389680fb98a8b5c2fb6cfb3fb42f892e Mon Sep 17 00:00:00 2001
From: Nathan Bossart <nathandbossart@gmail.com>
Date: Fri, 23 Dec 2022 16:35:25 -0800
Subject: [PATCH v6 1/3] Move the code to restore files via the shell to a
 separate file.

This is preparatory work for allowing more extensibility in this
area.
---
 src/backend/access/transam/Makefile        |   1 +
 src/backend/access/transam/meson.build     |   1 +
 src/backend/access/transam/shell_restore.c | 158 +++++++++++++++++++++
 src/backend/access/transam/xlog.c          |  37 +++--
 src/backend/access/transam/xlogarchive.c   | 120 +---------------
 src/include/access/xlogarchive.h           |   7 +-
 6 files changed, 197 insertions(+), 127 deletions(-)
 create mode 100644 src/backend/access/transam/shell_restore.c

diff --git a/src/backend/access/transam/Makefile b/src/backend/access/transam/Makefile
index 661c55a9db..099c315d03 100644
--- a/src/backend/access/transam/Makefile
+++ b/src/backend/access/transam/Makefile
@@ -19,6 +19,7 @@ OBJS = \
 	multixact.o \
 	parallel.o \
 	rmgr.o \
+	shell_restore.o \
 	slru.o \
 	subtrans.o \
 	timeline.o \
diff --git a/src/backend/access/transam/meson.build b/src/backend/access/transam/meson.build
index 8920c1bfce..3031c2f6cf 100644
--- a/src/backend/access/transam/meson.build
+++ b/src/backend/access/transam/meson.build
@@ -7,6 +7,7 @@ backend_sources += files(
   'multixact.c',
   'parallel.c',
   'rmgr.c',
+  'shell_restore.c',
   'slru.c',
   'subtrans.c',
   'timeline.c',
diff --git a/src/backend/access/transam/shell_restore.c b/src/backend/access/transam/shell_restore.c
new file mode 100644
index 0000000000..f52f0b92a4
--- /dev/null
+++ b/src/backend/access/transam/shell_restore.c
@@ -0,0 +1,158 @@
+/*-------------------------------------------------------------------------
+ *
+ * shell_restore.c
+ *
+ * These recovery functions use a user-specified shell command (e.g., the
+ * restore_command GUC).
+ *
+ * Copyright (c) 2022, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *		src/backend/access/transam/shell_restore.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <signal.h>
+
+#include "access/xlogarchive.h"
+#include "access/xlogrecovery.h"
+#include "common/archive.h"
+#include "common/percentrepl.h"
+#include "storage/ipc.h"
+#include "utils/wait_event.h"
+
+static void ExecuteRecoveryCommand(const char *command,
+								   const char *commandName, bool failOnSignal,
+								   uint32 wait_event_info,
+								   const char *lastRestartPointFileName);
+
+bool
+shell_restore(const char *file, const char *path,
+			  const char *lastRestartPointFileName)
+{
+	char	   *cmd;
+	int			rc;
+
+	/* Build the restore command to execute */
+	cmd = BuildRestoreCommand(recoveryRestoreCommand, path, file,
+							  lastRestartPointFileName);
+
+	ereport(DEBUG3,
+			(errmsg_internal("executing restore command \"%s\"", cmd)));
+
+	/*
+	 * Copy xlog from archival storage to XLOGDIR
+	 */
+	fflush(NULL);
+	pgstat_report_wait_start(WAIT_EVENT_RESTORE_COMMAND);
+	rc = system(cmd);
+	pgstat_report_wait_end();
+
+	pfree(cmd);
+
+	/*
+	 * Remember, we rollforward UNTIL the restore fails so failure here is
+	 * just part of the process... that makes it difficult to determine
+	 * whether the restore failed because there isn't an archive to restore,
+	 * or because the administrator has specified the restore program
+	 * incorrectly.  We have to assume the former.
+	 *
+	 * However, if the failure was due to any sort of signal, it's best to
+	 * punt and abort recovery.  (If we "return false" here, upper levels will
+	 * assume that recovery is complete and start up the database!) It's
+	 * essential to abort on child SIGINT and SIGQUIT, because per spec
+	 * system() ignores SIGINT and SIGQUIT while waiting; if we see one of
+	 * those it's a good bet we should have gotten it too.
+	 *
+	 * On SIGTERM, assume we have received a fast shutdown request, and exit
+	 * cleanly. It's pure chance whether we receive the SIGTERM first, or the
+	 * child process. If we receive it first, the signal handler will call
+	 * proc_exit, otherwise we do it here. If we or the child process received
+	 * SIGTERM for any other reason than a fast shutdown request, postmaster
+	 * will perform an immediate shutdown when it sees us exiting
+	 * unexpectedly.
+	 *
+	 * We treat hard shell errors such as "command not found" as fatal, too.
+	 */
+	if (wait_result_is_signal(rc, SIGTERM))
+		proc_exit(1);
+
+	ereport(wait_result_is_any_signal(rc, true) ? FATAL : DEBUG2,
+			(errmsg("could not restore file \"%s\" from archive: %s",
+					file, wait_result_to_str(rc))));
+
+	return (rc == 0);
+}
+
+void
+shell_archive_cleanup(const char *lastRestartPointFileName)
+{
+	ExecuteRecoveryCommand(archiveCleanupCommand, "archive_cleanup_command",
+						   false, WAIT_EVENT_ARCHIVE_CLEANUP_COMMAND,
+						   lastRestartPointFileName);
+}
+
+void
+shell_recovery_end(const char *lastRestartPointFileName)
+{
+	ExecuteRecoveryCommand(recoveryEndCommand, "recovery_end_command", true,
+						   WAIT_EVENT_RECOVERY_END_COMMAND,
+						   lastRestartPointFileName);
+}
+
+/*
+ * Attempt to execute an external shell command during recovery.
+ *
+ * 'command' is the shell command to be executed, 'commandName' is a
+ * human-readable name describing the command emitted in the logs. If
+ * 'failOnSignal' is true and the command is killed by a signal, a FATAL
+ * error is thrown. Otherwise a WARNING is emitted.
+ *
+ * This is currently used for recovery_end_command and archive_cleanup_command.
+ */
+static void
+ExecuteRecoveryCommand(const char *command, const char *commandName,
+					   bool failOnSignal, uint32 wait_event_info,
+					   const char *lastRestartPointFileName)
+{
+	char	   *xlogRecoveryCmd;
+	int			rc;
+
+	Assert(command && commandName);
+
+	/*
+	 * construct the command to be executed
+	 */
+	xlogRecoveryCmd = replace_percent_placeholders(command, commandName, "r",
+												   lastRestartPointFileName);
+
+	ereport(DEBUG3,
+			(errmsg_internal("executing %s \"%s\"", commandName, command)));
+
+	/*
+	 * execute the constructed command
+	 */
+	fflush(NULL);
+	pgstat_report_wait_start(wait_event_info);
+	rc = system(xlogRecoveryCmd);
+	pgstat_report_wait_end();
+
+	pfree(xlogRecoveryCmd);
+
+	if (rc != 0)
+	{
+		/*
+		 * If the failure was due to any sort of signal, it's best to punt and
+		 * abort recovery.  See comments in shell_restore().
+		 */
+		ereport((failOnSignal && wait_result_is_any_signal(rc, true)) ? FATAL : WARNING,
+		/*------
+		   translator: First %s represents a postgresql.conf parameter name like
+		  "recovery_end_command", the 2nd is the value of that parameter, the
+		  third an already translated error message. */
+				(errmsg("%s \"%s\": %s", commandName,
+						command, wait_result_to_str(rc))));
+	}
+}
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 0070d56b0b..8f47fb7570 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -692,6 +692,7 @@ static char *GetXLogBuffer(XLogRecPtr ptr, TimeLineID tli);
 static XLogRecPtr XLogBytePosToRecPtr(uint64 bytepos);
 static XLogRecPtr XLogBytePosToEndRecPtr(uint64 bytepos);
 static uint64 XLogRecPtrToBytePos(XLogRecPtr ptr);
+static void GetOldestRestartPointFileName(char *fname);
 
 static void WALInsertLockAcquire(void);
 static void WALInsertLockAcquireExclusive(void);
@@ -4887,10 +4888,12 @@ CleanupAfterArchiveRecovery(TimeLineID EndOfLogTLI, XLogRecPtr EndOfLog,
 	 * Execute the recovery_end_command, if any.
 	 */
 	if (recoveryEndCommand && strcmp(recoveryEndCommand, "") != 0)
-		ExecuteRecoveryCommand(recoveryEndCommand,
-							   "recovery_end_command",
-							   true,
-							   WAIT_EVENT_RECOVERY_END_COMMAND);
+	{
+		char		lastRestartPointFname[MAXFNAMELEN];
+
+		GetOldestRestartPointFileName(lastRestartPointFname);
+		shell_recovery_end(lastRestartPointFname);
+	}
 
 	/*
 	 * We switched to a new timeline. Clean up segments on the old timeline.
@@ -7307,10 +7310,12 @@ CreateRestartPoint(int flags)
 	 * Finally, execute archive_cleanup_command, if any.
 	 */
 	if (archiveCleanupCommand && strcmp(archiveCleanupCommand, "") != 0)
-		ExecuteRecoveryCommand(archiveCleanupCommand,
-							   "archive_cleanup_command",
-							   false,
-							   WAIT_EVENT_ARCHIVE_CLEANUP_COMMAND);
+	{
+		char		lastRestartPointFname[MAXFNAMELEN];
+
+		GetOldestRestartPointFileName(lastRestartPointFname);
+		shell_archive_cleanup(lastRestartPointFname);
+	}
 
 	return true;
 }
@@ -8884,6 +8889,22 @@ GetOldestRestartPoint(XLogRecPtr *oldrecptr, TimeLineID *oldtli)
 	LWLockRelease(ControlFileLock);
 }
 
+/*
+ * Returns the WAL file name for the last checkpoint or restartpoint.  This is
+ * the oldest WAL file that we still need if we have to restart recovery.
+ */
+static void
+GetOldestRestartPointFileName(char *fname)
+{
+	XLogRecPtr	restartRedoPtr;
+	TimeLineID	restartTli;
+	XLogSegNo	restartSegNo;
+
+	GetOldestRestartPoint(&restartRedoPtr, &restartTli);
+	XLByteToSeg(restartRedoPtr, restartSegNo, wal_segment_size);
+	XLogFileName(fname, restartTli, restartSegNo, wal_segment_size);
+}
+
 /* Thin wrapper around ShutdownWalRcv(). */
 void
 XLogShutdownWalRcv(void)
diff --git a/src/backend/access/transam/xlogarchive.c b/src/backend/access/transam/xlogarchive.c
index fcc87ff44f..b5cb060d55 100644
--- a/src/backend/access/transam/xlogarchive.c
+++ b/src/backend/access/transam/xlogarchive.c
@@ -23,7 +23,6 @@
 #include "access/xlog_internal.h"
 #include "access/xlogarchive.h"
 #include "common/archive.h"
-#include "common/percentrepl.h"
 #include "miscadmin.h"
 #include "pgstat.h"
 #include "postmaster/startup.h"
@@ -57,9 +56,8 @@ RestoreArchivedFile(char *path, const char *xlogfname,
 					bool cleanupEnabled)
 {
 	char		xlogpath[MAXPGPATH];
-	char	   *xlogRestoreCmd;
 	char		lastRestartPointFname[MAXPGPATH];
-	int			rc;
+	bool		ret;
 	struct stat stat_buf;
 	XLogSegNo	restartSegNo;
 	XLogRecPtr	restartRedoPtr;
@@ -150,15 +148,6 @@ RestoreArchivedFile(char *path, const char *xlogfname,
 	else
 		XLogFileName(lastRestartPointFname, 0, 0L, wal_segment_size);
 
-	/* Build the restore command to execute */
-	xlogRestoreCmd = BuildRestoreCommand(recoveryRestoreCommand,
-										 xlogpath, xlogfname,
-										 lastRestartPointFname);
-
-	ereport(DEBUG3,
-			(errmsg_internal("executing restore command \"%s\"",
-							 xlogRestoreCmd)));
-
 	/*
 	 * Check signals before restore command and reset afterwards.
 	 */
@@ -167,15 +156,11 @@ RestoreArchivedFile(char *path, const char *xlogfname,
 	/*
 	 * Copy xlog from archival storage to XLOGDIR
 	 */
-	fflush(NULL);
-	pgstat_report_wait_start(WAIT_EVENT_RESTORE_COMMAND);
-	rc = system(xlogRestoreCmd);
-	pgstat_report_wait_end();
+	ret = shell_restore(xlogfname, xlogpath, lastRestartPointFname);
 
 	PostRestoreCommand();
-	pfree(xlogRestoreCmd);
 
-	if (rc == 0)
+	if (ret)
 	{
 		/*
 		 * command apparently succeeded, but let's make sure the file is
@@ -231,37 +216,6 @@ RestoreArchivedFile(char *path, const char *xlogfname,
 		}
 	}
 
-	/*
-	 * Remember, we rollforward UNTIL the restore fails so failure here is
-	 * just part of the process... that makes it difficult to determine
-	 * whether the restore failed because there isn't an archive to restore,
-	 * or because the administrator has specified the restore program
-	 * incorrectly.  We have to assume the former.
-	 *
-	 * However, if the failure was due to any sort of signal, it's best to
-	 * punt and abort recovery.  (If we "return false" here, upper levels will
-	 * assume that recovery is complete and start up the database!) It's
-	 * essential to abort on child SIGINT and SIGQUIT, because per spec
-	 * system() ignores SIGINT and SIGQUIT while waiting; if we see one of
-	 * those it's a good bet we should have gotten it too.
-	 *
-	 * On SIGTERM, assume we have received a fast shutdown request, and exit
-	 * cleanly. It's pure chance whether we receive the SIGTERM first, or the
-	 * child process. If we receive it first, the signal handler will call
-	 * proc_exit, otherwise we do it here. If we or the child process received
-	 * SIGTERM for any other reason than a fast shutdown request, postmaster
-	 * will perform an immediate shutdown when it sees us exiting
-	 * unexpectedly.
-	 *
-	 * We treat hard shell errors such as "command not found" as fatal, too.
-	 */
-	if (wait_result_is_signal(rc, SIGTERM))
-		proc_exit(1);
-
-	ereport(wait_result_is_any_signal(rc, true) ? FATAL : DEBUG2,
-			(errmsg("could not restore file \"%s\" from archive: %s",
-					xlogfname, wait_result_to_str(rc))));
-
 not_available:
 
 	/*
@@ -275,74 +229,6 @@ not_available:
 	return false;
 }
 
-/*
- * Attempt to execute an external shell command during recovery.
- *
- * 'command' is the shell command to be executed, 'commandName' is a
- * human-readable name describing the command emitted in the logs. If
- * 'failOnSignal' is true and the command is killed by a signal, a FATAL
- * error is thrown. Otherwise a WARNING is emitted.
- *
- * This is currently used for recovery_end_command and archive_cleanup_command.
- */
-void
-ExecuteRecoveryCommand(const char *command, const char *commandName,
-					   bool failOnSignal, uint32 wait_event_info)
-{
-	char	   *xlogRecoveryCmd;
-	char		lastRestartPointFname[MAXPGPATH];
-	int			rc;
-	XLogSegNo	restartSegNo;
-	XLogRecPtr	restartRedoPtr;
-	TimeLineID	restartTli;
-
-	Assert(command && commandName);
-
-	/*
-	 * Calculate the archive file cutoff point for use during log shipping
-	 * replication. All files earlier than this point can be deleted from the
-	 * archive, though there is no requirement to do so.
-	 */
-	GetOldestRestartPoint(&restartRedoPtr, &restartTli);
-	XLByteToSeg(restartRedoPtr, restartSegNo, wal_segment_size);
-	XLogFileName(lastRestartPointFname, restartTli, restartSegNo,
-				 wal_segment_size);
-
-	/*
-	 * construct the command to be executed
-	 */
-	xlogRecoveryCmd = replace_percent_placeholders(command, commandName, "r", lastRestartPointFname);
-
-	ereport(DEBUG3,
-			(errmsg_internal("executing %s \"%s\"", commandName, command)));
-
-	/*
-	 * execute the constructed command
-	 */
-	fflush(NULL);
-	pgstat_report_wait_start(wait_event_info);
-	rc = system(xlogRecoveryCmd);
-	pgstat_report_wait_end();
-
-	pfree(xlogRecoveryCmd);
-
-	if (rc != 0)
-	{
-		/*
-		 * If the failure was due to any sort of signal, it's best to punt and
-		 * abort recovery.  See comments in RestoreArchivedFile().
-		 */
-		ereport((failOnSignal && wait_result_is_any_signal(rc, true)) ? FATAL : WARNING,
-		/*------
-		   translator: First %s represents a postgresql.conf parameter name like
-		  "recovery_end_command", the 2nd is the value of that parameter, the
-		  third an already translated error message. */
-				(errmsg("%s \"%s\": %s", commandName,
-						command, wait_result_to_str(rc))));
-	}
-}
-
-
 /*
  * A file was restored from the archive under a temporary filename (path),
  * and now we want to keep it. Rename it under the permanent filename in
diff --git a/src/include/access/xlogarchive.h b/src/include/access/xlogarchive.h
index 31ff206034..299304703e 100644
--- a/src/include/access/xlogarchive.h
+++ b/src/include/access/xlogarchive.h
@@ -20,8 +20,6 @@
 extern bool RestoreArchivedFile(char *path, const char *xlogfname,
 								const char *recovername, off_t expectedSize,
 								bool cleanupEnabled);
-extern void ExecuteRecoveryCommand(const char *command, const char *commandName,
-								   bool failOnSignal, uint32 wait_event_info);
 extern void KeepFileRestoredFromArchive(const char *path, const char *xlogfname);
 extern void XLogArchiveNotify(const char *xlog);
 extern void XLogArchiveNotifySeg(XLogSegNo segno, TimeLineID tli);
@@ -32,4 +30,9 @@ extern bool XLogArchiveIsReady(const char *xlog);
 extern bool XLogArchiveIsReadyOrDone(const char *xlog);
 extern void XLogArchiveCleanup(const char *xlog);
 
+extern bool shell_restore(const char *file, const char *path,
+						  const char *lastRestartPointFileName);
+extern void shell_archive_cleanup(const char *lastRestartPointFileName);
+extern void shell_recovery_end(const char *lastRestartPointFileName);
+
 #endif							/* XLOG_ARCHIVE_H */
-- 
2.25.1

