From 601dac12627658902b4413b9a0c318f235b8e48f Mon Sep 17 00:00:00 2001
From: Nathan Bossart <bossartn@amazon.com>
Date: Sun, 5 Dec 2021 21:42:52 -0800
Subject: [PATCH v4 4/8] Move pgsql_tmp file removal to custodian process.

With this change, startup (and restart after a crash) only stages the
pgsql_tmp directories for deletion by renaming them.  The custodian
process then removes all the files in the staged directories, as well
as the staged directories themselves.  This should help avoid long
startup delays when many temporary files have been left behind.
---
 src/backend/postmaster/custodian.c  | 13 +++++++++++-
 src/backend/postmaster/postmaster.c | 14 ++++++++-----
 src/backend/storage/file/fd.c       | 32 +++++++++++++++++++++--------
 3 files changed, 44 insertions(+), 15 deletions(-)

diff --git a/src/backend/postmaster/custodian.c b/src/backend/postmaster/custodian.c
index dd86f0f5ce..79bc4a7065 100644
--- a/src/backend/postmaster/custodian.c
+++ b/src/backend/postmaster/custodian.c
@@ -194,7 +194,18 @@ CustodianMain(void)
 
 		start_time = (pg_time_t) time(NULL);
 
-		/* TODO: offloaded tasks go here */
+		/*
+		 * Remove any pgsql_tmp directories that have been staged for deletion.
+		 * Since pgsql_tmp directories can accumulate many files, removing all
+		 * of the files during startup (which we used to do) can take a very
+		 * long time.  To avoid delaying startup, we simply have startup rename
+		 * the temporary directories, and we clean them up here.
+		 *
+		 * pgsql_tmp directories are not staged or cleaned in single-user mode,
+		 * so we don't need any extra handling outside of the custodian process
+		 * for this.
+		 */
+		RemovePgTempFiles(false, false);
 
 		/* Calculate how long to sleep */
 		end_time = (pg_time_t) time(NULL);
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index a6bc9feabd..a8303a6482 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -1389,9 +1389,11 @@ PostmasterMain(int argc, char *argv[])
 	/*
 	 * Remove old temporary files.  At this point there can be no other
 	 * Postgres processes running in this directory, so this should be safe.
+	 *
+	 * Note that this just stages the pgsql_tmp directories for deletion.  The
+	 * custodian process is responsible for actually removing the files.
 	 */
 	RemovePgTempFiles(true, true);
-	RemovePgTempFiles(false, false);
 
 	/*
 	 * Initialize stats collection subsystem (this does NOT start the
@@ -4137,12 +4139,14 @@ PostmasterStateMachine(void)
 		ereport(LOG,
 				(errmsg("all server processes terminated; reinitializing")));
 
-		/* remove leftover temporary files after a crash */
+		/*
+		 * Remove leftover temporary files after a crash.
+		 *
+		 * Note that this just stages the pgsql_tmp directories for deletion.
+		 * The custodian process is responsible for actually removing the files.
+		 */
 		if (remove_temp_files_after_crash)
-		{
 			RemovePgTempFiles(true, true);
-			RemovePgTempFiles(false, false);
-		}
 
 		/* allow background workers to immediately restart */
 		ResetBackgroundWorkerCrashTimes();
diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c
index d3019a4b67..5d39a31d14 100644
--- a/src/backend/storage/file/fd.c
+++ b/src/backend/storage/file/fd.c
@@ -97,9 +97,12 @@
 #include "pgstat.h"
 #include "port/pg_iovec.h"
 #include "portability/mem.h"
+#include "postmaster/interrupt.h"
 #include "postmaster/startup.h"
 #include "storage/fd.h"
 #include "storage/ipc.h"
+#include "storage/latch.h"
+#include "storage/proc.h"
 #include "utils/guc.h"
 #include "utils/resowner_private.h"
 
@@ -1640,9 +1643,9 @@ PathNameOpenFilePerm(const char *fileName, int fileFlags, mode_t fileMode)
  *
  * Directories created within the top-level temporary directory should begin
  * with PG_TEMP_FILE_PREFIX, so that they can be identified as temporary and
- * deleted at startup by RemovePgTempFiles().  Further subdirectories below
- * that do not need any particular prefix.
-*/
+ * deleted by RemovePgTempFiles().  Further subdirectories below that do not
+ * need any particular prefix.
+ */
 void
 PathNameCreateTemporaryDir(const char *basedir, const char *directory)
 {
@@ -1840,9 +1843,9 @@ OpenTemporaryFileInTablespace(Oid tblspcOid, bool rejectError)
  *
  * If the file is inside the top-level temporary directory, its name should
  * begin with PG_TEMP_FILE_PREFIX so that it can be identified as temporary
- * and deleted at startup by RemovePgTempFiles().  Alternatively, it can be
- * inside a directory created with PathNameCreateTemporaryDir(), in which case
- * the prefix isn't needed.
+ * and deleted by RemovePgTempFiles().  Alternatively, it can be inside a
+ * directory created with PathNameCreateTemporaryDir(), in which case the prefix
+ * isn't needed.
  */
 File
 PathNameCreateTemporaryFile(const char *path, bool error_on_failure)
@@ -3175,7 +3178,8 @@ RemovePgTempFiles(bool stage, bool remove_relation_files)
 	 */
 	spc_dir = AllocateDir("pg_tblspc");
 
-	while ((spc_de = ReadDirExtended(spc_dir, "pg_tblspc", LOG)) != NULL)
+	while (!ShutdownRequestPending &&
+		   (spc_de = ReadDirExtended(spc_dir, "pg_tblspc", LOG)) != NULL)
 	{
 		if (strcmp(spc_de->d_name, ".") == 0 ||
 			strcmp(spc_de->d_name, "..") == 0)
@@ -3211,6 +3215,14 @@ RemovePgTempFiles(bool stage, bool remove_relation_files)
 	 * would create a race condition.  It's done separately, earlier in
 	 * postmaster startup.
 	 */
+
+	/*
+	 * If we just staged some pgsql_tmp directories for removal, wake up the
+	 * custodian process so that it deletes all the files in the staged
+	 * directories as well as the directories themselves.
+	 */
+	if (stage && ProcGlobal->custodianLatch)
+		SetLatch(ProcGlobal->custodianLatch);
 }
 
 /*
@@ -3315,7 +3327,8 @@ RemoveStagedPgTempDirs(const char *spc_dir)
 	struct dirent *de;
 
 	dir = AllocateDir(spc_dir);
-	while ((de = ReadDirExtended(dir, spc_dir, LOG)) != NULL)
+	while (!ShutdownRequestPending &&
+		   (de = ReadDirExtended(dir, spc_dir, LOG)) != NULL)
 	{
 		if (strncmp(de->d_name, PG_TEMP_DIR_TO_REMOVE_PREFIX,
 					strlen(PG_TEMP_DIR_TO_REMOVE_PREFIX)) != 0)
@@ -3354,7 +3367,8 @@ RemovePgTempDir(const char *tmpdirname, bool missing_ok, bool unlink_all)
 	if (temp_dir == NULL && errno == ENOENT && missing_ok)
 		return;
 
-	while ((temp_de = ReadDirExtended(temp_dir, tmpdirname, LOG)) != NULL)
+	while (!ShutdownRequestPending &&
+		   (temp_de = ReadDirExtended(temp_dir, tmpdirname, LOG)) != NULL)
 	{
 		if (strcmp(temp_de->d_name, ".") == 0 ||
 			strcmp(temp_de->d_name, "..") == 0)
-- 
2.25.1

