From a870303f4bd62b7c653a4bef53ed6d2748268bc0 Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@enterprisedb.com>
Date: Tue, 1 Feb 2022 16:50:31 -0500
Subject: [PATCH 1/3] do only critical work during single-user vacuum?

Feb 01 John Naylor
---
 doc/src/sgml/maintenance.sgml       |  12 ++--
 doc/src/sgml/ref/vacuum.sgml        |  22 ++++++
 src/backend/access/transam/varsup.c |   4 +-
 src/backend/commands/vacuum.c       | 107 +++++++++++++++++++++++++---
 src/include/commands/vacuum.h       |   5 ++
 5 files changed, 134 insertions(+), 16 deletions(-)

diff --git a/doc/src/sgml/maintenance.sgml b/doc/src/sgml/maintenance.sgml
index 36f975b1e5b..5c360499504 100644
--- a/doc/src/sgml/maintenance.sgml
+++ b/doc/src/sgml/maintenance.sgml
@@ -629,17 +629,19 @@ HINT:  To avoid a database shutdown, execute a database-wide VACUUM in that data
 
 <programlisting>
 ERROR:  database is not accepting commands to avoid wraparound data loss in database "mydb"
-HINT:  Stop the postmaster and vacuum that database in single-user mode.
+HINT:  Stop the postmaster and run "VACUUM (EMERGENCY)" in that database in single-user mode.
 </programlisting>
 
     The three-million-transaction safety margin exists to let the
     administrator recover without data loss, by manually executing the
-    required <command>VACUUM</command> commands.  However, since the system will not
+    required <command>VACUUM</command> command.  However, since the system will not
     execute commands once it has gone into the safety shutdown mode,
     the only way to do this is to stop the server and start the server in single-user
-    mode to execute <command>VACUUM</command>.  The shutdown mode is not enforced
-    in single-user mode.  See the <xref linkend="app-postgres"/> reference
-    page for details about using single-user mode.
+    mode to execute <command>VACUUM (EMERGENCY)</command>. The <literal>EMERGENCY</literal> option
+    is recommended, since it enables the vacuum to complete as quickly as possible
+    while still leaving a safety margin for when the system comes back online again.
+    The shutdown mode is not enforced in single-user mode.
+    See the <xref linkend="app-postgres"/> reference page for details about using single-user mode.
    </para>
 
    <sect3 id="vacuum-for-multixact-wraparound">
diff --git a/doc/src/sgml/ref/vacuum.sgml b/doc/src/sgml/ref/vacuum.sgml
index 3df32b58ee6..2dab01ff376 100644
--- a/doc/src/sgml/ref/vacuum.sgml
+++ b/doc/src/sgml/ref/vacuum.sgml
@@ -295,6 +295,28 @@ VACUUM [ FULL ] [ FREEZE ] [ VERBOSE ] [ ANALYZE ] [ <replaceable class="paramet
     </listitem>
    </varlistentry>
 
+   <varlistentry>
+    <term><literal>EMERGENCY</literal></term>
+    <listitem>
+     <para>
+      Special vacuum mode intended for when a forced shutdown has happened to avoid transaction ID wraparound.
+      It performs a database-wide vacuum on tables, toast tables, and materialized views whose
+      xid age or mxid age is older than 1 billion. To complete as quickly as possible, an emergency
+      vacuum will skip truncation and index cleanup, and will skip toast tables whose age has not
+      exceeded the cutoff.
+     </para>
+
+     <para>
+      While this option could be used while the postmaster is running, it is expected that the wraparound
+      failsafe mechanism will automatically work in the same way to prevent imminent shutdown.
+      When <literal>EMERGENCY</literal> is specified no tables may be listed, since it is designed to
+      select candidate relations from the entire database.
+      The only other option that may be combined with <literal>VERBOSE</literal>, although in single-user mode no client messages are
+      output.
+     </para>
+    </listitem>
+   </varlistentry>
+
    <varlistentry>
     <term><replaceable class="parameter">boolean</replaceable></term>
     <listitem>
diff --git a/src/backend/access/transam/varsup.c b/src/backend/access/transam/varsup.c
index 748120a0125..ee9d33dba37 100644
--- a/src/backend/access/transam/varsup.c
+++ b/src/backend/access/transam/varsup.c
@@ -128,14 +128,14 @@ GetNewTransactionId(bool isSubXact)
 						(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
 						 errmsg("database is not accepting commands to avoid wraparound data loss in database \"%s\"",
 								oldest_datname),
-						 errhint("Stop the postmaster and vacuum that database in single-user mode.\n"
+						 errhint("Stop the postmaster and run \"VACUUM (EMERGENCY)\" in that database in single-user mode.\n"
 								 "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
 			else
 				ereport(ERROR,
 						(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
 						 errmsg("database is not accepting commands to avoid wraparound data loss in database with OID %u",
 								oldest_datoid),
-						 errhint("Stop the postmaster and vacuum that database in single-user mode.\n"
+						 errhint("Stop the postmaster and run \"VACUUM (EMERGENCY)\" in that database in single-user mode.\n"
 								 "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
 		}
 		else if (TransactionIdFollowsOrEquals(xid, xidWarnLimit))
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index d1dadc54e47..e36159e827f 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -52,6 +52,7 @@
 #include "storage/proc.h"
 #include "storage/procarray.h"
 #include "utils/acl.h"
+#include "utils/builtins.h"
 #include "utils/fmgroids.h"
 #include "utils/guc.h"
 #include "utils/memutils.h"
@@ -86,7 +87,7 @@ int			VacuumCostBalanceLocal = 0;
 
 /* non-export function prototypes */
 static List *expand_vacuum_rel(VacuumRelation *vrel, int options);
-static List *get_all_vacuum_rels(int options);
+static List *get_all_vacuum_rels(VacuumParams *params);
 static void vac_truncate_clog(TransactionId frozenXID,
 							  MultiXactId minMulti,
 							  TransactionId lastSaneFrozenXid,
@@ -114,6 +115,7 @@ ExecVacuum(ParseState *pstate, VacuumStmt *vacstmt, bool isTopLevel)
 	bool		full = false;
 	bool		disable_page_skipping = false;
 	bool		process_toast = true;
+	bool		emergency = false;
 	ListCell   *lc;
 
 	/* index_cleanup and truncate values unspecified for now */
@@ -123,6 +125,10 @@ ExecVacuum(ParseState *pstate, VacuumStmt *vacstmt, bool isTopLevel)
 	/* By default parallel vacuum is enabled */
 	params.nworkers = 0;
 
+	/* By default don't skip any tables */
+	params.min_xid_age = 0;
+	params.min_mxid_age = 0;
+
 	/* Parse options list */
 	foreach(lc, vacstmt->options)
 	{
@@ -200,6 +206,8 @@ ExecVacuum(ParseState *pstate, VacuumStmt *vacstmt, bool isTopLevel)
 					params.nworkers = nworkers;
 			}
 		}
+		else if (strcmp(opt->defname, "emergency") == 0)
+			emergency = defGetBoolean(opt);
 		else
 			ereport(ERROR,
 					(errcode(ERRCODE_SYNTAX_ERROR),
@@ -216,7 +224,8 @@ ExecVacuum(ParseState *pstate, VacuumStmt *vacstmt, bool isTopLevel)
 		(freeze ? VACOPT_FREEZE : 0) |
 		(full ? VACOPT_FULL : 0) |
 		(disable_page_skipping ? VACOPT_DISABLE_PAGE_SKIPPING : 0) |
-		(process_toast ? VACOPT_PROCESS_TOAST : 0);
+		(process_toast ? VACOPT_PROCESS_TOAST : 0) |
+		(emergency ? VACOPT_EMERGENCY : 0);
 
 	/* sanity checks on options */
 	Assert(params.options & (VACOPT_VACUUM | VACOPT_ANALYZE));
@@ -246,17 +255,70 @@ ExecVacuum(ParseState *pstate, VacuumStmt *vacstmt, bool isTopLevel)
 		}
 	}
 
+	if (emergency)
+	{
+		/* exclude incompatible options */
+		foreach(lc, vacstmt->options)
+		{
+			DefElem    *opt = (DefElem *) lfirst(lc);
+
+			if (strcmp(opt->defname, "emergency") != 0 &&
+				strcmp(opt->defname, "verbose") != 0 &&
+				defGetBoolean(opt))
+
+				ereport(ERROR,
+						(errcode(ERRCODE_SYNTAX_ERROR),
+								errmsg("option \"%s\" is incompatible with EMERGENCY", opt->defname),
+								parser_errposition(pstate, opt->location)));
+		}
+
+		/* prevent specifying a list of tables, to keep it simple */
+		if (vacstmt->rels != NULL)
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+						errmsg("a relation list is not supported with the EMERGENCY option")));
+
+		/* skip unnecessary work, similar to failsafe mode */
+
+		params.index_cleanup = VACOPTVALUE_DISABLED;
+		params.truncate = VACOPTVALUE_DISABLED;
+
+		/* Hard-code 1 billion for the thresholds to avoid making assumptions
+		* about the configuration. This leaves some headroom for when the user
+		* returns to normal mode while also minimizing work.
+		* WIP: consider passing these constants via the params struct
+		*/
+		// params.min_xid_age = 1000 * 1000 * 1000;
+		// params.min_mxid_age = 1000 * 1000 * 1000;
+		// FIXME to speed up testing
+		params.min_xid_age = 1000 * 1000;
+		params.min_mxid_age = 1000 * 1000;
+	}
+
 	/*
-	 * All freeze ages are zero if the FREEZE option is given; otherwise pass
-	 * them as -1 which means to use the default values.
+	 * Set freeze ages to zero where appropriate; otherwise pass
+	 * them as -1 which means to use the configured values.
 	 */
 	if (params.options & VACOPT_FREEZE)
 	{
+		/* All freeze ages are zero if the FREEZE option is given */
 		params.freeze_min_age = 0;
 		params.freeze_table_age = 0;
 		params.multixact_freeze_min_age = 0;
 		params.multixact_freeze_table_age = 0;
 	}
+	else if (params.options & VACOPT_EMERGENCY)
+	{
+		/* It's highly likely any table selected will be eligible for aggressive vacuum, but make sure */
+		params.freeze_table_age = 0;
+		params.multixact_freeze_table_age = 0;
+
+		// WIP: It might be worth trying to do less work here, such as max age / 2 :
+		// params.freeze_min_age = 100 * 1000 * 1000;
+		// params.multixact_freeze_min_age = 200 * 1000 * 1000;
+		params.freeze_min_age = -1;
+		params.multixact_freeze_min_age = -1;
+	}
 	else
 	{
 		params.freeze_min_age = -1;
@@ -404,7 +466,7 @@ vacuum(List *relations, VacuumParams *params,
 		relations = newrels;
 	}
 	else
-		relations = get_all_vacuum_rels(params->options);
+		relations = get_all_vacuum_rels(params);
 
 	/*
 	 * Decide whether we need to start/commit our own transactions.
@@ -461,7 +523,10 @@ vacuum(List *relations, VacuumParams *params,
 		ListCell   *cur;
 
 		in_vacuum = true;
-		VacuumCostActive = (VacuumCostDelay > 0);
+		if (params->VACOPT_EMERGENCY)
+			VacuumCostActive = false;
+		else
+			VacuumCostActive = (VacuumCostDelay > 0);
 		VacuumCostBalance = 0;
 		VacuumPageHit = 0;
 		VacuumPageMiss = 0;
@@ -888,12 +953,14 @@ expand_vacuum_rel(VacuumRelation *vrel, int options)
  * the current database.  The list is built in vac_context.
  */
 static List *
-get_all_vacuum_rels(int options)
+get_all_vacuum_rels(VacuumParams *params)
 {
 	List	   *vacrels = NIL;
 	Relation	pgclass;
 	TableScanDesc scan;
 	HeapTuple	tuple;
+	int32 		table_xid_age,
+				table_mxid_age;
 
 	pgclass = table_open(RelationRelationId, AccessShareLock);
 
@@ -906,15 +973,37 @@ get_all_vacuum_rels(int options)
 		Oid			relid = classForm->oid;
 
 		/* check permissions of relation */
-		if (!vacuum_is_relation_owner(relid, classForm, options))
+		if (!vacuum_is_relation_owner(relid, classForm, params->options))
 			continue;
 
+		if (params->options & VACOPT_EMERGENCY)
+		{
+			/*
+			* Only consider relations able to hold unfrozen XIDs (anything else
+			* should have InvalidTransactionId in relfrozenxid anyway).
+			*/
+			if (classForm->relkind != RELKIND_RELATION &&
+				classForm->relkind != RELKIND_MATVIEW &&
+				classForm->relkind != RELKIND_TOASTVALUE)
+			{
+				Assert(!TransactionIdIsValid(classForm->relfrozenxid));
+				Assert(!MultiXactIdIsValid(classForm->relminmxid));
+				continue;
+			}
+
+			table_xid_age = DirectFunctionCall1(xid_age, classForm->relfrozenxid);
+			table_mxid_age = DirectFunctionCall1(mxid_age, classForm->relminmxid);
+
+			if ((table_xid_age < params->min_xid_age) &&
+				(table_mxid_age < params->min_mxid_age))
+				continue;
+		}
 		/*
 		 * We include partitioned tables here; depending on which operation is
 		 * to be performed, caller will decide whether to process or ignore
 		 * them.
 		 */
-		if (classForm->relkind != RELKIND_RELATION &&
+		else if (classForm->relkind != RELKIND_RELATION &&
 			classForm->relkind != RELKIND_MATVIEW &&
 			classForm->relkind != RELKIND_PARTITIONED_TABLE)
 			continue;
diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h
index 5d0bdfa4279..8aab6641628 100644
--- a/src/include/commands/vacuum.h
+++ b/src/include/commands/vacuum.h
@@ -188,6 +188,7 @@ typedef struct VacAttrStats
 #define VACOPT_SKIP_LOCKED 0x20 /* skip if cannot get lock */
 #define VACOPT_PROCESS_TOAST 0x40	/* process the TOAST table, if any */
 #define VACOPT_DISABLE_PAGE_SKIPPING 0x80	/* don't skip any pages */
+#define VACOPT_EMERGENCY 0x100	/* do minimal freezing work to prevent or get out of shutdown */
 
 /*
  * Values used by index_cleanup and truncate params.
@@ -233,6 +234,10 @@ typedef struct VacuumParams
 	 * disabled.
 	 */
 	int			nworkers;
+
+	/* cutoff ages for selecting tables to vacuum, 0 is the default */
+	int 		min_xid_age;
+	int 		min_mxid_age;
 } VacuumParams;
 
 /*
-- 
2.17.1

