From da5696b9026fa916ae991f7da616062c5b19e705 Mon Sep 17 00:00:00 2001
From: Kyotaro Horiguchi <horikyota.ntt@gmail.com>
Date: Thu, 31 Aug 2023 11:49:10 +0900
Subject: [PATCH v29 1/2] Introduce undo log implementation

This patch adds a simple implementation of UNDO log feature.
---
 src/backend/access/transam/Makefile        |   1 +
 src/backend/access/transam/rmgr.c          |   4 +-
 src/backend/access/transam/simpleundolog.c | 343 +++++++++++++++++++++
 src/backend/access/transam/twophase.c      |   3 +
 src/backend/access/transam/xact.c          |  24 ++
 src/backend/access/transam/xlog.c          |  20 +-
 src/backend/catalog/storage.c              | 171 ++++++++++
 src/backend/storage/file/reinit.c          |  78 +++++
 src/backend/storage/smgr/smgr.c            |   9 +
 src/bin/initdb/initdb.c                    |  17 +
 src/bin/pg_rewind/parsexlog.c              |   2 +-
 src/bin/pg_waldump/rmgrdesc.c              |   2 +-
 src/include/access/rmgr.h                  |   2 +-
 src/include/access/rmgrlist.h              |  44 +--
 src/include/access/simpleundolog.h         |  36 +++
 src/include/catalog/storage.h              |   3 +
 src/include/catalog/storage_ulog.h         |  35 +++
 src/include/catalog/storage_xlog.h         |   9 +
 src/include/storage/reinit.h               |   2 +
 src/include/storage/smgr.h                 |   1 +
 src/tools/pgindent/typedefs.list           |   6 +
 21 files changed, 780 insertions(+), 32 deletions(-)
 create mode 100644 src/backend/access/transam/simpleundolog.c
 create mode 100644 src/include/access/simpleundolog.h
 create mode 100644 src/include/catalog/storage_ulog.h

diff --git a/src/backend/access/transam/Makefile b/src/backend/access/transam/Makefile
index 661c55a9db..531505cbbd 100644
--- a/src/backend/access/transam/Makefile
+++ b/src/backend/access/transam/Makefile
@@ -21,6 +21,7 @@ OBJS = \
 	rmgr.o \
 	slru.o \
 	subtrans.o \
+	simpleundolog.o \
 	timeline.o \
 	transam.o \
 	twophase.o \
diff --git a/src/backend/access/transam/rmgr.c b/src/backend/access/transam/rmgr.c
index 7d67eda5f7..840cbdecd3 100644
--- a/src/backend/access/transam/rmgr.c
+++ b/src/backend/access/transam/rmgr.c
@@ -35,8 +35,8 @@
 #include "utils/relmapper.h"
 
 /* must be kept in sync with RmgrData definition in xlog_internal.h */
-#define PG_RMGR(symname,name,redo,desc,identify,startup,cleanup,mask,decode) \
-	{ name, redo, desc, identify, startup, cleanup, mask, decode },
+#define PG_RMGR(symname,name,redo,desc,identify,startup,cleanup,mask,decode,undo) \
+	{ name, redo, desc, identify, startup, cleanup, mask, decode},
 
 RmgrData	RmgrTable[RM_MAX_ID + 1] = {
 #include "access/rmgrlist.h"
diff --git a/src/backend/access/transam/simpleundolog.c b/src/backend/access/transam/simpleundolog.c
new file mode 100644
index 0000000000..ebbacce298
--- /dev/null
+++ b/src/backend/access/transam/simpleundolog.c
@@ -0,0 +1,343 @@
+/*-------------------------------------------------------------------------
+ *
+ * simpleundolog.c
+ *		Simple implementation of PostgreSQL transaction-undo-log manager
+ *
+ * In this module, procedures required during a transaction abort are
+ * logged. Persisting this information becomes crucial, particularly for
+ * ensuring reliable post-processing during the restart following a transaction
+ * crash. At present, in this module, logging of information is performed by
+ * simply appending data to a created file.
+ *
+ * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/backend/access/transam/clog.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "access/simpleundolog.h"
+#include "access/twophase_rmgr.h"
+#include "access/parallel.h"
+#include "access/xact.h"
+#include "catalog/storage_ulog.h"
+#include "storage/fd.h"
+
+#define ULOG_FILE_MAGIC 0x12345678
+
+typedef struct UndoLogFileHeader
+{
+	int32 magic;
+	bool  prepared;
+} UndoLogFileHeader;
+
+typedef struct UndoDescData
+{
+	const char *name;
+	void	(*rm_undo) (SimpleUndoLogRecord *record, bool prepared);
+} UndoDescData;
+
+/* must be kept in sync with RmgrData definition in xlog_internal.h */
+#define PG_RMGR(symname,name,redo,desc,identify,startup,cleanup,mask,decode,undo) \
+	{ name, undo },
+
+UndoDescData	UndoRoutines[RM_MAX_ID + 1] = {
+#include "access/rmgrlist.h"
+};
+#undef PG_RMGR
+
+#if defined(O_DSYNC)
+static int undo_sync_mode = O_DSYNC;
+#elif defined(O_SYNC)
+static int undo_sync_mode = O_SYNC;
+#else
+static int undo_sync_mode = 0;
+#endif
+
+static char current_ulogfile_name[MAXPGPATH];
+static int	current_ulogfile_fd = -1;
+static int  current_xid = InvalidTransactionId;
+static UndoLogFileHeader current_fhdr;
+
+static void
+undolog_check_file_header(void)
+{
+	if (read(current_ulogfile_fd, &current_fhdr, sizeof(current_fhdr)) < 0)
+		ereport(PANIC,
+				errcode_for_file_access(),
+				errmsg("could not read undolog file \"%s\": %m",
+					   current_ulogfile_name));
+	if (current_fhdr.magic != ULOG_FILE_MAGIC)
+		ereport(PANIC,
+				errcode_for_file_access(),
+				errmsg("invalid undolog file \"%s\": magic don't match",
+					   current_ulogfile_name));
+}
+
+static bool
+undolog_open_current_file(TransactionId xid, bool forread, bool append)
+{
+	int omode;
+
+	if (current_ulogfile_fd >= 0)
+	{
+		/* use existing open file */
+		if (current_xid == xid)
+		{
+			if (append)
+				return true;
+
+			if (lseek(current_ulogfile_fd,
+					  sizeof(UndoLogFileHeader), SEEK_SET) < 0)
+				ereport(PANIC,
+						errcode_for_file_access(),
+						errmsg("could not seek undolog file \"%s\": %m",
+							   current_ulogfile_name));
+		}
+
+		close(current_ulogfile_fd);
+		current_ulogfile_fd = -1;
+		ReleaseExternalFD();
+	}
+
+	current_xid = xid;
+	if (!TransactionIdIsValid(xid))
+		return false;
+
+	omode = PG_BINARY | undo_sync_mode;
+
+	if (forread)
+		omode |= O_RDONLY;
+	else
+	{
+		omode |= O_RDWR;
+
+		if (!append)
+			omode |= O_TRUNC;
+	}
+
+	snprintf(current_ulogfile_name, MAXPGPATH, "%s/%08x",
+			 SIMPLE_UNDOLOG_DIR, xid);
+	current_ulogfile_fd = BasicOpenFile(current_ulogfile_name, omode);
+	if (current_ulogfile_fd >= 0)
+		undolog_check_file_header();
+	else
+	{
+		if (forread)
+			return false;
+
+		current_fhdr.magic = ULOG_FILE_MAGIC;
+		current_fhdr.prepared = false;
+
+		omode |= O_CREAT;
+		current_ulogfile_fd = BasicOpenFile(current_ulogfile_name, omode);
+		if (current_ulogfile_fd < 0)
+			ereport(PANIC,
+					errcode_for_file_access(),
+					errmsg("could not create undolog file \"%s\": %m",
+						   current_ulogfile_name));
+
+		if (write(current_ulogfile_fd, &current_fhdr, sizeof(current_fhdr)) < 0)
+			ereport(PANIC,
+					errcode_for_file_access(),
+					errmsg("could not write undolog file \"%s\": %m",
+						   current_ulogfile_name));
+	}
+
+	/*
+	 * move file pointer to the end of the file. we do this not using O_APPEND,
+	 * to allow us to modify data at any location in the file. We already moved
+	 * to the first record in the case of !append.
+	 */
+	if (append)
+	{
+		if (lseek(current_ulogfile_fd, 0, SEEK_END) < 0)
+			ereport(PANIC,
+					errcode_for_file_access(),
+					errmsg("could not seek undolog file \"%s\": %m",
+						   current_ulogfile_name));
+	}
+	ReserveExternalFD();
+
+	return true;
+}
+
+/*
+ * Write ulog record
+ */
+void
+SimpleUndoLogWrite(RmgrId rmgr, uint8 info,
+				   TransactionId xid, void *data, int len)
+{
+	int reclen = sizeof(SimpleUndoLogRecord) + len;
+	SimpleUndoLogRecord *rec = palloc(reclen);
+	pg_crc32c	undodata_crc;
+
+	Assert(!IsParallelWorker());
+	Assert(xid != InvalidTransactionId);
+
+	undolog_open_current_file(xid, false, true);
+
+	rec->ul_tot_len = reclen;
+	rec->ul_rmid = rmgr;
+	rec->ul_info = info;
+	rec->ul_xid = current_xid;
+
+	memcpy((char *)rec + sizeof(SimpleUndoLogRecord), data, len);
+
+	/* Calculate CRC of the data */
+	INIT_CRC32C(undodata_crc);
+	COMP_CRC32C(undodata_crc, rec,
+				reclen - offsetof(SimpleUndoLogRecord, ul_rmid));
+	rec->ul_crc = undodata_crc;
+
+
+	if (write(current_ulogfile_fd, rec, reclen) < 0)
+				ereport(ERROR,
+						errcode_for_file_access(),
+						errmsg("could not write to undolog file \"%s\": %m",
+							   current_ulogfile_name));
+}
+
+static void
+SimpleUndoLogUndo(bool cleanup)
+{
+	int		bufsize;
+	char   *buf;
+
+	bufsize = 1024;
+	buf = palloc(bufsize);
+
+	Assert(current_ulogfile_fd >= 0);
+
+	while (read(current_ulogfile_fd, buf, sizeof(SimpleUndoLogRecord)) ==
+		   sizeof(SimpleUndoLogRecord))
+	{
+		SimpleUndoLogRecord *rec = (SimpleUndoLogRecord *) buf;
+		int readlen = rec->ul_tot_len - sizeof(SimpleUndoLogRecord);
+		int ret;
+
+		if (rec->ul_tot_len > bufsize)
+		{
+			bufsize *= 2;
+			buf = repalloc(buf, bufsize);
+		}
+
+		ret = read(current_ulogfile_fd,
+				   buf + sizeof(SimpleUndoLogRecord), readlen);
+		if (ret != readlen)
+		{
+			if (ret < 0)
+				ereport(ERROR,
+						errcode_for_file_access(),
+						errmsg("could not read undo log file \"%s\": %m",
+							   current_ulogfile_name));
+
+			ereport(ERROR,
+					errcode_for_file_access(),
+					errmsg("reading undo log expected %d bytes, but actually %d: %s",
+						   readlen, ret, current_ulogfile_name));
+
+		}
+
+		UndoRoutines[rec->ul_rmid].rm_undo(rec,
+										   current_fhdr.prepared && cleanup);
+	}
+}
+
+void
+AtEOXact_SimpleUndoLog(bool isCommit, TransactionId xid)
+{
+	if (IsParallelWorker())
+		return;
+
+	if (!undolog_open_current_file(xid, true, false))
+		return;
+
+	if (!isCommit)
+		SimpleUndoLogUndo(false);
+
+	if (current_ulogfile_fd > 0)
+	{
+		if (close(current_ulogfile_fd) != 0)
+			ereport(PANIC, errcode_for_file_access(),
+					errmsg("could not close file \"%s\": %m",
+						   current_ulogfile_name));
+
+		current_ulogfile_fd = -1;
+		ReleaseExternalFD();
+		durable_unlink(current_ulogfile_name, FATAL);
+	}
+
+	return;
+}
+
+void
+UndoLogCleanup(void)
+{
+	DIR		   *dirdesc;
+	struct dirent *de;
+	char      **loglist;
+	int			loglistspace = 128;
+	int			loglistlen = 0;
+	int			i;
+
+	loglist = palloc(sizeof(char*) * loglistspace);
+
+	dirdesc = AllocateDir(SIMPLE_UNDOLOG_DIR);
+	while ((de = ReadDir(dirdesc, SIMPLE_UNDOLOG_DIR)) != NULL)
+	{
+		if (strspn(de->d_name, "01234567890abcdef") < strlen(de->d_name))
+			continue;
+
+		if (loglistlen >= loglistspace)
+		{
+			loglistspace *= 2;
+			loglist = repalloc(loglist, sizeof(char*) * loglistspace);
+		}
+		loglist[loglistlen++] = pstrdup(de->d_name);
+	}
+
+	for (i = 0 ; i < loglistlen ; i++)
+	{
+		snprintf(current_ulogfile_name, MAXPGPATH, "%s/%s",
+				 SIMPLE_UNDOLOG_DIR, loglist[i]);
+		current_ulogfile_fd =  BasicOpenFile(current_ulogfile_name,
+											O_RDWR | PG_BINARY |
+											undo_sync_mode);
+		undolog_check_file_header();
+		SimpleUndoLogUndo(true);
+		if (close(current_ulogfile_fd) != 0)
+			ereport(PANIC, errcode_for_file_access(),
+					errmsg("could not close file \"%s\": %m",
+						   current_ulogfile_name));
+		current_ulogfile_fd = -1;
+
+		/* do not remove ulog files for prepared transactions */
+		if (!current_fhdr.prepared)
+			durable_unlink(current_ulogfile_name, FATAL);
+	}
+}
+
+void
+SimpleUndoLogSetPrpared(TransactionId xid, bool prepared)
+{
+	Assert(xid != InvalidTransactionId);
+
+	undolog_open_current_file(xid, false, true);
+	current_fhdr.prepared = prepared;
+	if (lseek(current_ulogfile_fd, 0, SEEK_SET) < 0)
+		ereport(PANIC,
+				errcode_for_file_access(),
+				errmsg("could not seek undolog file \"%s\": %m",
+					   current_ulogfile_name));
+
+	if (write(current_ulogfile_fd, &current_fhdr, sizeof(current_fhdr)) < 0)
+		ereport(PANIC,
+				errcode_for_file_access(),
+				errmsg("could not write undolog file \"%s\": %m",
+					   current_ulogfile_name));
+}
diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c
index c6af8cfd7e..a32ec28eb0 100644
--- a/src/backend/access/transam/twophase.c
+++ b/src/backend/access/transam/twophase.c
@@ -78,6 +78,7 @@
 
 #include "access/commit_ts.h"
 #include "access/htup_details.h"
+#include "access/simpleundolog.h"
 #include "access/subtrans.h"
 #include "access/transam.h"
 #include "access/twophase.h"
@@ -1565,6 +1566,8 @@ FinishPreparedTransaction(const char *gid, bool isCommit)
 									   abortstats,
 									   gid);
 
+	AtEOXact_SimpleUndoLog(isCommit, xid);
+
 	ProcArrayRemove(proc, latestXid);
 
 	/*
diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c
index 8daaa535ed..8bbe8fdb08 100644
--- a/src/backend/access/transam/xact.c
+++ b/src/backend/access/transam/xact.c
@@ -24,6 +24,7 @@
 #include "access/multixact.h"
 #include "access/parallel.h"
 #include "access/subtrans.h"
+#include "access/simpleundolog.h"
 #include "access/transam.h"
 #include "access/twophase.h"
 #include "access/xact.h"
@@ -2224,6 +2225,9 @@ CommitTransaction(void)
 	 */
 	smgrDoPendingSyncs(true, is_parallel_worker);
 
+	/* Likewise perform uncommitted storage file deletion. */
+	smgrDoPendingCleanups(true);
+
 	/* close large objects before lower-level cleanup */
 	AtEOXact_LargeObject(true);
 
@@ -2365,6 +2369,7 @@ CommitTransaction(void)
 	AtEOXact_on_commit_actions(true);
 	AtEOXact_Namespace(true, is_parallel_worker);
 	AtEOXact_SMgr();
+	AtEOXact_SimpleUndoLog(true, GetCurrentTransactionIdIfAny());
 	AtEOXact_Files(true);
 	AtEOXact_ComboCid();
 	AtEOXact_HashTables(true);
@@ -2475,6 +2480,9 @@ PrepareTransaction(void)
 	 */
 	smgrDoPendingSyncs(true, false);
 
+	/* Likewise perform uncommitted storage file deletion. */
+	smgrDoPendingCleanups(true);
+
 	/* close large objects before lower-level cleanup */
 	AtEOXact_LargeObject(true);
 
@@ -2799,6 +2807,7 @@ AbortTransaction(void)
 	AfterTriggerEndXact(false); /* 'false' means it's abort */
 	AtAbort_Portals();
 	smgrDoPendingSyncs(false, is_parallel_worker);
+	smgrDoPendingCleanups(false);
 	AtEOXact_LargeObject(false);
 	AtAbort_Notify();
 	AtEOXact_RelationMap(false, is_parallel_worker);
@@ -2866,6 +2875,7 @@ AbortTransaction(void)
 		AtEOXact_on_commit_actions(false);
 		AtEOXact_Namespace(false, is_parallel_worker);
 		AtEOXact_SMgr();
+		AtEOXact_SimpleUndoLog(false, GetCurrentTransactionIdIfAny());
 		AtEOXact_Files(false);
 		AtEOXact_ComboCid();
 		AtEOXact_HashTables(false);
@@ -5002,6 +5012,8 @@ CommitSubTransaction(void)
 	AtEOSubXact_Inval(true);
 	AtSubCommit_smgr();
 
+	AtEOXact_SimpleUndoLog(true, GetCurrentTransactionIdIfAny());
+
 	/*
 	 * The only lock we actually release here is the subtransaction XID lock.
 	 */
@@ -5181,6 +5193,7 @@ AbortSubTransaction(void)
 							 RESOURCE_RELEASE_AFTER_LOCKS,
 							 false, false);
 		AtSubAbort_smgr();
+		AtEOXact_SimpleUndoLog(false, GetCurrentTransactionIdIfAny());
 
 		AtEOXact_GUC(false, s->gucNestLevel);
 		AtEOSubXact_SPI(false, s->subTransactionId);
@@ -5660,7 +5673,10 @@ XactLogCommitRecord(TimestampTz commit_time,
 	if (!TransactionIdIsValid(twophase_xid))
 		info = XLOG_XACT_COMMIT;
 	else
+	{
+		elog(LOG, "COMMIT PREPARED: %d", twophase_xid);
 		info = XLOG_XACT_COMMIT_PREPARED;
+	}
 
 	/* First figure out and collect all the information needed */
 
@@ -6060,6 +6076,8 @@ xact_redo_commit(xl_xact_parsed_commit *parsed,
 		DropRelationFiles(parsed->xlocators, parsed->nrels, true);
 	}
 
+	AtEOXact_SimpleUndoLog(true, xid);
+
 	if (parsed->nstats > 0)
 	{
 		/* see equivalent call for relations above */
@@ -6171,6 +6189,8 @@ xact_redo_abort(xl_xact_parsed_abort *parsed, TransactionId xid,
 		DropRelationFiles(parsed->xlocators, parsed->nrels, true);
 	}
 
+	AtEOXact_SimpleUndoLog(false, xid);
+
 	if (parsed->nstats > 0)
 	{
 		/* see equivalent call for relations above */
@@ -6236,6 +6256,10 @@ xact_redo(XLogReaderState *record)
 	}
 	else if (info == XLOG_XACT_PREPARE)
 	{
+		xl_xact_prepare *xlrec = (xl_xact_prepare *) XLogRecGetData(record);
+
+		AtEOXact_SimpleUndoLog(true, xlrec->xid);
+
 		/*
 		 * Store xid and start/end pointers of the WAL record in TwoPhaseState
 		 * gxact entry.
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index f6f8adc72a..d6cb9aceec 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -51,6 +51,7 @@
 #include "access/heaptoast.h"
 #include "access/multixact.h"
 #include "access/rewriteheap.h"
+#include "access/simpleundolog.h"
 #include "access/subtrans.h"
 #include "access/timeline.h"
 #include "access/transam.h"
@@ -5385,6 +5386,12 @@ StartupXLOG(void)
 		/* Check that the GUCs used to generate the WAL allow recovery */
 		CheckRequiredParameterValues();
 
+		/*
+		 * Perform undo processing. This must be done before resetting unlogged
+		 * relations.
+		 */
+		UndoLogCleanup();
+
 		/*
 		 * We're in recovery, so unlogged relations may be trashed and must be
 		 * reset.  This should be done BEFORE allowing Hot Standby
@@ -5530,14 +5537,17 @@ StartupXLOG(void)
 	}
 
 	/*
-	 * Reset unlogged relations to the contents of their INIT fork. This is
-	 * done AFTER recovery is complete so as to include any unlogged relations
-	 * created during recovery, but BEFORE recovery is marked as having
-	 * completed successfully. Otherwise we'd not retry if any of the post
-	 * end-of-recovery steps fail.
+	 * Process undo logs left ater recovery, then reset unlogged relations to
+	 * the contents of their INIT fork. This is done AFTER recovery is complete
+	 * so as to include any file creations during recovery, but BEFORE recovery
+	 * is marked as having completed successfully. Otherwise we'd not retry if
+	 * any of the post end-of-recovery steps fail.
 	 */
 	if (InRecovery)
+	{
+		UndoLogCleanup();
 		ResetUnloggedRelations(UNLOGGED_RELATION_INIT);
+	}
 
 	/*
 	 * Pre-scan prepared transactions to find out the range of XIDs present.
diff --git a/src/backend/catalog/storage.c b/src/backend/catalog/storage.c
index 2add053489..1778801bbd 100644
--- a/src/backend/catalog/storage.c
+++ b/src/backend/catalog/storage.c
@@ -19,16 +19,20 @@
 
 #include "postgres.h"
 
+#include "access/amapi.h"
 #include "access/parallel.h"
 #include "access/visibilitymap.h"
 #include "access/xact.h"
 #include "access/xlog.h"
 #include "access/xloginsert.h"
 #include "access/xlogutils.h"
+#include "access/simpleundolog.h"
 #include "catalog/storage.h"
 #include "catalog/storage_xlog.h"
+#include "catalog/storage_ulog.h"
 #include "miscadmin.h"
 #include "storage/freespace.h"
+#include "storage/reinit.h"
 #include "storage/smgr.h"
 #include "utils/hsearch.h"
 #include "utils/memutils.h"
@@ -66,6 +70,19 @@ typedef struct PendingRelDelete
 	struct PendingRelDelete *next;	/* linked-list link */
 } PendingRelDelete;
 
+#define	PCOP_UNLINK_FORK		(1 << 0)
+
+typedef struct PendingCleanup
+{
+	RelFileLocator rlocator;	/* relation that need a cleanup */
+	int			op;				/* operation mask */
+	ForkNumber	unlink_forknum; /* forknum to unlink */
+	BackendId	backend;		/* InvalidBackendId if not a temp rel */
+	bool		atCommit;		/* T=delete at commit; F=delete at abort */
+	int			nestLevel;		/* xact nesting level of request */
+	struct PendingCleanup *next;	/* linked-list link */
+}			PendingCleanup;
+
 typedef struct PendingRelSync
 {
 	RelFileLocator rlocator;
@@ -73,6 +90,7 @@ typedef struct PendingRelSync
 } PendingRelSync;
 
 static PendingRelDelete *pendingDeletes = NULL; /* head of linked list */
+static PendingCleanup * pendingCleanups = NULL; /* head of linked list */
 static HTAB *pendingSyncHash = NULL;
 
 
@@ -148,6 +166,19 @@ RelationCreateStorage(RelFileLocator rlocator, char relpersistence,
 	srel = smgropen(rlocator, backend);
 	smgrcreate(srel, MAIN_FORKNUM, false);
 
+	/* Write undo log, this requires irrelevant to needs_wal */
+	if (register_delete)
+	{
+		ul_uncommitted_storage ul_storage;
+
+		ul_storage.rlocator = rlocator;
+		ul_storage.forknum = MAIN_FORKNUM;
+		ul_storage.remove = true;
+		SimpleUndoLogWrite(RM_SMGR_ID, ULOG_SMGR_UNCOMMITED_STORAGE,
+						   GetCurrentTransactionId(),
+						   &ul_storage, sizeof(ul_storage));
+	}
+
 	if (needs_wal)
 		log_smgrcreate(&srel->smgr_rlocator.locator, MAIN_FORKNUM);
 
@@ -191,12 +222,32 @@ log_smgrcreate(const RelFileLocator *rlocator, ForkNumber forkNum)
 	 */
 	xlrec.rlocator = *rlocator;
 	xlrec.forkNum = forkNum;
+	xlrec.xid = GetTopTransactionId();
 
 	XLogBeginInsert();
 	XLogRegisterData((char *) &xlrec, sizeof(xlrec));
 	XLogInsert(RM_SMGR_ID, XLOG_SMGR_CREATE | XLR_SPECIAL_REL_UPDATE);
 }
 
+/*
+ * Perform XLogInsert of an XLOG_SMGR_UNLINK record to WAL.
+ */
+void
+log_smgrunlink(const RelFileLocator *rlocator, ForkNumber forkNum)
+{
+	xl_smgr_unlink xlrec;
+
+	/*
+	 * Make an XLOG entry reporting the file unlink.
+	 */
+	xlrec.rlocator = *rlocator;
+	xlrec.forkNum = forkNum;
+
+	XLogBeginInsert();
+	XLogRegisterData((char *) &xlrec, sizeof(xlrec));
+	XLogInsert(RM_SMGR_ID, XLOG_SMGR_UNLINK | XLR_SPECIAL_REL_UPDATE);
+}
+
 /*
  * RelationDropStorage
  *		Schedule unlinking of physical storage at transaction commit.
@@ -711,6 +762,75 @@ smgrDoPendingDeletes(bool isCommit)
 	}
 }
 
+/*
+ *	smgrDoPendingUnmark() -- Clean up work that emits WAL records
+ *
+ *  The operations handled in the function emits WAL records, which must be
+ *  part of the current transaction.
+ */
+void
+smgrDoPendingCleanups(bool isCommit)
+{
+	int			nestLevel = GetCurrentTransactionNestLevel();
+	PendingCleanup *pending;
+	PendingCleanup *prev;
+	PendingCleanup *next;
+
+	prev = NULL;
+	for (pending = pendingCleanups; pending != NULL; pending = next)
+	{
+		next = pending->next;
+		if (pending->nestLevel < nestLevel)
+		{
+			/* outer-level entries should not be processed yet */
+			prev = pending;
+		}
+		else
+		{
+			/* unlink list entry first, so we don't retry on failure */
+			if (prev)
+				prev->next = next;
+			else
+				pendingCleanups = next;
+
+			/* do cleanup if called for */
+			if (pending->atCommit == isCommit)
+			{
+				SMgrRelation srel;
+
+				srel = smgropen(pending->rlocator, pending->backend);
+
+				Assert((pending->op & ~(PCOP_UNLINK_FORK)) == 0);
+
+				if (pending->op & PCOP_UNLINK_FORK)
+				{
+					BlockNumber firstblock = 0;
+
+					/*
+					 * Unlink the fork file. Currently this operation is
+					 * applied only to init-forks. As it is not ceratin that
+					 * the init-fork is not loaded on shared buffers, drop all
+					 * buffers for it.
+					 */
+					Assert(pending->unlink_forknum == INIT_FORKNUM);
+					DropRelationBuffers(srel, &pending->unlink_forknum, 1,
+										&firstblock);
+
+					/* Don't emit wal while recovery. */
+					if (!InRecovery)
+						log_smgrunlink(&pending->rlocator,
+									   pending->unlink_forknum);
+					smgrunlink(srel, pending->unlink_forknum, false);
+				}
+			}
+
+			/* must explicitly free the list entry */
+			pfree(pending);
+			/* prev does not change */
+		}
+	}
+}
+
 /*
  *	smgrDoPendingSyncs() -- Take care of relation syncs at end of xact.
  */
@@ -920,6 +1040,9 @@ PostPrepare_smgr(void)
 		/* must explicitly free the list entry */
 		pfree(pending);
 	}
+
+	/* Mark undolog as prepared */
+	SimpleUndoLogSetPrpared(GetCurrentTransactionId(), true);
 }
 
 
@@ -967,10 +1090,28 @@ smgr_redo(XLogReaderState *record)
 	{
 		xl_smgr_create *xlrec = (xl_smgr_create *) XLogRecGetData(record);
 		SMgrRelation reln;
+		ul_uncommitted_storage ul_storage;
+
+		/* write undo log */
+		ul_storage.rlocator = xlrec->rlocator;
+		ul_storage.forknum = xlrec->forkNum;
+		ul_storage.remove = true;
+		SimpleUndoLogWrite(RM_SMGR_ID, ULOG_SMGR_UNCOMMITED_STORAGE,
+						   xlrec->xid,
+						   &ul_storage, sizeof(ul_storage));
 
 		reln = smgropen(xlrec->rlocator, InvalidBackendId);
 		smgrcreate(reln, xlrec->forkNum, true);
 	}
+	else if (info == XLOG_SMGR_UNLINK)
+	{
+		xl_smgr_unlink *xlrec = (xl_smgr_unlink *) XLogRecGetData(record);
+		SMgrRelation reln;
+
+		reln = smgropen(xlrec->rlocator, InvalidBackendId);
+		smgrunlink(reln, xlrec->forkNum, true);
+		smgrclose(reln);
+	}
 	else if (info == XLOG_SMGR_TRUNCATE)
 	{
 		xl_smgr_truncate *xlrec = (xl_smgr_truncate *) XLogRecGetData(record);
@@ -1062,3 +1203,33 @@ smgr_redo(XLogReaderState *record)
 	else
 		elog(PANIC, "smgr_redo: unknown op code %u", info);
 }
+
+void
+smgr_undo(SimpleUndoLogRecord *record, bool crash_prepared)
+{
+	uint8	info = record->ul_info;
+
+
+	if (info == ULOG_SMGR_UNCOMMITED_STORAGE)
+	{
+		ul_uncommitted_storage *ul_storage =
+			(ul_uncommitted_storage *) ULogRecGetData(record);
+
+		if (!crash_prepared)
+		{
+			SMgrRelation reln;
+
+			reln = smgropen(ul_storage->rlocator, InvalidBackendId);
+			smgrunlink(reln, ul_storage->forknum, true);
+			smgrclose(reln);
+		}
+		else
+		{
+			/* Inform reinit to ignore this file during cleanup */
+			ResetUnloggedRelationIgnore(ul_storage->rlocator);
+		}
+
+	}
+	else
+		elog(PANIC, "smgr_undo: unknown op code %u", info);
+}
diff --git a/src/backend/storage/file/reinit.c b/src/backend/storage/file/reinit.c
index fb55371b1b..d302feadb1 100644
--- a/src/backend/storage/file/reinit.c
+++ b/src/backend/storage/file/reinit.c
@@ -34,6 +34,39 @@ typedef struct
 	Oid			reloid;			/* hash key */
 } unlogged_relation_entry;
 
+static char **ignore_files = NULL;
+static int nignore_elems = 0;
+static int nignore_files = 0;
+
+/*
+ * identify the file should be ignored during resetting unlogged relations.
+ */
+static bool
+reinit_ignore_file(const char *dirname, const char *name)
+{
+	char fnamebuf[MAXPGPATH];
+	int len;
+
+	if (nignore_files == 0)
+		return false;
+
+	strncpy(fnamebuf, dirname, MAXPGPATH - 1);
+	strncat(fnamebuf, "/", MAXPGPATH - 1);
+	strncat(fnamebuf, name, MAXPGPATH - 1);
+	fnamebuf[MAXPGPATH - 1] = 0;
+
+	for (int i = 0 ; i < nignore_files ; i++)
+	{
+		/* match ignoring fork part */
+		len = strlen(ignore_files[i]);
+		if (strncmp(fnamebuf, ignore_files[i], len) == 0 &&
+			(fnamebuf[len] == 0 || fnamebuf[len] == '_'))
+			return true;
+	}
+
+	return false;
+}
+
 /*
  * Reset unlogged relations from before the last restart.
  *
@@ -203,6 +236,10 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
 													 &forkNum))
 				continue;
 
+			/* Skip anything that undo log suggested to ignore */
+			if (reinit_ignore_file(dbspacedirname, de->d_name))
+				continue;
+
 			/* Also skip it unless this is the init fork. */
 			if (forkNum != INIT_FORKNUM)
 				continue;
@@ -243,6 +280,10 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
 													 &forkNum))
 				continue;
 
+			/* Skip anything that undo log suggested to ignore */
+			if (reinit_ignore_file(dbspacedirname, de->d_name))
+				continue;
+
 			/* We never remove the init fork. */
 			if (forkNum == INIT_FORKNUM)
 				continue;
@@ -295,6 +336,10 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
 													 &forkNum))
 				continue;
 
+			/* Skip anything that undo log suggested to ignore */
+			if (reinit_ignore_file(dbspacedirname, de->d_name))
+				continue;
+
 			/* Also skip it unless this is the init fork. */
 			if (forkNum != INIT_FORKNUM)
 				continue;
@@ -337,6 +382,10 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
 													 &forkNum))
 				continue;
 
+			/* Skip anything that undo log suggested to ignore */
+			if (reinit_ignore_file(dbspacedirname, de->d_name))
+				continue;
+
 			/* Also skip it unless this is the init fork. */
 			if (forkNum != INIT_FORKNUM)
 				continue;
@@ -365,6 +414,35 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
 	}
 }
 
+/*
+ * Record relfilenodes that should be left alone during reinitializing unlogged
+ * relations.
+ */
+void
+ResetUnloggedRelationIgnore(RelFileLocator rloc)
+{
+	RelFileLocatorBackend rbloc;
+
+	if (nignore_files >= nignore_elems)
+	{
+		if (ignore_files == NULL)
+		{
+			nignore_elems = 16;
+			ignore_files = palloc(sizeof(char *) * nignore_elems);
+		}
+		else
+		{
+			nignore_elems *= 2;
+			ignore_files = repalloc(ignore_files,
+									sizeof(char *) * nignore_elems);
+		}
+	}
+
+	rbloc.backend = InvalidBackendId;
+	rbloc.locator = rloc;
+	ignore_files[nignore_files++] = relpath(rbloc, MAIN_FORKNUM);
+}
+
 /*
  * Basic parsing of putative relation filenames.
  *
diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c
index 5d0f3d515c..92945c32c3 100644
--- a/src/backend/storage/smgr/smgr.c
+++ b/src/backend/storage/smgr/smgr.c
@@ -723,6 +723,15 @@ smgrimmedsync(SMgrRelation reln, ForkNumber forknum)
 	smgrsw[reln->smgr_which].smgr_immedsync(reln, forknum);
 }
 
+/*
+ * smgrunlink() -- unlink the storage file
+ */
+void
+smgrunlink(SMgrRelation reln, ForkNumber forknum, bool isRedo)
+{
+	smgrsw[reln->smgr_which].smgr_unlink(reln->smgr_rlocator, forknum, isRedo);
+}
+
 /*
  * AtEOXact_SMgr
  *
diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c
index 905b979947..c0938bdf3a 100644
--- a/src/bin/initdb/initdb.c
+++ b/src/bin/initdb/initdb.c
@@ -303,6 +303,7 @@ void		setup_signals(void);
 void		setup_text_search(void);
 void		create_data_directory(void);
 void		create_xlog_or_symlink(void);
+void		create_ulog(void);
 void		warn_on_mount_point(int error);
 void		initialize_data_directory(void);
 
@@ -2938,6 +2939,21 @@ create_xlog_or_symlink(void)
 	free(subdirloc);
 }
 
+/* Create undo log directory */
+void
+create_ulog(void)
+{
+	char	   *subdirloc;
+
+	/* form name of the place for the subdirectory */
+	subdirloc = psprintf("%s/pg_ulog", pg_data);
+
+	if (mkdir(subdirloc, pg_dir_create_mode) < 0)
+		pg_fatal("could not create directory \"%s\": %m",
+				 subdirloc);
+
+	free(subdirloc);
+}
 
 void
 warn_on_mount_point(int error)
@@ -2972,6 +2988,7 @@ initialize_data_directory(void)
 	create_data_directory();
 
 	create_xlog_or_symlink();
+	create_ulog();
 
 	/* Create required subdirectories (other than pg_wal) */
 	printf(_("creating subdirectories ... "));
diff --git a/src/bin/pg_rewind/parsexlog.c b/src/bin/pg_rewind/parsexlog.c
index 27782237d0..87b4659e27 100644
--- a/src/bin/pg_rewind/parsexlog.c
+++ b/src/bin/pg_rewind/parsexlog.c
@@ -28,7 +28,7 @@
  * RmgrNames is an array of the built-in resource manager names, to make error
  * messages a bit nicer.
  */
-#define PG_RMGR(symname,name,redo,desc,identify,startup,cleanup,mask,decode) \
+#define PG_RMGR(symname,name,redo,desc,identify,startup,cleanup,mask,decode,undo) \
   name,
 
 static const char *RmgrNames[RM_MAX_ID + 1] = {
diff --git a/src/bin/pg_waldump/rmgrdesc.c b/src/bin/pg_waldump/rmgrdesc.c
index 6b8c17bb4c..a21009c5b8 100644
--- a/src/bin/pg_waldump/rmgrdesc.c
+++ b/src/bin/pg_waldump/rmgrdesc.c
@@ -32,7 +32,7 @@
 #include "storage/standbydefs.h"
 #include "utils/relmapper.h"
 
-#define PG_RMGR(symname,name,redo,desc,identify,startup,cleanup,mask,decode) \
+#define PG_RMGR(symname,name,redo,desc,identify,startup,cleanup,mask,decode,undo) \
 	{ name, desc, identify},
 
 static const RmgrDescData RmgrDescTable[RM_N_BUILTIN_IDS] = {
diff --git a/src/include/access/rmgr.h b/src/include/access/rmgr.h
index 3b6a497e1b..d705de9256 100644
--- a/src/include/access/rmgr.h
+++ b/src/include/access/rmgr.h
@@ -19,7 +19,7 @@ typedef uint8 RmgrId;
  * Note: RM_MAX_ID must fit in RmgrId; widening that type will affect the XLOG
  * file format.
  */
-#define PG_RMGR(symname,name,redo,desc,identify,startup,cleanup,mask,decode) \
+#define PG_RMGR(symname,name,redo,desc,identify,startup,cleanup,mask,decode,undo) \
 	symname,
 
 typedef enum RmgrIds
diff --git a/src/include/access/rmgrlist.h b/src/include/access/rmgrlist.h
index 463bcb67c5..e15d951000 100644
--- a/src/include/access/rmgrlist.h
+++ b/src/include/access/rmgrlist.h
@@ -25,25 +25,25 @@
  */
 
 /* symbol name, textual name, redo, desc, identify, startup, cleanup, mask, decode */
-PG_RMGR(RM_XLOG_ID, "XLOG", xlog_redo, xlog_desc, xlog_identify, NULL, NULL, NULL, xlog_decode)
-PG_RMGR(RM_XACT_ID, "Transaction", xact_redo, xact_desc, xact_identify, NULL, NULL, NULL, xact_decode)
-PG_RMGR(RM_SMGR_ID, "Storage", smgr_redo, smgr_desc, smgr_identify, NULL, NULL, NULL, NULL)
-PG_RMGR(RM_CLOG_ID, "CLOG", clog_redo, clog_desc, clog_identify, NULL, NULL, NULL, NULL)
-PG_RMGR(RM_DBASE_ID, "Database", dbase_redo, dbase_desc, dbase_identify, NULL, NULL, NULL, NULL)
-PG_RMGR(RM_TBLSPC_ID, "Tablespace", tblspc_redo, tblspc_desc, tblspc_identify, NULL, NULL, NULL, NULL)
-PG_RMGR(RM_MULTIXACT_ID, "MultiXact", multixact_redo, multixact_desc, multixact_identify, NULL, NULL, NULL, NULL)
-PG_RMGR(RM_RELMAP_ID, "RelMap", relmap_redo, relmap_desc, relmap_identify, NULL, NULL, NULL, NULL)
-PG_RMGR(RM_STANDBY_ID, "Standby", standby_redo, standby_desc, standby_identify, NULL, NULL, NULL, standby_decode)
-PG_RMGR(RM_HEAP2_ID, "Heap2", heap2_redo, heap2_desc, heap2_identify, NULL, NULL, heap_mask, heap2_decode)
-PG_RMGR(RM_HEAP_ID, "Heap", heap_redo, heap_desc, heap_identify, NULL, NULL, heap_mask, heap_decode)
-PG_RMGR(RM_BTREE_ID, "Btree", btree_redo, btree_desc, btree_identify, btree_xlog_startup, btree_xlog_cleanup, btree_mask, NULL)
-PG_RMGR(RM_HASH_ID, "Hash", hash_redo, hash_desc, hash_identify, NULL, NULL, hash_mask, NULL)
-PG_RMGR(RM_GIN_ID, "Gin", gin_redo, gin_desc, gin_identify, gin_xlog_startup, gin_xlog_cleanup, gin_mask, NULL)
-PG_RMGR(RM_GIST_ID, "Gist", gist_redo, gist_desc, gist_identify, gist_xlog_startup, gist_xlog_cleanup, gist_mask, NULL)
-PG_RMGR(RM_SEQ_ID, "Sequence", seq_redo, seq_desc, seq_identify, NULL, NULL, seq_mask, NULL)
-PG_RMGR(RM_SPGIST_ID, "SPGist", spg_redo, spg_desc, spg_identify, spg_xlog_startup, spg_xlog_cleanup, spg_mask, NULL)
-PG_RMGR(RM_BRIN_ID, "BRIN", brin_redo, brin_desc, brin_identify, NULL, NULL, brin_mask, NULL)
-PG_RMGR(RM_COMMIT_TS_ID, "CommitTs", commit_ts_redo, commit_ts_desc, commit_ts_identify, NULL, NULL, NULL, NULL)
-PG_RMGR(RM_REPLORIGIN_ID, "ReplicationOrigin", replorigin_redo, replorigin_desc, replorigin_identify, NULL, NULL, NULL, NULL)
-PG_RMGR(RM_GENERIC_ID, "Generic", generic_redo, generic_desc, generic_identify, NULL, NULL, generic_mask, NULL)
-PG_RMGR(RM_LOGICALMSG_ID, "LogicalMessage", logicalmsg_redo, logicalmsg_desc, logicalmsg_identify, NULL, NULL, NULL, logicalmsg_decode)
+PG_RMGR(RM_XLOG_ID, "XLOG", xlog_redo, xlog_desc, xlog_identify, NULL, NULL, NULL, xlog_decode, NULL)
+PG_RMGR(RM_XACT_ID, "Transaction", xact_redo, xact_desc, xact_identify, NULL, NULL, NULL, xact_decode, NULL)
+PG_RMGR(RM_SMGR_ID, "Storage", smgr_redo, smgr_desc, smgr_identify, NULL, NULL, NULL, NULL, smgr_undo)
+PG_RMGR(RM_CLOG_ID, "CLOG", clog_redo, clog_desc, clog_identify, NULL, NULL, NULL, NULL, NULL)
+PG_RMGR(RM_DBASE_ID, "Database", dbase_redo, dbase_desc, dbase_identify, NULL, NULL, NULL, NULL, NULL)
+PG_RMGR(RM_TBLSPC_ID, "Tablespace", tblspc_redo, tblspc_desc, tblspc_identify, NULL, NULL, NULL, NULL, NULL)
+PG_RMGR(RM_MULTIXACT_ID, "MultiXact", multixact_redo, multixact_desc, multixact_identify, NULL, NULL, NULL, NULL, NULL)
+PG_RMGR(RM_RELMAP_ID, "RelMap", relmap_redo, relmap_desc, relmap_identify, NULL, NULL, NULL, NULL, NULL)
+PG_RMGR(RM_STANDBY_ID, "Standby", standby_redo, standby_desc, standby_identify, NULL, NULL, NULL, standby_decode, NULL)
+PG_RMGR(RM_HEAP2_ID, "Heap2", heap2_redo, heap2_desc, heap2_identify, NULL, NULL, heap_mask, heap2_decode, NULL)
+PG_RMGR(RM_HEAP_ID, "Heap", heap_redo, heap_desc, heap_identify, NULL, NULL, heap_mask, heap_decode, NULL)
+PG_RMGR(RM_BTREE_ID, "Btree", btree_redo, btree_desc, btree_identify, btree_xlog_startup, btree_xlog_cleanup, btree_mask, NULL, NULL)
+PG_RMGR(RM_HASH_ID, "Hash", hash_redo, hash_desc, hash_identify, NULL, NULL, hash_mask, NULL, NULL)
+PG_RMGR(RM_GIN_ID, "Gin", gin_redo, gin_desc, gin_identify, gin_xlog_startup, gin_xlog_cleanup, gin_mask, NULL, NULL)
+PG_RMGR(RM_GIST_ID, "Gist", gist_redo, gist_desc, gist_identify, gist_xlog_startup, gist_xlog_cleanup, gist_mask, NULL, NULL)
+PG_RMGR(RM_SEQ_ID, "Sequence", seq_redo, seq_desc, seq_identify, NULL, NULL, seq_mask, NULL, NULL)
+PG_RMGR(RM_SPGIST_ID, "SPGist", spg_redo, spg_desc, spg_identify, spg_xlog_startup, spg_xlog_cleanup, spg_mask, NULL, NULL)
+PG_RMGR(RM_BRIN_ID, "BRIN", brin_redo, brin_desc, brin_identify, NULL, NULL, brin_mask, NULL, NULL)
+PG_RMGR(RM_COMMIT_TS_ID, "CommitTs", commit_ts_redo, commit_ts_desc, commit_ts_identify, NULL, NULL, NULL, NULL, NULL)
+PG_RMGR(RM_REPLORIGIN_ID, "ReplicationOrigin", replorigin_redo, replorigin_desc, replorigin_identify, NULL, NULL, NULL, NULL, NULL)
+PG_RMGR(RM_GENERIC_ID, "Generic", generic_redo, generic_desc, generic_identify, NULL, NULL, generic_mask, NULL, NULL)
+PG_RMGR(RM_LOGICALMSG_ID, "LogicalMessage", logicalmsg_redo, logicalmsg_desc, logicalmsg_identify, NULL, NULL, NULL, logicalmsg_decode, NULL)
diff --git a/src/include/access/simpleundolog.h b/src/include/access/simpleundolog.h
new file mode 100644
index 0000000000..3d3bd2f7e2
--- /dev/null
+++ b/src/include/access/simpleundolog.h
@@ -0,0 +1,36 @@
+#ifndef SIMPLE_UNDOLOG_H
+#define SIMPLE_UNDOLOG_H
+
+#include "access/rmgr.h"
+#include "port/pg_crc32c.h"
+
+#define SIMPLE_UNDOLOG_DIR	"pg_ulog"
+
+typedef struct SimpleUndoLogRecord
+{
+	uint32		ul_tot_len;		/* total length of entire record */
+	pg_crc32c	ul_crc;			/* CRC for this record */
+	RmgrId		ul_rmid;		/* resource manager for this record */
+	uint8		ul_info;		/* record info */
+	TransactionId ul_xid;		/* transaction id */
+	/* rmgr-specific data follow, no padding */
+} SimpleUndoLogRecord;
+
+extern void SimpleUndoLogWrite(RmgrId rmgr, uint8 info,
+							   TransactionId xid, void *data, int len);
+extern void SimpleUndoLogSetPrpared(TransactionId xid, bool prepared);
+extern void AtEOXact_SimpleUndoLog(bool isCommit, TransactionId xid);
+extern void UndoLogCleanup(void);
+
+extern void AtPrepare_UndoLog(TransactionId xid);
+extern void PostPrepare_UndoLog(void);
+extern void undolog_twophase_recover(TransactionId xid, uint16 info,
+									 void *recdata, uint32 len);
+extern void undolog_twophase_postcommit(TransactionId xid, uint16 info,
+										void *recdata, uint32 len);
+extern void undolog_twophase_postabort(TransactionId xid, uint16 info,
+									   void *recdata, uint32 len);
+extern void undolog_twophase_standby_recover(TransactionId xid, uint16 info,
+											 void *recdata, uint32 len);
+
+#endif							/* SIMPLE_UNDOLOG_H */
diff --git a/src/include/catalog/storage.h b/src/include/catalog/storage.h
index 45a3c7835c..0b39c6ef56 100644
--- a/src/include/catalog/storage.h
+++ b/src/include/catalog/storage.h
@@ -25,6 +25,8 @@ extern PGDLLIMPORT int wal_skip_threshold;
 extern SMgrRelation RelationCreateStorage(RelFileLocator rlocator,
 										  char relpersistence,
 										  bool register_delete);
+extern void RelationCreateInitFork(Relation rel);
+extern void RelationDropInitFork(Relation rel);
 extern void RelationDropStorage(Relation rel);
 extern void RelationPreserveStorage(RelFileLocator rlocator, bool atCommit);
 extern void RelationPreTruncate(Relation rel);
@@ -43,6 +45,7 @@ extern void RestorePendingSyncs(char *startAddress);
 extern void smgrDoPendingDeletes(bool isCommit);
 extern void smgrDoPendingSyncs(bool isCommit, bool isParallelWorker);
 extern int	smgrGetPendingDeletes(bool forCommit, RelFileLocator **ptr);
+extern void smgrDoPendingCleanups(bool isCommit);
 extern void AtSubCommit_smgr(void);
 extern void AtSubAbort_smgr(void);
 extern void PostPrepare_smgr(void);
diff --git a/src/include/catalog/storage_ulog.h b/src/include/catalog/storage_ulog.h
new file mode 100644
index 0000000000..8e47428e66
--- /dev/null
+++ b/src/include/catalog/storage_ulog.h
@@ -0,0 +1,35 @@
+/*-------------------------------------------------------------------------
+ *
+ * storage_ulog.h
+ *	  prototypes for Undo Log support for backend/catalog/storage.c
+ *
+ *
+ * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/catalog/storage_ulog.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef STORAGE_ULOG_H
+#define STORAGE_ULOG_H
+
+/* ULOG gives us high 4 bits (just following xlog) */
+#define ULOG_SMGR_UNCOMMITED_STORAGE	0x10
+
+/* undo log entry for uncommitted storage files */
+typedef struct ul_uncommitted_storage
+{
+	RelFileLocator	rlocator;
+	ForkNumber		forknum;
+	bool			remove;
+} ul_uncommitted_storage;
+
+/* flags for xl_smgr_truncate */
+#define SMGR_TRUNCATE_HEAP		0x0001
+
+void smgr_undo(SimpleUndoLogRecord *record, bool crash_prepared);
+
+#define ULogRecGetData(record) ((char *)record + sizeof(SimpleUndoLogRecord))
+
+#endif							/* STORAGE_XLOG_H */
diff --git a/src/include/catalog/storage_xlog.h b/src/include/catalog/storage_xlog.h
index 6b0a7aa3df..5122f5b61d 100644
--- a/src/include/catalog/storage_xlog.h
+++ b/src/include/catalog/storage_xlog.h
@@ -29,13 +29,21 @@
 /* XLOG gives us high 4 bits */
 #define XLOG_SMGR_CREATE	0x10
 #define XLOG_SMGR_TRUNCATE	0x20
+#define XLOG_SMGR_UNLINK	0x30
 
 typedef struct xl_smgr_create
 {
 	RelFileLocator rlocator;
 	ForkNumber	forkNum;
+	TransactionId	xid;
 } xl_smgr_create;
 
+typedef struct xl_smgr_unlink
+{
+	RelFileLocator rlocator;
+	ForkNumber	forkNum;
+} xl_smgr_unlink;
+
 /* flags for xl_smgr_truncate */
 #define SMGR_TRUNCATE_HEAP		0x0001
 #define SMGR_TRUNCATE_VM		0x0002
@@ -51,6 +59,7 @@ typedef struct xl_smgr_truncate
 } xl_smgr_truncate;
 
 extern void log_smgrcreate(const RelFileLocator *rlocator, ForkNumber forkNum);
+extern void log_smgrunlink(const RelFileLocator *rlocator, ForkNumber forkNum);
 
 extern void smgr_redo(XLogReaderState *record);
 extern void smgr_desc(StringInfo buf, XLogReaderState *record);
diff --git a/src/include/storage/reinit.h b/src/include/storage/reinit.h
index e2bbb5abe9..ccd182531d 100644
--- a/src/include/storage/reinit.h
+++ b/src/include/storage/reinit.h
@@ -16,9 +16,11 @@
 #define REINIT_H
 
 #include "common/relpath.h"
+#include "storage/relfilelocator.h"
 
 
 extern void ResetUnloggedRelations(int op);
+extern void ResetUnloggedRelationIgnore(RelFileLocator rloc);
 extern bool parse_filename_for_nontemp_relation(const char *name,
 												int *relnumchars,
 												ForkNumber *fork);
diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h
index a9a179aaba..74194cf1e4 100644
--- a/src/include/storage/smgr.h
+++ b/src/include/storage/smgr.h
@@ -88,6 +88,7 @@ extern void smgrcloserellocator(RelFileLocatorBackend rlocator);
 extern void smgrrelease(SMgrRelation reln);
 extern void smgrreleaseall(void);
 extern void smgrcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo);
+extern void smgrunlink(SMgrRelation reln, ForkNumber forknum, bool isRedo);
 extern void smgrdosyncall(SMgrRelation *rels, int nrels);
 extern void smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo);
 extern void smgrextend(SMgrRelation reln, ForkNumber forknum,
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 49a33c0387..b9255e5e25 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -1996,6 +1996,7 @@ PatternInfo
 PatternInfoArray
 Pattern_Prefix_Status
 Pattern_Type
+PendingCleanup
 PendingFsyncEntry
 PendingRelDelete
 PendingRelSync
@@ -2553,6 +2554,7 @@ SimplePtrListCell
 SimpleStats
 SimpleStringList
 SimpleStringListCell
+SimpleUndoLogRecord
 SingleBoundSortItem
 Size
 SkipPages
@@ -2909,6 +2911,8 @@ ULONG
 ULONG_PTR
 UV
 UVersionInfo
+UndoDescData
+UndoLogFileHeader
 UnicodeNormalizationForm
 UnicodeNormalizationQC
 Unique
@@ -3826,6 +3830,7 @@ uint8
 uint8_t
 uint8x16_t
 uintptr_t
+ul_uncommitted_storage
 unicodeStyleBorderFormat
 unicodeStyleColumnFormat
 unicodeStyleFormat
@@ -3938,6 +3943,7 @@ xl_running_xacts
 xl_seq_rec
 xl_smgr_create
 xl_smgr_truncate
+xl_smgr_unlink
 xl_standby_lock
 xl_standby_locks
 xl_tblspc_create_rec
-- 
2.39.3

