From 241a8e7610e699ddaeeb38066b347bdac5d5e535 Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.munro@gmail.com>
Date: Sun, 2 Jan 2022 11:39:23 +1300
Subject: [PATCH v3 1/1] WIP: Move SLRU data into the regular buffer pool.

Previously, slru.c managed 7 small buffer pools for special queue-like
data.  It had very simple sizing, locking, mapping, replacement and
write back.  Switch to the regular buffer pool, to benefit from the
standard infrastructure.

The layout of SLRU files on disk remains unchanged.  File access is done
through new callback functions exposed through smgr.c.

Switching SLRU page format to "standard" format (and thus enabling
in-page LSNs and checksums) is left for a later project.  For now, add
support for raw (headerless) pages in the buffer pool.

XXX Among the problems not yet addressed:
* SMgrRelation objects cache slru_file descriptors, but there is no
  invalidation for when SLRUs wrap around so they might wrap around
* CLOG lookups are a bit slower if you have to do a buffer mapping table
  lookup first; to do better than that you need a little cache, which
  for now is single item, and could be generalised; maybe CLOG is
  actually so hot that it needs summary data that can be accessed
  atomically?
* CLOG LSNs are now one-per-page, which probably isn't good ("group_lsn"
  dropped by this patch)
* "external LSNs" are in a new separate array, but could perhaps be
  stuffed into BufferDescriptor
* The locking strategies for all SLRUs need to be reviewed
* Support for "extension" SLRUs (ie SLRUs owned by code in core) is
  removed -- did anyone ever actually use that?  Code searches seem to
  indicate no
* While committing, in a critical section, the buffer pinning code
  allocates memory which isn't allowed; I commented that stuff out for
  now; perhaps we can make the resowner stuff non-allocating?
* Something is occasionally corrupting memory (locking problem, or
  bad pointer arithmetic?)

XXX Work in progress!
XXX Unfinished, experimental!
XXX Contains known bugs!

Discussion: https://postgr.es/m/CA%2BhUKGKAYze99B-jk9NoMp-2BDqAgiRC4oJv%2BbFxghNgdieq8Q%40mail.gmail.com
---
 doc/src/sgml/monitoring.sgml             |   56 +-
 src/backend/access/transam/clog.c        |  199 +--
 src/backend/access/transam/commit_ts.c   |  156 +-
 src/backend/access/transam/multixact.c   |  358 ++---
 src/backend/access/transam/slru.c        | 1734 ++++++----------------
 src/backend/access/transam/subtrans.c    |  108 +-
 src/backend/access/transam/xlog.c        |   19 +-
 src/backend/commands/async.c             |   67 +-
 src/backend/postmaster/checkpointer.c    |   18 +
 src/backend/storage/buffer/buf_init.c    |   17 +-
 src/backend/storage/buffer/bufmgr.c      |  151 +-
 src/backend/storage/ipc/ipci.c           |    4 -
 src/backend/storage/lmgr/lwlock.c        |   14 -
 src/backend/storage/lmgr/lwlocknames.txt |   10 +-
 src/backend/storage/lmgr/predicate.c     |   62 +-
 src/backend/storage/smgr/smgr.c          |   20 +-
 src/backend/storage/sync/sync.c          |   22 +-
 src/backend/utils/activity/pgstat_slru.c |   35 +-
 src/backend/utils/activity/wait_event.c  |    4 +-
 src/backend/utils/adt/pgstatfuncs.c      |    8 +-
 src/backend/utils/mmgr/mcxt.c            |   12 +-
 src/include/access/clog.h                |    6 -
 src/include/access/commit_ts.h           |    3 -
 src/include/access/multixact.h           |    3 -
 src/include/access/slru.h                |  193 +--
 src/include/access/subtrans.h            |    3 -
 src/include/pgstat.h                     |    1 -
 src/include/storage/buf_internals.h      |    6 +
 src/include/storage/bufmgr.h             |    9 +
 src/include/storage/lwlock.h             |    9 +-
 src/include/storage/smgr.h               |    8 +
 src/include/storage/sync.h               |    6 +-
 src/include/utils/pgstat_internal.h      |   27 +-
 src/include/utils/wait_event.h           |    2 +-
 34 files changed, 1022 insertions(+), 2328 deletions(-)

diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index 7dbbab6f5c9..83ab5459339 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -1448,9 +1448,8 @@ postgres   27093  0.0  0.0  30096  2752 ?        Ss   11:34   0:00 postgres: ser
       <entry>Waiting for a write to a replication slot control file.</entry>
      </row>
      <row>
-      <entry><literal>SLRUFlushSync</literal></entry>
-      <entry>Waiting for SLRU data to reach durable storage during a checkpoint
-       or database shutdown.</entry>
+      <entry><literal>SLRUFlush</literal></entry>
+      <entry>Initiating writeback of SLRU data.</entry>
      </row>
      <row>
       <entry><literal>SLRURead</literal></entry>
@@ -1458,8 +1457,7 @@ postgres   27093  0.0  0.0  30096  2752 ?        Ss   11:34   0:00 postgres: ser
      </row>
      <row>
       <entry><literal>SLRUSync</literal></entry>
-      <entry>Waiting for SLRU data to reach durable storage following a page
-       write.</entry>
+      <entry>Waiting for SLRU data to reach durable storage.</entry>
      </row>
      <row>
       <entry><literal>SLRUWrite</literal></entry>
@@ -1954,14 +1952,6 @@ postgres   27093  0.0  0.0  30096  2752 ?        Ss   11:34   0:00 postgres: ser
       <entry>Waiting to read or update the last value set for a
        transaction commit timestamp.</entry>
      </row>
-     <row>
-      <entry><literal>CommitTsBuffer</literal></entry>
-      <entry>Waiting for I/O on a commit timestamp SLRU buffer.</entry>
-     </row>
-     <row>
-      <entry><literal>CommitTsSLRU</literal></entry>
-      <entry>Waiting to access the commit timestamp SLRU cache.</entry>
-     </row>
      <row>
       <entry><literal>ControlFile</literal></entry>
       <entry>Waiting to read or update the <filename>pg_control</filename>
@@ -1991,31 +1981,10 @@ postgres   27093  0.0  0.0  30096  2752 ?        Ss   11:34   0:00 postgres: ser
       <entry><literal>MultiXactGen</literal></entry>
       <entry>Waiting to read or update shared multixact state.</entry>
      </row>
-     <row>
-      <entry><literal>MultiXactMemberBuffer</literal></entry>
-      <entry>Waiting for I/O on a multixact member SLRU buffer.</entry>
-     </row>
-     <row>
-      <entry><literal>MultiXactMemberSLRU</literal></entry>
-      <entry>Waiting to access the multixact member SLRU cache.</entry>
-     </row>
-     <row>
-      <entry><literal>MultiXactOffsetBuffer</literal></entry>
-      <entry>Waiting for I/O on a multixact offset SLRU buffer.</entry>
-     </row>
-     <row>
-      <entry><literal>MultiXactOffsetSLRU</literal></entry>
-      <entry>Waiting to access the multixact offset SLRU cache.</entry>
-     </row>
      <row>
       <entry><literal>MultiXactTruncation</literal></entry>
       <entry>Waiting to read or truncate multixact information.</entry>
      </row>
-     <row>
-      <entry><literal>NotifyBuffer</literal></entry>
-      <entry>Waiting for I/O on a <command>NOTIFY</command> message SLRU
-       buffer.</entry>
-     </row>
      <row>
       <entry><literal>NotifyQueue</literal></entry>
       <entry>Waiting to read or update <command>NOTIFY</command> messages.</entry>
@@ -2025,11 +1994,6 @@ postgres   27093  0.0  0.0  30096  2752 ?        Ss   11:34   0:00 postgres: ser
       <entry>Waiting to update limit on <command>NOTIFY</command> message
        storage.</entry>
      </row>
-     <row>
-      <entry><literal>NotifySLRU</literal></entry>
-      <entry>Waiting to access the <command>NOTIFY</command> message SLRU
-       cache.</entry>
-     </row>
      <row>
       <entry><literal>OidGen</literal></entry>
       <entry>Waiting to allocate a new OID.</entry>
@@ -2114,11 +2078,6 @@ postgres   27093  0.0  0.0  30096  2752 ?        Ss   11:34   0:00 postgres: ser
       <entry><literal>ReplicationSlotIO</literal></entry>
       <entry>Waiting for I/O on a replication slot.</entry>
      </row>
-     <row>
-      <entry><literal>SerialBuffer</literal></entry>
-      <entry>Waiting for I/O on a serializable transaction conflict SLRU
-       buffer.</entry>
-     </row>
      <row>
       <entry><literal>SerializableFinishedList</literal></entry>
       <entry>Waiting to access the list of finished serializable
@@ -2146,11 +2105,6 @@ postgres   27093  0.0  0.0  30096  2752 ?        Ss   11:34   0:00 postgres: ser
       <entry>Waiting to read or update information about serializable
        transactions.</entry>
      </row>
-     <row>
-      <entry><literal>SerialSLRU</literal></entry>
-      <entry>Waiting to access the serializable transaction conflict SLRU
-       cache.</entry>
-     </row>
      <row>
       <entry><literal>SharedTidBitmap</literal></entry>
       <entry>Waiting to access a shared TID bitmap during a parallel bitmap
@@ -2218,10 +2172,6 @@ postgres   27093  0.0  0.0  30096  2752 ?        Ss   11:34   0:00 postgres: ser
       <entry>Waiting to update limits on transaction id and multixact
        consumption.</entry>
      </row>
-     <row>
-      <entry><literal>XactBuffer</literal></entry>
-      <entry>Waiting for I/O on a transaction status SLRU buffer.</entry>
-     </row>
      <row>
       <entry><literal>XactSLRU</literal></entry>
       <entry>Waiting to access the transaction status SLRU cache.</entry>
diff --git a/src/backend/access/transam/clog.c b/src/backend/access/transam/clog.c
index 3d9088a7048..cbe537ae72a 100644
--- a/src/backend/access/transam/clog.c
+++ b/src/backend/access/transam/clog.c
@@ -41,6 +41,8 @@
 #include "miscadmin.h"
 #include "pg_trace.h"
 #include "pgstat.h"
+#include "storage/bufmgr.h"
+#include "storage/buf_internals.h"
 #include "storage/proc.h"
 #include "storage/sync.h"
 
@@ -81,15 +83,8 @@
  */
 #define THRESHOLD_SUBTRANS_CLOG_OPT	5
 
-/*
- * Link to shared-memory data structures for CLOG control
- */
-static SlruCtlData XactCtlData;
-
-#define XactCtl (&XactCtlData)
-
 
-static int	ZeroCLOGPage(int pageno, bool writeXlog);
+static Buffer ZeroCLOGPage(int pageno, bool writeXlog);
 static bool CLOGPagePrecedes(int page1, int page2);
 static void WriteZeroPageXlogRec(int pageno);
 static void WriteTruncateXlogRec(int pageno, TransactionId oldestXact,
@@ -99,7 +94,7 @@ static void TransactionIdSetPageStatus(TransactionId xid, int nsubxids,
 									   XLogRecPtr lsn, int pageno,
 									   bool all_xact_same_page);
 static void TransactionIdSetStatusBit(TransactionId xid, XidStatus status,
-									  XLogRecPtr lsn, int slotno);
+									  XLogRecPtr lsn, Buffer buffer);
 static void set_status_by_pages(int nsubxids, TransactionId *subxids,
 								XidStatus status, XLogRecPtr lsn);
 static bool TransactionGroupUpdateXidStatus(TransactionId xid,
@@ -341,13 +336,12 @@ TransactionIdSetPageStatusInternal(TransactionId xid, int nsubxids,
 								   TransactionId *subxids, XidStatus status,
 								   XLogRecPtr lsn, int pageno)
 {
-	int			slotno;
+	Buffer		buffer;
 	int			i;
 
 	Assert(status == TRANSACTION_STATUS_COMMITTED ||
 		   status == TRANSACTION_STATUS_ABORTED ||
 		   (status == TRANSACTION_STATUS_SUB_COMMITTED && !TransactionIdIsValid(xid)));
-	Assert(LWLockHeldByMeInMode(XactSLRULock, LW_EXCLUSIVE));
 
 	/*
 	 * If we're doing an async commit (ie, lsn is valid), then we must wait
@@ -358,7 +352,8 @@ TransactionIdSetPageStatusInternal(TransactionId xid, int nsubxids,
 	 * write-busy, since we don't care if the update reaches disk sooner than
 	 * we think.
 	 */
-	slotno = SimpleLruReadPage(XactCtl, pageno, XLogRecPtrIsInvalid(lsn), xid);
+	buffer = ReadSlruBuffer(SLRU_CLOG_REL_ID, pageno);
+	LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
 
 	/*
 	 * Set the main transaction id, if any.
@@ -376,25 +371,26 @@ TransactionIdSetPageStatusInternal(TransactionId xid, int nsubxids,
 		{
 			for (i = 0; i < nsubxids; i++)
 			{
-				Assert(XactCtl->shared->page_number[slotno] == TransactionIdToPage(subxids[i]));
+				Assert(pageno == TransactionIdToPage(subxids[i]));
 				TransactionIdSetStatusBit(subxids[i],
 										  TRANSACTION_STATUS_SUB_COMMITTED,
-										  lsn, slotno);
+										  lsn, buffer);
 			}
 		}
 
 		/* ... then the main transaction */
-		TransactionIdSetStatusBit(xid, status, lsn, slotno);
+		TransactionIdSetStatusBit(xid, status, lsn, buffer);
 	}
 
 	/* Set the subtransactions */
 	for (i = 0; i < nsubxids; i++)
 	{
-		Assert(XactCtl->shared->page_number[slotno] == TransactionIdToPage(subxids[i]));
-		TransactionIdSetStatusBit(subxids[i], status, lsn, slotno);
+		Assert(pageno == TransactionIdToPage(subxids[i]));
+		TransactionIdSetStatusBit(subxids[i], status, lsn, buffer);
 	}
 
-	XactCtl->shared->page_dirty[slotno] = true;
+	MarkBufferDirty(buffer);
+	UnlockReleaseBuffer(buffer);
 }
 
 /*
@@ -568,7 +564,7 @@ TransactionGroupUpdateXidStatus(TransactionId xid, XidStatus status,
  * Must be called with XactSLRULock held
  */
 static void
-TransactionIdSetStatusBit(TransactionId xid, XidStatus status, XLogRecPtr lsn, int slotno)
+TransactionIdSetStatusBit(TransactionId xid, XidStatus status, XLogRecPtr lsn, Buffer buffer)
 {
 	int			byteno = TransactionIdToByte(xid);
 	int			bshift = TransactionIdToBIndex(xid) * CLOG_BITS_PER_XACT;
@@ -576,7 +572,10 @@ TransactionIdSetStatusBit(TransactionId xid, XidStatus status, XLogRecPtr lsn, i
 	char		byteval;
 	char		curval;
 
-	byteptr = XactCtl->shared->page_buffer[slotno] + byteno;
+	Assert(LWLockHeldByMeInMode(BufferDescriptorGetContentLock(GetBufferDescriptor(buffer - 1)),
+								LW_EXCLUSIVE));
+
+	byteptr = BufferGetPage(buffer) + byteno;
 	curval = (*byteptr >> bshift) & CLOG_XACT_BITMASK;
 
 	/*
@@ -605,7 +604,7 @@ TransactionIdSetStatusBit(TransactionId xid, XidStatus status, XLogRecPtr lsn, i
 	*byteptr = byteval;
 
 	/*
-	 * Update the group LSN if the transaction completion LSN is higher.
+	 * Update the buffer LSN if the transaction completion LSN is higher.
 	 *
 	 * Note: lsn will be invalid when supplied during InRecovery processing,
 	 * so we don't need to do anything special to avoid LSN updates during
@@ -614,10 +613,8 @@ TransactionIdSetStatusBit(TransactionId xid, XidStatus status, XLogRecPtr lsn, i
 	 */
 	if (!XLogRecPtrIsInvalid(lsn))
 	{
-		int			lsnindex = GetLSNIndex(slotno, xid);
-
-		if (XactCtl->shared->group_lsn[lsnindex] < lsn)
-			XactCtl->shared->group_lsn[lsnindex] = lsn;
+		if (BufferGetExternalLSN(GetBufferDescriptor(buffer)) < lsn)
+			BufferSetExternalLSN(GetBufferDescriptor(buffer), lsn);
 	}
 }
 
@@ -642,67 +639,22 @@ TransactionIdGetStatus(TransactionId xid, XLogRecPtr *lsn)
 	int			pageno = TransactionIdToPage(xid);
 	int			byteno = TransactionIdToByte(xid);
 	int			bshift = TransactionIdToBIndex(xid) * CLOG_BITS_PER_XACT;
-	int			slotno;
-	int			lsnindex;
 	char	   *byteptr;
 	XidStatus	status;
+	Buffer		buffer;
 
-	/* lock is acquired by SimpleLruReadPage_ReadOnly */
-
-	slotno = SimpleLruReadPage_ReadOnly(XactCtl, pageno, xid);
-	byteptr = XactCtl->shared->page_buffer[slotno] + byteno;
+	buffer = ReadSlruBuffer(SLRU_CLOG_REL_ID, pageno);
+	byteptr = BufferGetPage(buffer) + byteno;
 
 	status = (*byteptr >> bshift) & CLOG_XACT_BITMASK;
 
-	lsnindex = GetLSNIndex(slotno, xid);
-	*lsn = XactCtl->shared->group_lsn[lsnindex];
+	*lsn = BufferGetExternalLSN(GetBufferDescriptor(buffer));
 
-	LWLockRelease(XactSLRULock);
+	ReleaseBuffer(buffer);
 
 	return status;
 }
 
-/*
- * Number of shared CLOG buffers.
- *
- * On larger multi-processor systems, it is possible to have many CLOG page
- * requests in flight at one time which could lead to disk access for CLOG
- * page if the required page is not found in memory.  Testing revealed that we
- * can get the best performance by having 128 CLOG buffers, more than that it
- * doesn't improve performance.
- *
- * Unconditionally keeping the number of CLOG buffers to 128 did not seem like
- * a good idea, because it would increase the minimum amount of shared memory
- * required to start, which could be a problem for people running very small
- * configurations.  The following formula seems to represent a reasonable
- * compromise: people with very low values for shared_buffers will get fewer
- * CLOG buffers as well, and everyone else will get 128.
- */
-Size
-CLOGShmemBuffers(void)
-{
-	return Min(128, Max(4, NBuffers / 512));
-}
-
-/*
- * Initialization of shared memory for CLOG
- */
-Size
-CLOGShmemSize(void)
-{
-	return SimpleLruShmemSize(CLOGShmemBuffers(), CLOG_LSNS_PER_PAGE);
-}
-
-void
-CLOGShmemInit(void)
-{
-	XactCtl->PagePrecedes = CLOGPagePrecedes;
-	SimpleLruInit(XactCtl, "Xact", CLOGShmemBuffers(), CLOG_LSNS_PER_PAGE,
-				  XactSLRULock, "pg_xact", LWTRANCHE_XACT_BUFFER,
-				  SYNC_HANDLER_CLOG);
-	SlruPagePrecedesUnitTests(XactCtl, CLOG_XACTS_PER_PAGE);
-}
-
 /*
  * This func must be called ONCE on system install.  It creates
  * the initial CLOG segment.  (The CLOG directory is assumed to
@@ -712,18 +664,15 @@ CLOGShmemInit(void)
 void
 BootStrapCLOG(void)
 {
-	int			slotno;
-
-	LWLockAcquire(XactSLRULock, LW_EXCLUSIVE);
+	Buffer		buffer;
 
 	/* Create and zero the first page of the commit log */
-	slotno = ZeroCLOGPage(0, false);
+	buffer = ZeroCLOGPage(0, false);
 
 	/* Make sure it's written out */
-	SimpleLruWritePage(XactCtl, slotno);
-	Assert(!XactCtl->shared->page_dirty[slotno]);
+	FlushOneBuffer(buffer);
 
-	LWLockRelease(XactSLRULock);
+	UnlockReleaseBuffer(buffer);
 }
 
 /*
@@ -735,17 +684,18 @@ BootStrapCLOG(void)
  *
  * Control lock must be held at entry, and will be held at exit.
  */
-static int
+static Buffer
 ZeroCLOGPage(int pageno, bool writeXlog)
 {
-	int			slotno;
+	Buffer		buffer;
 
-	slotno = SimpleLruZeroPage(XactCtl, pageno);
+	buffer = ZeroSlruBuffer(SLRU_CLOG_REL_ID, pageno);
+	MarkBufferDirty(buffer);
 
 	if (writeXlog)
 		WriteZeroPageXlogRec(pageno);
 
-	return slotno;
+	return buffer;
 }
 
 /*
@@ -755,17 +705,6 @@ ZeroCLOGPage(int pageno, bool writeXlog)
 void
 StartupCLOG(void)
 {
-	TransactionId xid = XidFromFullTransactionId(ShmemVariableCache->nextXid);
-	int			pageno = TransactionIdToPage(xid);
-
-	LWLockAcquire(XactSLRULock, LW_EXCLUSIVE);
-
-	/*
-	 * Initialize our idea of the latest page number.
-	 */
-	XactCtl->shared->latest_page_number = pageno;
-
-	LWLockRelease(XactSLRULock);
 }
 
 /*
@@ -777,8 +716,6 @@ TrimCLOG(void)
 	TransactionId xid = XidFromFullTransactionId(ShmemVariableCache->nextXid);
 	int			pageno = TransactionIdToPage(xid);
 
-	LWLockAcquire(XactSLRULock, LW_EXCLUSIVE);
-
 	/*
 	 * Zero out the remainder of the current clog page.  Under normal
 	 * circumstances it should be zeroes already, but it seems at least
@@ -795,40 +732,24 @@ TrimCLOG(void)
 	{
 		int			byteno = TransactionIdToByte(xid);
 		int			bshift = TransactionIdToBIndex(xid) * CLOG_BITS_PER_XACT;
-		int			slotno;
 		char	   *byteptr;
+		Buffer		buffer;
 
-		slotno = SimpleLruReadPage(XactCtl, pageno, false, xid);
-		byteptr = XactCtl->shared->page_buffer[slotno] + byteno;
+		buffer = ReadSlruBuffer(SLRU_CLOG_REL_ID, pageno);
+		LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+		byteptr = BufferGetPage(buffer) + byteno;
 
 		/* Zero so-far-unused positions in the current byte */
 		*byteptr &= (1 << bshift) - 1;
 		/* Zero the rest of the page */
 		MemSet(byteptr + 1, 0, BLCKSZ - byteno - 1);
 
-		XactCtl->shared->page_dirty[slotno] = true;
-	}
+		MarkBufferDirty(buffer);
 
-	LWLockRelease(XactSLRULock);
-}
-
-/*
- * Perform a checkpoint --- either during shutdown, or on-the-fly
- */
-void
-CheckPointCLOG(void)
-{
-	/*
-	 * Write dirty CLOG pages to disk.  This may result in sync requests
-	 * queued for later handling by ProcessSyncRequests(), as part of the
-	 * checkpoint.
-	 */
-	TRACE_POSTGRESQL_CLOG_CHECKPOINT_START(true);
-	SimpleLruWriteAll(XactCtl, true);
-	TRACE_POSTGRESQL_CLOG_CHECKPOINT_DONE(true);
+		UnlockReleaseBuffer(buffer);
+	}
 }
 
-
 /*
  * Make sure that CLOG has room for a newly-allocated XID.
  *
@@ -852,12 +773,8 @@ ExtendCLOG(TransactionId newestXact)
 
 	pageno = TransactionIdToPage(newestXact);
 
-	LWLockAcquire(XactSLRULock, LW_EXCLUSIVE);
-
 	/* Zero the page and make an XLOG entry about it */
-	ZeroCLOGPage(pageno, true);
-
-	LWLockRelease(XactSLRULock);
+	UnlockReleaseBuffer(ZeroCLOGPage(pageno, true));
 }
 
 
@@ -888,7 +805,8 @@ TruncateCLOG(TransactionId oldestXact, Oid oldestxid_datoid)
 	cutoffPage = TransactionIdToPage(oldestXact);
 
 	/* Check to see if there's any files that could be removed */
-	if (!SlruScanDirectory(XactCtl, SlruScanDirCbReportPresence, &cutoffPage))
+	if (!SlruScanDirectory(SLRU_CLOG_REL_ID, CLOGPagePrecedes,
+						   SlruScanDirCbReportPresence, &cutoffPage))
 		return;					/* nothing to remove */
 
 	/*
@@ -909,7 +827,7 @@ TruncateCLOG(TransactionId oldestXact, Oid oldestxid_datoid)
 	WriteTruncateXlogRec(cutoffPage, oldestXact, oldestxid_datoid);
 
 	/* Now we can remove the old CLOG segment(s) */
-	SimpleLruTruncate(XactCtl, cutoffPage);
+	SimpleLruTruncate(SLRU_CLOG_REL_ID, CLOGPagePrecedes, cutoffPage);
 }
 
 
@@ -994,17 +912,13 @@ clog_redo(XLogReaderState *record)
 	if (info == CLOG_ZEROPAGE)
 	{
 		int			pageno;
-		int			slotno;
+		Buffer		buffer;
 
 		memcpy(&pageno, XLogRecGetData(record), sizeof(int));
 
-		LWLockAcquire(XactSLRULock, LW_EXCLUSIVE);
-
-		slotno = ZeroCLOGPage(pageno, false);
-		SimpleLruWritePage(XactCtl, slotno);
-		Assert(!XactCtl->shared->page_dirty[slotno]);
-
-		LWLockRelease(XactSLRULock);
+		buffer = ZeroCLOGPage(pageno, false);
+		FlushOneBuffer(buffer);
+		UnlockReleaseBuffer(buffer);
 	}
 	else if (info == CLOG_TRUNCATE)
 	{
@@ -1014,17 +928,8 @@ clog_redo(XLogReaderState *record)
 
 		AdvanceOldestClogXid(xlrec.oldestXact);
 
-		SimpleLruTruncate(XactCtl, xlrec.pageno);
+		SimpleLruTruncate(SLRU_CLOG_REL_ID, CLOGPagePrecedes, xlrec.pageno);
 	}
 	else
 		elog(PANIC, "clog_redo: unknown op code %u", info);
 }
-
-/*
- * Entrypoint for sync.c to sync clog files.
- */
-int
-clogsyncfiletag(const FileTag *ftag, char *path)
-{
-	return SlruSyncFileTag(XactCtl, ftag, path);
-}
diff --git a/src/backend/access/transam/commit_ts.c b/src/backend/access/transam/commit_ts.c
index 4dc8d402bd3..1eac761b393 100644
--- a/src/backend/access/transam/commit_ts.c
+++ b/src/backend/access/transam/commit_ts.c
@@ -73,13 +73,6 @@ typedef struct CommitTimestampEntry
 #define TransactionIdToCTsEntry(xid)	\
 	((xid) % (TransactionId) COMMIT_TS_XACTS_PER_PAGE)
 
-/*
- * Link to shared-memory data structures for CommitTs control
- */
-static SlruCtlData CommitTsCtlData;
-
-#define CommitTsCtl (&CommitTsCtlData)
-
 /*
  * We keep a cache of the last value set in shared memory.
  *
@@ -110,7 +103,7 @@ static void SetXidCommitTsInPage(TransactionId xid, int nsubxids,
 static void TransactionIdSetCommitTs(TransactionId xid, TimestampTz ts,
 									 RepOriginId nodeid, int slotno);
 static void error_commit_ts_disabled(void);
-static int	ZeroCommitTsPage(int pageno, bool writeXlog);
+static Buffer ZeroCommitTsPage(int pageno, bool writeXlog);
 static bool CommitTsPagePrecedes(int page1, int page2);
 static void ActivateCommitTs(void);
 static void DeactivateCommitTs(void);
@@ -219,30 +212,27 @@ SetXidCommitTsInPage(TransactionId xid, int nsubxids,
 					 TransactionId *subxids, TimestampTz ts,
 					 RepOriginId nodeid, int pageno)
 {
-	int			slotno;
 	int			i;
+	Buffer		buffer;
 
-	LWLockAcquire(CommitTsSLRULock, LW_EXCLUSIVE);
-
-	slotno = SimpleLruReadPage(CommitTsCtl, pageno, true, xid);
+	buffer = ReadSlruBuffer(SLRU_COMMITTS_REL_ID, pageno);
+	LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
 
-	TransactionIdSetCommitTs(xid, ts, nodeid, slotno);
+	TransactionIdSetCommitTs(xid, ts, nodeid, buffer);
 	for (i = 0; i < nsubxids; i++)
-		TransactionIdSetCommitTs(subxids[i], ts, nodeid, slotno);
+		TransactionIdSetCommitTs(subxids[i], ts, nodeid, buffer);
 
-	CommitTsCtl->shared->page_dirty[slotno] = true;
+	MarkBufferDirty(buffer);
 
-	LWLockRelease(CommitTsSLRULock);
+	UnlockReleaseBuffer(buffer);
 }
 
 /*
  * Sets the commit timestamp of a single transaction.
- *
- * Must be called with CommitTsSLRULock held
  */
 static void
 TransactionIdSetCommitTs(TransactionId xid, TimestampTz ts,
-						 RepOriginId nodeid, int slotno)
+						 RepOriginId nodeid, Buffer buffer)
 {
 	int			entryno = TransactionIdToCTsEntry(xid);
 	CommitTimestampEntry entry;
@@ -252,8 +242,7 @@ TransactionIdSetCommitTs(TransactionId xid, TimestampTz ts,
 	entry.time = ts;
 	entry.nodeid = nodeid;
 
-	memcpy(CommitTsCtl->shared->page_buffer[slotno] +
-		   SizeOfCommitTimestampEntry * entryno,
+	memcpy(BufferGetPage(buffer) + SizeOfCommitTimestampEntry * entryno,
 		   &entry, SizeOfCommitTimestampEntry);
 }
 
@@ -271,10 +260,10 @@ TransactionIdGetCommitTsData(TransactionId xid, TimestampTz *ts,
 {
 	int			pageno = TransactionIdToCTsPage(xid);
 	int			entryno = TransactionIdToCTsEntry(xid);
-	int			slotno;
 	CommitTimestampEntry entry;
 	TransactionId oldestCommitTsXid;
 	TransactionId newestCommitTsXid;
+	Buffer		buffer;
 
 	if (!TransactionIdIsValid(xid))
 		ereport(ERROR,
@@ -328,10 +317,11 @@ TransactionIdGetCommitTsData(TransactionId xid, TimestampTz *ts,
 		return false;
 	}
 
-	/* lock is acquired by SimpleLruReadPage_ReadOnly */
-	slotno = SimpleLruReadPage_ReadOnly(CommitTsCtl, pageno, xid);
+	buffer = ReadSlruBuffer(SLRU_COMMITTS_REL_ID, pageno);
+	LockBuffer(buffer, BUFFER_LOCK_SHARE);
+
 	memcpy(&entry,
-		   CommitTsCtl->shared->page_buffer[slotno] +
+		   BufferGetPage(buffer) +
 		   SizeOfCommitTimestampEntry * entryno,
 		   SizeOfCommitTimestampEntry);
 
@@ -339,7 +329,7 @@ TransactionIdGetCommitTsData(TransactionId xid, TimestampTz *ts,
 	if (nodeid)
 		*nodeid = entry.nodeid;
 
-	LWLockRelease(CommitTsSLRULock);
+	UnlockReleaseBuffer(buffer);
 	return *ts != 0;
 }
 
@@ -508,27 +498,13 @@ pg_xact_commit_timestamp_origin(PG_FUNCTION_ARGS)
 	PG_RETURN_DATUM(HeapTupleGetDatum(htup));
 }
 
-/*
- * Number of shared CommitTS buffers.
- *
- * We use a very similar logic as for the number of CLOG buffers (except we
- * scale up twice as fast with shared buffers, and the maximum is twice as
- * high); see comments in CLOGShmemBuffers.
- */
-Size
-CommitTsShmemBuffers(void)
-{
-	return Min(256, Max(4, NBuffers / 256));
-}
-
 /*
  * Shared memory sizing for CommitTs
  */
 Size
 CommitTsShmemSize(void)
 {
-	return SimpleLruShmemSize(CommitTsShmemBuffers(), 0) +
-		sizeof(CommitTimestampShared);
+	return sizeof(CommitTimestampShared);
 }
 
 /*
@@ -540,12 +516,7 @@ CommitTsShmemInit(void)
 {
 	bool		found;
 
-	CommitTsCtl->PagePrecedes = CommitTsPagePrecedes;
-	SimpleLruInit(CommitTsCtl, "CommitTs", CommitTsShmemBuffers(), 0,
-				  CommitTsSLRULock, "pg_commit_ts",
-				  LWTRANCHE_COMMITTS_BUFFER,
-				  SYNC_HANDLER_COMMIT_TS);
-	SlruPagePrecedesUnitTests(CommitTsCtl, COMMIT_TS_XACTS_PER_PAGE);
+	SlruPagePrecedesUnitTests(CommitTsPagePrecedes, COMMIT_TS_XACTS_PER_PAGE);
 
 	commitTsShared = ShmemInitStruct("CommitTs shared",
 									 sizeof(CommitTimestampShared),
@@ -589,17 +560,18 @@ BootStrapCommitTs(void)
  *
  * Control lock must be held at entry, and will be held at exit.
  */
-static int
+static Buffer
 ZeroCommitTsPage(int pageno, bool writeXlog)
 {
-	int			slotno;
+	Buffer		buffer;
 
-	slotno = SimpleLruZeroPage(CommitTsCtl, pageno);
+	buffer = ZeroSlruBuffer(SLRU_COMMITTS_REL_ID, pageno);
+	MarkBufferDirty(buffer);
 
 	if (writeXlog)
 		WriteZeroPageXlogRec(pageno);
 
-	return slotno;
+	return buffer;
 }
 
 /*
@@ -697,13 +669,6 @@ ActivateCommitTs(void)
 	xid = XidFromFullTransactionId(ShmemVariableCache->nextXid);
 	pageno = TransactionIdToCTsPage(xid);
 
-	/*
-	 * Re-Initialize our idea of the latest page number.
-	 */
-	LWLockAcquire(CommitTsSLRULock, LW_EXCLUSIVE);
-	CommitTsCtl->shared->latest_page_number = pageno;
-	LWLockRelease(CommitTsSLRULock);
-
 	/*
 	 * If CommitTs is enabled, but it wasn't in the previous server run, we
 	 * need to set the oldest and newest values to the next Xid; that way, we
@@ -726,15 +691,14 @@ ActivateCommitTs(void)
 	LWLockRelease(CommitTsLock);
 
 	/* Create the current segment file, if necessary */
-	if (!SimpleLruDoesPhysicalPageExist(CommitTsCtl, pageno))
+	if (!SimpleLruDoesPhysicalPageExist(SLRU_COMMITTS_REL_ID, pageno))
 	{
-		int			slotno;
+		Buffer		buffer;
 
-		LWLockAcquire(CommitTsSLRULock, LW_EXCLUSIVE);
-		slotno = ZeroCommitTsPage(pageno, false);
-		SimpleLruWritePage(CommitTsCtl, slotno);
-		Assert(!CommitTsCtl->shared->page_dirty[slotno]);
-		LWLockRelease(CommitTsSLRULock);
+		buffer = ZeroSlruBuffer(SLRU_COMMITTS_REL_ID, pageno);
+		MarkBufferDirty(buffer);
+		FlushOneBuffer(buffer);
+		UnlockReleaseBuffer(buffer);
 	}
 
 	/* Change the activation status in shared memory. */
@@ -783,23 +747,9 @@ DeactivateCommitTs(void)
 	 * be overwritten anyway when we wrap around, but it seems better to be
 	 * tidy.)
 	 */
-	LWLockAcquire(CommitTsSLRULock, LW_EXCLUSIVE);
-	(void) SlruScanDirectory(CommitTsCtl, SlruScanDirCbDeleteAll, NULL);
-	LWLockRelease(CommitTsSLRULock);
-}
-
-/*
- * Perform a checkpoint --- either during shutdown, or on-the-fly
- */
-void
-CheckPointCommitTs(void)
-{
-	/*
-	 * Write dirty CommitTs pages to disk.  This may result in sync requests
-	 * queued for later handling by ProcessSyncRequests(), as part of the
-	 * checkpoint.
-	 */
-	SimpleLruWriteAll(CommitTsCtl, true);
+	(void) SlruScanDirectory(SLRU_COMMITTS_REL_ID,
+							 CommitTsPagePrecedes,
+							 SlruScanDirCbDeleteAll, NULL);
 }
 
 /*
@@ -837,12 +787,8 @@ ExtendCommitTs(TransactionId newestXact)
 
 	pageno = TransactionIdToCTsPage(newestXact);
 
-	LWLockAcquire(CommitTsSLRULock, LW_EXCLUSIVE);
-
 	/* Zero the page and make an XLOG entry about it */
-	ZeroCommitTsPage(pageno, !InRecovery);
-
-	LWLockRelease(CommitTsSLRULock);
+	UnlockReleaseBuffer(ZeroCommitTsPage(pageno, !InRecovery));
 }
 
 /*
@@ -863,7 +809,9 @@ TruncateCommitTs(TransactionId oldestXact)
 	cutoffPage = TransactionIdToCTsPage(oldestXact);
 
 	/* Check to see if there's any files that could be removed */
-	if (!SlruScanDirectory(CommitTsCtl, SlruScanDirCbReportPresence,
+	if (!SlruScanDirectory(SLRU_COMMITTS_REL_ID,
+						   CommitTsPagePrecedes,
+						   SlruScanDirCbReportPresence,
 						   &cutoffPage))
 		return;					/* nothing to remove */
 
@@ -871,7 +819,7 @@ TruncateCommitTs(TransactionId oldestXact)
 	WriteTruncateXlogRec(cutoffPage, oldestXact);
 
 	/* Now we can remove the old CommitTs segment(s) */
-	SimpleLruTruncate(CommitTsCtl, cutoffPage);
+	SimpleLruTruncate(SLRU_COMMITTS_REL_ID, CommitTsPagePrecedes, cutoffPage);
 }
 
 /*
@@ -995,17 +943,14 @@ commit_ts_redo(XLogReaderState *record)
 	if (info == COMMIT_TS_ZEROPAGE)
 	{
 		int			pageno;
-		int			slotno;
+		Buffer		buffer;
 
 		memcpy(&pageno, XLogRecGetData(record), sizeof(int));
 
-		LWLockAcquire(CommitTsSLRULock, LW_EXCLUSIVE);
-
-		slotno = ZeroCommitTsPage(pageno, false);
-		SimpleLruWritePage(CommitTsCtl, slotno);
-		Assert(!CommitTsCtl->shared->page_dirty[slotno]);
-
-		LWLockRelease(CommitTsSLRULock);
+		buffer = ZeroSlruBuffer(SLRU_COMMITTS_REL_ID, pageno);
+		MarkBufferDirty(buffer);
+		FlushOneBuffer(buffer);
+		UnlockReleaseBuffer(buffer);
 	}
 	else if (info == COMMIT_TS_TRUNCATE)
 	{
@@ -1013,23 +958,8 @@ commit_ts_redo(XLogReaderState *record)
 
 		AdvanceOldestCommitTsXid(trunc->oldestXid);
 
-		/*
-		 * During XLOG replay, latest_page_number isn't set up yet; insert a
-		 * suitable value to bypass the sanity test in SimpleLruTruncate.
-		 */
-		CommitTsCtl->shared->latest_page_number = trunc->pageno;
-
-		SimpleLruTruncate(CommitTsCtl, trunc->pageno);
+		SimpleLruTruncate(SLRU_COMMITTS_REL_ID, CommitTsPagePrecedes, trunc->pageno);
 	}
 	else
 		elog(PANIC, "commit_ts_redo: unknown op code %u", info);
 }
-
-/*
- * Entrypoint for sync.c to sync commit_ts files.
- */
-int
-committssyncfiletag(const FileTag *ftag, char *path)
-{
-	return SlruSyncFileTag(CommitTsCtl, ftag, path);
-}
diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c
index 8f7d12950e5..b35e73fc1f6 100644
--- a/src/backend/access/transam/multixact.c
+++ b/src/backend/access/transam/multixact.c
@@ -181,15 +181,6 @@
 #define PreviousMultiXactId(xid) \
 	((xid) == FirstMultiXactId ? MaxMultiXactId : (xid) - 1)
 
-/*
- * Links to shared-memory data structures for MultiXact control
- */
-static SlruCtlData MultiXactOffsetCtlData;
-static SlruCtlData MultiXactMemberCtlData;
-
-#define MultiXactOffsetCtl	(&MultiXactOffsetCtlData)
-#define MultiXactMemberCtl	(&MultiXactMemberCtlData)
-
 /*
  * MultiXact state shared across all backends.  All this state is protected
  * by MultiXactGenLock.  (We also use MultiXactOffsetSLRULock and
@@ -353,10 +344,9 @@ static void mXactCachePut(MultiXactId multi, int nmembers,
 static char *mxstatus_to_string(MultiXactStatus status);
 
 /* management of SLRU infrastructure */
-static int	ZeroMultiXactOffsetPage(int pageno, bool writeXlog);
-static int	ZeroMultiXactMemberPage(int pageno, bool writeXlog);
+static Buffer ZeroMultiXactOffsetPage(int pageno, bool writeXlog);
+static Buffer ZeroMultiXactMemberPage(int pageno, bool writeXlog);
 static bool MultiXactOffsetPagePrecedes(int page1, int page2);
-static bool MultiXactMemberPagePrecedes(int page1, int page2);
 static bool MultiXactOffsetPrecedes(MultiXactOffset offset1,
 									MultiXactOffset offset2);
 static void ExtendMultiXactOffset(MultiXactId multi);
@@ -865,34 +855,25 @@ RecordNewMultiXact(MultiXactId multi, MultiXactOffset offset,
 	int			pageno;
 	int			prev_pageno;
 	int			entryno;
-	int			slotno;
 	MultiXactOffset *offptr;
 	int			i;
-
-	LWLockAcquire(MultiXactOffsetSLRULock, LW_EXCLUSIVE);
+	Buffer		buffer;
 
 	pageno = MultiXactIdToOffsetPage(multi);
 	entryno = MultiXactIdToOffsetEntry(multi);
 
-	/*
-	 * Note: we pass the MultiXactId to SimpleLruReadPage as the "transaction"
-	 * to complain about if there's any I/O error.  This is kinda bogus, but
-	 * since the errors will always give the full pathname, it should be clear
-	 * enough that a MultiXactId is really involved.  Perhaps someday we'll
-	 * take the trouble to generalize the slru.c error reporting code.
-	 */
-	slotno = SimpleLruReadPage(MultiXactOffsetCtl, pageno, true, multi);
-	offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
+	/* XXX set up error context? */
+	buffer = ReadSlruBuffer(SLRU_MULTIXACT_OFFSET_REL_ID, pageno);
+	LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+	offptr = (MultiXactOffset *) BufferGetPage(buffer);
 	offptr += entryno;
 
 	*offptr = offset;
 
-	MultiXactOffsetCtl->shared->page_dirty[slotno] = true;
-
-	/* Exchange our lock */
-	LWLockRelease(MultiXactOffsetSLRULock);
+	MarkBufferDirty(buffer);
 
-	LWLockAcquire(MultiXactMemberSLRULock, LW_EXCLUSIVE);
+	UnlockReleaseBuffer(buffer);
+	buffer = InvalidBuffer;
 
 	prev_pageno = -1;
 
@@ -914,27 +895,28 @@ RecordNewMultiXact(MultiXactId multi, MultiXactOffset offset,
 
 		if (pageno != prev_pageno)
 		{
-			slotno = SimpleLruReadPage(MultiXactMemberCtl, pageno, true, multi);
+			if (BufferIsValid(buffer))
+				UnlockReleaseBuffer(buffer);
+			buffer = ReadSlruBuffer(SLRU_MULTIXACT_MEMBER_REL_ID, pageno);
+			LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
 			prev_pageno = pageno;
 		}
 
-		memberptr = (TransactionId *)
-			(MultiXactMemberCtl->shared->page_buffer[slotno] + memberoff);
+		memberptr = (TransactionId *) (BufferGetPage(buffer) + memberoff);
 
 		*memberptr = members[i].xid;
 
-		flagsptr = (uint32 *)
-			(MultiXactMemberCtl->shared->page_buffer[slotno] + flagsoff);
+		flagsptr = (uint32 *) (BufferGetPage(buffer) + flagsoff);
 
 		flagsval = *flagsptr;
 		flagsval &= ~(((1 << MXACT_MEMBER_BITS_PER_XACT) - 1) << bshift);
 		flagsval |= (members[i].status << bshift);
 		*flagsptr = flagsval;
 
-		MultiXactMemberCtl->shared->page_dirty[slotno] = true;
+		MarkBufferDirty(buffer);
 	}
 
-	LWLockRelease(MultiXactMemberSLRULock);
+	UnlockReleaseBuffer(buffer);
 }
 
 /*
@@ -1226,7 +1208,6 @@ GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **members,
 	int			pageno;
 	int			prev_pageno;
 	int			entryno;
-	int			slotno;
 	MultiXactOffset *offptr;
 	MultiXactOffset offset;
 	int			length;
@@ -1237,6 +1218,7 @@ GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **members,
 	MultiXactId tmpMXact;
 	MultiXactOffset nextOffset;
 	MultiXactMember *ptr;
+	Buffer		buffer;
 
 	debug_elog3(DEBUG2, "GetMembers: asked for %u", multi);
 
@@ -1340,13 +1322,12 @@ GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **members,
 	 * time on every multixact creation.
 	 */
 retry:
-	LWLockAcquire(MultiXactOffsetSLRULock, LW_EXCLUSIVE);
-
 	pageno = MultiXactIdToOffsetPage(multi);
 	entryno = MultiXactIdToOffsetEntry(multi);
 
-	slotno = SimpleLruReadPage(MultiXactOffsetCtl, pageno, true, multi);
-	offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
+	buffer = ReadSlruBuffer(SLRU_MULTIXACT_OFFSET_REL_ID, pageno);
+	LockBuffer(buffer, BUFFER_LOCK_SHARE);
+	offptr = (MultiXactOffset *) BufferGetPage(buffer);
 	offptr += entryno;
 	offset = *offptr;
 
@@ -1377,16 +1358,20 @@ retry:
 		entryno = MultiXactIdToOffsetEntry(tmpMXact);
 
 		if (pageno != prev_pageno)
-			slotno = SimpleLruReadPage(MultiXactOffsetCtl, pageno, true, tmpMXact);
+		{
+			UnlockReleaseBuffer(buffer);
+			buffer = ReadSlruBuffer(SLRU_MULTIXACT_OFFSET_REL_ID, pageno);
+			LockBuffer(buffer, BUFFER_LOCK_SHARE);
+		}
 
-		offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
+		offptr = (MultiXactOffset *) BufferGetPage(buffer);
 		offptr += entryno;
 		nextMXOffset = *offptr;
 
 		if (nextMXOffset == 0)
 		{
 			/* Corner case 2: next multixact is still being filled in */
-			LWLockRelease(MultiXactOffsetSLRULock);
+			UnlockReleaseBuffer(buffer);
 			CHECK_FOR_INTERRUPTS();
 			pg_usleep(1000L);
 			goto retry;
@@ -1394,14 +1379,11 @@ retry:
 
 		length = nextMXOffset - offset;
 	}
-
-	LWLockRelease(MultiXactOffsetSLRULock);
+	UnlockReleaseBuffer(buffer);
+	buffer = InvalidBuffer;
 
 	ptr = (MultiXactMember *) palloc(length * sizeof(MultiXactMember));
 
-	/* Now get the members themselves. */
-	LWLockAcquire(MultiXactMemberSLRULock, LW_EXCLUSIVE);
-
 	truelength = 0;
 	prev_pageno = -1;
 	for (i = 0; i < length; i++, offset++)
@@ -1417,12 +1399,14 @@ retry:
 
 		if (pageno != prev_pageno)
 		{
-			slotno = SimpleLruReadPage(MultiXactMemberCtl, pageno, true, multi);
+			if (BufferIsValid(buffer))
+				UnlockReleaseBuffer(buffer);
+			buffer = ReadSlruBuffer(SLRU_MULTIXACT_MEMBER_REL_ID, pageno);
+			LockBuffer(buffer, BUFFER_LOCK_SHARE);
 			prev_pageno = pageno;
 		}
 
-		xactptr = (TransactionId *)
-			(MultiXactMemberCtl->shared->page_buffer[slotno] + memberoff);
+		xactptr = (TransactionId *) (BufferGetPage(buffer) + memberoff);
 
 		if (!TransactionIdIsValid(*xactptr))
 		{
@@ -1433,14 +1417,13 @@ retry:
 
 		flagsoff = MXOffsetToFlagsOffset(offset);
 		bshift = MXOffsetToFlagsBitShift(offset);
-		flagsptr = (uint32 *) (MultiXactMemberCtl->shared->page_buffer[slotno] + flagsoff);
+		flagsptr = (uint32 *) (BufferGetPage(buffer) + flagsoff);
 
 		ptr[truelength].xid = *xactptr;
 		ptr[truelength].status = (*flagsptr >> bshift) & MXACT_MEMBER_XACT_BITMASK;
 		truelength++;
 	}
-
-	LWLockRelease(MultiXactMemberSLRULock);
+	UnlockReleaseBuffer(buffer);
 
 	/* A multixid with zero members should not happen */
 	Assert(truelength > 0);
@@ -1834,8 +1817,6 @@ MultiXactShmemSize(void)
 			 mul_size(sizeof(MultiXactId) * 2, MaxOldestSlot))
 
 	size = SHARED_MULTIXACT_STATE_SIZE;
-	size = add_size(size, SimpleLruShmemSize(NUM_MULTIXACTOFFSET_BUFFERS, 0));
-	size = add_size(size, SimpleLruShmemSize(NUM_MULTIXACTMEMBER_BUFFERS, 0));
 
 	return size;
 }
@@ -1847,22 +1828,6 @@ MultiXactShmemInit(void)
 
 	debug_elog2(DEBUG2, "Shared Memory Init for MultiXact");
 
-	MultiXactOffsetCtl->PagePrecedes = MultiXactOffsetPagePrecedes;
-	MultiXactMemberCtl->PagePrecedes = MultiXactMemberPagePrecedes;
-
-	SimpleLruInit(MultiXactOffsetCtl,
-				  "MultiXactOffset", NUM_MULTIXACTOFFSET_BUFFERS, 0,
-				  MultiXactOffsetSLRULock, "pg_multixact/offsets",
-				  LWTRANCHE_MULTIXACTOFFSET_BUFFER,
-				  SYNC_HANDLER_MULTIXACT_OFFSET);
-	SlruPagePrecedesUnitTests(MultiXactOffsetCtl, MULTIXACT_OFFSETS_PER_PAGE);
-	SimpleLruInit(MultiXactMemberCtl,
-				  "MultiXactMember", NUM_MULTIXACTMEMBER_BUFFERS, 0,
-				  MultiXactMemberSLRULock, "pg_multixact/members",
-				  LWTRANCHE_MULTIXACTMEMBER_BUFFER,
-				  SYNC_HANDLER_MULTIXACT_MEMBER);
-	/* doesn't call SimpleLruTruncate() or meet criteria for unit tests */
-
 	/* Initialize our shared state struct */
 	MultiXactState = ShmemInitStruct("Shared MultiXact State",
 									 SHARED_MULTIXACT_STATE_SIZE,
@@ -1893,29 +1858,17 @@ MultiXactShmemInit(void)
 void
 BootStrapMultiXact(void)
 {
-	int			slotno;
-
-	LWLockAcquire(MultiXactOffsetSLRULock, LW_EXCLUSIVE);
+	Buffer		buffer;
 
 	/* Create and zero the first page of the offsets log */
-	slotno = ZeroMultiXactOffsetPage(0, false);
-
-	/* Make sure it's written out */
-	SimpleLruWritePage(MultiXactOffsetCtl, slotno);
-	Assert(!MultiXactOffsetCtl->shared->page_dirty[slotno]);
-
-	LWLockRelease(MultiXactOffsetSLRULock);
-
-	LWLockAcquire(MultiXactMemberSLRULock, LW_EXCLUSIVE);
+	buffer = ZeroMultiXactOffsetPage(0, false);
+	FlushOneBuffer(buffer);
+	UnlockReleaseBuffer(buffer);
 
 	/* Create and zero the first page of the members log */
-	slotno = ZeroMultiXactMemberPage(0, false);
-
-	/* Make sure it's written out */
-	SimpleLruWritePage(MultiXactMemberCtl, slotno);
-	Assert(!MultiXactMemberCtl->shared->page_dirty[slotno]);
-
-	LWLockRelease(MultiXactMemberSLRULock);
+	buffer = ZeroMultiXactMemberPage(0, false);
+	FlushOneBuffer(buffer);
+	UnlockReleaseBuffer(buffer);
 }
 
 /*
@@ -1927,33 +1880,35 @@ BootStrapMultiXact(void)
  *
  * Control lock must be held at entry, and will be held at exit.
  */
-static int
+static Buffer
 ZeroMultiXactOffsetPage(int pageno, bool writeXlog)
 {
-	int			slotno;
+	Buffer		buffer;
 
-	slotno = SimpleLruZeroPage(MultiXactOffsetCtl, pageno);
+	buffer = ZeroSlruBuffer(SLRU_MULTIXACT_OFFSET_REL_ID, pageno);
+	MarkBufferDirty(buffer);
 
 	if (writeXlog)
 		WriteMZeroPageXlogRec(pageno, XLOG_MULTIXACT_ZERO_OFF_PAGE);
 
-	return slotno;
+	return buffer;
 }
 
 /*
  * Ditto, for MultiXactMember
  */
-static int
+static Buffer
 ZeroMultiXactMemberPage(int pageno, bool writeXlog)
 {
-	int			slotno;
+	Buffer		buffer;
 
-	slotno = SimpleLruZeroPage(MultiXactMemberCtl, pageno);
+	buffer = ZeroSlruBuffer(SLRU_MULTIXACT_MEMBER_REL_ID, pageno);
+	MarkBufferDirty(buffer);
 
 	if (writeXlog)
 		WriteMZeroPageXlogRec(pageno, XLOG_MULTIXACT_ZERO_MEM_PAGE);
 
-	return slotno;
+	return buffer;
 }
 
 /*
@@ -1978,22 +1933,14 @@ MaybeExtendOffsetSlru(void)
 
 	pageno = MultiXactIdToOffsetPage(MultiXactState->nextMXact);
 
-	LWLockAcquire(MultiXactOffsetSLRULock, LW_EXCLUSIVE);
-
-	if (!SimpleLruDoesPhysicalPageExist(MultiXactOffsetCtl, pageno))
+	if (!SimpleLruDoesPhysicalPageExist(SLRU_MULTIXACT_OFFSET_REL_ID, pageno))
 	{
-		int			slotno;
+		Buffer		buffer;
 
-		/*
-		 * Fortunately for us, SimpleLruWritePage is already prepared to deal
-		 * with creating a new segment file even if the page we're writing is
-		 * not the first in it, so this is enough.
-		 */
-		slotno = ZeroMultiXactOffsetPage(pageno, false);
-		SimpleLruWritePage(MultiXactOffsetCtl, slotno);
+		buffer = ZeroMultiXactOffsetPage(pageno, false);
+		FlushOneBuffer(buffer);
+		UnlockReleaseBuffer(buffer);
 	}
-
-	LWLockRelease(MultiXactOffsetSLRULock);
 }
 
 /*
@@ -2007,21 +1954,6 @@ MaybeExtendOffsetSlru(void)
 void
 StartupMultiXact(void)
 {
-	MultiXactId multi = MultiXactState->nextMXact;
-	MultiXactOffset offset = MultiXactState->nextOffset;
-	int			pageno;
-
-	/*
-	 * Initialize offset's idea of the latest page number.
-	 */
-	pageno = MultiXactIdToOffsetPage(multi);
-	MultiXactOffsetCtl->shared->latest_page_number = pageno;
-
-	/*
-	 * Initialize member's idea of the latest page number.
-	 */
-	pageno = MXOffsetToMemberPage(offset);
-	MultiXactMemberCtl->shared->latest_page_number = pageno;
 }
 
 /*
@@ -2045,14 +1977,7 @@ TrimMultiXact(void)
 	oldestMXactDB = MultiXactState->oldestMultiXactDB;
 	LWLockRelease(MultiXactGenLock);
 
-	/* Clean up offsets state */
-	LWLockAcquire(MultiXactOffsetSLRULock, LW_EXCLUSIVE);
-
-	/*
-	 * (Re-)Initialize our idea of the latest page number for offsets.
-	 */
-	pageno = MultiXactIdToOffsetPage(nextMXact);
-	MultiXactOffsetCtl->shared->latest_page_number = pageno;
+	pageno = MXOffsetToMemberPage(offset);
 
 	/*
 	 * Zero out the remainder of the current offsets page.  See notes in
@@ -2065,29 +1990,20 @@ TrimMultiXact(void)
 	entryno = MultiXactIdToOffsetEntry(nextMXact);
 	if (entryno != 0)
 	{
-		int			slotno;
 		MultiXactOffset *offptr;
+		Buffer		buffer;
 
-		slotno = SimpleLruReadPage(MultiXactOffsetCtl, pageno, true, nextMXact);
-		offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
+		buffer = ReadSlruBuffer(SLRU_MULTIXACT_OFFSET_REL_ID, MultiXactIdToOffsetPage(nextMXact));
+		LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+		offptr = (MultiXactOffset *) BufferGetPage(buffer);
 		offptr += entryno;
 
 		MemSet(offptr, 0, BLCKSZ - (entryno * sizeof(MultiXactOffset)));
 
-		MultiXactOffsetCtl->shared->page_dirty[slotno] = true;
+		MarkBufferDirty(buffer);
+		UnlockReleaseBuffer(buffer);
 	}
 
-	LWLockRelease(MultiXactOffsetSLRULock);
-
-	/* And the same for members */
-	LWLockAcquire(MultiXactMemberSLRULock, LW_EXCLUSIVE);
-
-	/*
-	 * (Re-)Initialize our idea of the latest page number for members.
-	 */
-	pageno = MXOffsetToMemberPage(offset);
-	MultiXactMemberCtl->shared->latest_page_number = pageno;
-
 	/*
 	 * Zero out the remainder of the current members page.  See notes in
 	 * TrimCLOG() for motivation.
@@ -2095,14 +2011,14 @@ TrimMultiXact(void)
 	flagsoff = MXOffsetToFlagsOffset(offset);
 	if (flagsoff != 0)
 	{
-		int			slotno;
 		TransactionId *xidptr;
 		int			memberoff;
+		Buffer		buffer;
 
 		memberoff = MXOffsetToMemberOffset(offset);
-		slotno = SimpleLruReadPage(MultiXactMemberCtl, pageno, true, offset);
-		xidptr = (TransactionId *)
-			(MultiXactMemberCtl->shared->page_buffer[slotno] + memberoff);
+		buffer = ReadSlruBuffer(SLRU_MULTIXACT_MEMBER_REL_ID, pageno);
+		LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+		xidptr = (TransactionId *) (BufferGetPage(buffer) + memberoff);
 
 		MemSet(xidptr, 0, BLCKSZ - memberoff);
 
@@ -2112,11 +2028,10 @@ TrimMultiXact(void)
 		 * writing.
 		 */
 
-		MultiXactMemberCtl->shared->page_dirty[slotno] = true;
+		MarkBufferDirty(buffer);
+		UnlockReleaseBuffer(buffer);
 	}
 
-	LWLockRelease(MultiXactMemberSLRULock);
-
 	/* signal that we're officially up */
 	LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
 	MultiXactState->finishedStartup = true;
@@ -2148,25 +2063,6 @@ MultiXactGetCheckptMulti(bool is_shutdown,
 				*nextMulti, *nextMultiOffset, *oldestMulti, *oldestMultiDB);
 }
 
-/*
- * Perform a checkpoint --- either during shutdown, or on-the-fly
- */
-void
-CheckPointMultiXact(void)
-{
-	TRACE_POSTGRESQL_MULTIXACT_CHECKPOINT_START(true);
-
-	/*
-	 * Write dirty MultiXact pages to disk.  This may result in sync requests
-	 * queued for later handling by ProcessSyncRequests(), as part of the
-	 * checkpoint.
-	 */
-	SimpleLruWriteAll(MultiXactOffsetCtl, true);
-	SimpleLruWriteAll(MultiXactMemberCtl, true);
-
-	TRACE_POSTGRESQL_MULTIXACT_CHECKPOINT_DONE(true);
-}
-
 /*
  * Set the next-to-be-assigned MultiXactId and offset
  *
@@ -2415,12 +2311,8 @@ ExtendMultiXactOffset(MultiXactId multi)
 
 	pageno = MultiXactIdToOffsetPage(multi);
 
-	LWLockAcquire(MultiXactOffsetSLRULock, LW_EXCLUSIVE);
-
 	/* Zero the page and make an XLOG entry about it */
-	ZeroMultiXactOffsetPage(pageno, true);
-
-	LWLockRelease(MultiXactOffsetSLRULock);
+	UnlockReleaseBuffer(ZeroMultiXactOffsetPage(pageno, true));
 }
 
 /*
@@ -2456,12 +2348,8 @@ ExtendMultiXactMember(MultiXactOffset offset, int nmembers)
 
 			pageno = MXOffsetToMemberPage(offset);
 
-			LWLockAcquire(MultiXactMemberSLRULock, LW_EXCLUSIVE);
-
 			/* Zero the page and make an XLOG entry about it */
-			ZeroMultiXactMemberPage(pageno, true);
-
-			LWLockRelease(MultiXactMemberSLRULock);
+			UnlockReleaseBuffer(ZeroMultiXactMemberPage(pageno, true));
 		}
 
 		/*
@@ -2736,8 +2624,9 @@ find_multixact_start(MultiXactId multi, MultiXactOffset *result)
 	MultiXactOffset offset;
 	int			pageno;
 	int			entryno;
-	int			slotno;
 	MultiXactOffset *offptr;
+	Buffer		buffer;
+	RelFileLocator rlocator = SlruRelFileLocator(SLRU_MULTIXACT_OFFSET_REL_ID);
 
 	Assert(MultiXactState->finishedStartup);
 
@@ -2745,20 +2634,19 @@ find_multixact_start(MultiXactId multi, MultiXactOffset *result)
 	entryno = MultiXactIdToOffsetEntry(multi);
 
 	/*
-	 * Write out dirty data, so PhysicalPageExists can work correctly.
+	 * Cope with missing/bogus oldest MultiXact in inconsistent states (see
+	 * commit 068cfadf9).
 	 */
-	SimpleLruWriteAll(MultiXactOffsetCtl, true);
-	SimpleLruWriteAll(MultiXactMemberCtl, true);
-
-	if (!SimpleLruDoesPhysicalPageExist(MultiXactOffsetCtl, pageno))
+	if (!BufferProbe(rlocator, MAIN_FORKNUM, pageno) &&
+		!SimpleLruDoesPhysicalPageExist(SLRU_MULTIXACT_OFFSET_REL_ID, pageno))
 		return false;
 
-	/* lock is acquired by SimpleLruReadPage_ReadOnly */
-	slotno = SimpleLruReadPage_ReadOnly(MultiXactOffsetCtl, pageno, multi);
-	offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
+	buffer = ReadSlruBuffer(SLRU_MULTIXACT_OFFSET_REL_ID, pageno);
+	LockBuffer(buffer, BUFFER_LOCK_SHARE);
+	offptr = (MultiXactOffset *) BufferGetPage(buffer);
 	offptr += entryno;
 	offset = *offptr;
-	LWLockRelease(MultiXactOffsetSLRULock);
+	UnlockReleaseBuffer(buffer);
 
 	*result = offset;
 	return true;
@@ -2865,12 +2753,13 @@ typedef struct mxtruncinfo
  *		This callback determines the earliest existing page number.
  */
 static bool
-SlruScanDirCbFindEarliest(SlruCtl ctl, char *filename, int segpage, void *data)
+SlruScanDirCbFindEarliest(Oid rel_id, SlruPagePrecedesFunction PagePrecedes,
+						  char *filename, int segpage, void *data)
 {
 	mxtruncinfo *trunc = (mxtruncinfo *) data;
 
 	if (trunc->earliestExistingPage == -1 ||
-		ctl->PagePrecedes(segpage, trunc->earliestExistingPage))
+		PagePrecedes(segpage, trunc->earliestExistingPage))
 	{
 		trunc->earliestExistingPage = segpage;
 	}
@@ -2902,7 +2791,7 @@ PerformMembersTruncation(MultiXactOffset oldestOffset, MultiXactOffset newOldest
 	while (segment != endsegment)
 	{
 		elog(DEBUG2, "truncating multixact members segment %x", segment);
-		SlruDeleteSegment(MultiXactMemberCtl, segment);
+		SlruDeleteSegment(SLRU_MULTIXACT_MEMBER_REL_ID, segment);
 
 		/* move to next segment, handling wraparound correctly */
 		if (segment == maxsegment)
@@ -2925,7 +2814,8 @@ PerformOffsetsTruncation(MultiXactId oldestMulti, MultiXactId newOldestMulti)
 	 * didn't subtract one, we'd trigger SimpleLruTruncate's wraparound
 	 * detection.
 	 */
-	SimpleLruTruncate(MultiXactOffsetCtl,
+	SimpleLruTruncate(SLRU_MULTIXACT_OFFSET_REL_ID,
+					  MultiXactOffsetPagePrecedes,
 					  MultiXactIdToOffsetPage(PreviousMultiXactId(newOldestMulti)));
 }
 
@@ -2999,7 +2889,9 @@ TruncateMultiXact(MultiXactId newOldestMulti, Oid newOldestMultiDB)
 	 * been truncated away, and we crashed before updating oldestMulti.
 	 */
 	trunc.earliestExistingPage = -1;
-	SlruScanDirectory(MultiXactOffsetCtl, SlruScanDirCbFindEarliest, &trunc);
+	SlruScanDirectory(SLRU_MULTIXACT_OFFSET_REL_ID,
+					  MultiXactOffsetPagePrecedes,
+					  SlruScanDirCbFindEarliest, &trunc);
 	earliest = trunc.earliestExistingPage * MULTIXACT_OFFSETS_PER_PAGE;
 	if (earliest < FirstMultiXactId)
 		earliest = FirstMultiXactId;
@@ -3131,24 +3023,6 @@ MultiXactOffsetPagePrecedes(int page1, int page2)
 								multi2 + MULTIXACT_OFFSETS_PER_PAGE - 1));
 }
 
-/*
- * Decide whether a MultiXactMember page number is "older" for truncation
- * purposes.  There is no "invalid offset number" so use the numbers verbatim.
- */
-static bool
-MultiXactMemberPagePrecedes(int page1, int page2)
-{
-	MultiXactOffset offset1;
-	MultiXactOffset offset2;
-
-	offset1 = ((MultiXactOffset) page1) * MULTIXACT_MEMBERS_PER_PAGE;
-	offset2 = ((MultiXactOffset) page2) * MULTIXACT_MEMBERS_PER_PAGE;
-
-	return (MultiXactOffsetPrecedes(offset1, offset2) &&
-			MultiXactOffsetPrecedes(offset1,
-									offset2 + MULTIXACT_MEMBERS_PER_PAGE - 1));
-}
-
 /*
  * Decide which of two MultiXactIds is earlier.
  *
@@ -3243,32 +3117,18 @@ multixact_redo(XLogReaderState *record)
 	if (info == XLOG_MULTIXACT_ZERO_OFF_PAGE)
 	{
 		int			pageno;
-		int			slotno;
 
 		memcpy(&pageno, XLogRecGetData(record), sizeof(int));
 
-		LWLockAcquire(MultiXactOffsetSLRULock, LW_EXCLUSIVE);
-
-		slotno = ZeroMultiXactOffsetPage(pageno, false);
-		SimpleLruWritePage(MultiXactOffsetCtl, slotno);
-		Assert(!MultiXactOffsetCtl->shared->page_dirty[slotno]);
-
-		LWLockRelease(MultiXactOffsetSLRULock);
+		UnlockReleaseBuffer(ZeroMultiXactOffsetPage(pageno, false));
 	}
 	else if (info == XLOG_MULTIXACT_ZERO_MEM_PAGE)
 	{
 		int			pageno;
-		int			slotno;
 
 		memcpy(&pageno, XLogRecGetData(record), sizeof(int));
 
-		LWLockAcquire(MultiXactMemberSLRULock, LW_EXCLUSIVE);
-
-		slotno = ZeroMultiXactMemberPage(pageno, false);
-		SimpleLruWritePage(MultiXactMemberCtl, slotno);
-		Assert(!MultiXactMemberCtl->shared->page_dirty[slotno]);
-
-		LWLockRelease(MultiXactMemberSLRULock);
+		UnlockReleaseBuffer(ZeroMultiXactMemberPage(pageno, false));
 	}
 	else if (info == XLOG_MULTIXACT_CREATE_ID)
 	{
@@ -3302,7 +3162,6 @@ multixact_redo(XLogReaderState *record)
 	else if (info == XLOG_MULTIXACT_TRUNCATE_ID)
 	{
 		xl_multixact_truncate xlrec;
-		int			pageno;
 
 		memcpy(&xlrec, XLogRecGetData(record),
 			   SizeOfMultiXactTruncate);
@@ -3328,13 +3187,6 @@ multixact_redo(XLogReaderState *record)
 
 		PerformMembersTruncation(xlrec.startTruncMemb, xlrec.endTruncMemb);
 
-		/*
-		 * During XLOG replay, latest_page_number isn't necessarily set up
-		 * yet; insert a suitable value to bypass the sanity test in
-		 * SimpleLruTruncate.
-		 */
-		pageno = MultiXactIdToOffsetPage(xlrec.endTruncOff);
-		MultiXactOffsetCtl->shared->latest_page_number = pageno;
 		PerformOffsetsTruncation(xlrec.startTruncOff, xlrec.endTruncOff);
 
 		LWLockRelease(MultiXactTruncationLock);
@@ -3407,21 +3259,3 @@ pg_get_multixact_members(PG_FUNCTION_ARGS)
 
 	SRF_RETURN_DONE(funccxt);
 }
-
-/*
- * Entrypoint for sync.c to sync offsets files.
- */
-int
-multixactoffsetssyncfiletag(const FileTag *ftag, char *path)
-{
-	return SlruSyncFileTag(MultiXactOffsetCtl, ftag, path);
-}
-
-/*
- * Entrypoint for sync.c to sync members files.
- */
-int
-multixactmemberssyncfiletag(const FileTag *ftag, char *path)
-{
-	return SlruSyncFileTag(MultiXactMemberCtl, ftag, path);
-}
diff --git a/src/backend/access/transam/slru.c b/src/backend/access/transam/slru.c
index b65cb49d7ff..12eeca67e27 100644
--- a/src/backend/access/transam/slru.c
+++ b/src/backend/access/transam/slru.c
@@ -1,41 +1,9 @@
 /*-------------------------------------------------------------------------
  *
  * slru.c
- *		Simple LRU buffering for transaction status logfiles
+ *		Simple buffering for transaction status logfiles
  *
- * We use a simple least-recently-used scheme to manage a pool of page
- * buffers.  Under ordinary circumstances we expect that write
- * traffic will occur mostly to the latest page (and to the just-prior
- * page, soon after a page transition).  Read traffic will probably touch
- * a larger span of pages, but in any case a fairly small number of page
- * buffers should be sufficient.  So, we just search the buffers using plain
- * linear search; there's no need for a hashtable or anything fancy.
- * The management algorithm is straight LRU except that we will never swap
- * out the latest page (since we know it's going to be hit again eventually).
- *
- * We use a control LWLock to protect the shared data structures, plus
- * per-buffer LWLocks that synchronize I/O for each buffer.  The control lock
- * must be held to examine or modify any shared state.  A process that is
- * reading in or writing out a page buffer does not hold the control lock,
- * only the per-buffer lock for the buffer it is working on.
- *
- * "Holding the control lock" means exclusive lock in all cases except for
- * SimpleLruReadPage_ReadOnly(); see comments for SlruRecentlyUsed() for
- * the implications of that.
- *
- * When initiating I/O on a buffer, we acquire the per-buffer lock exclusively
- * before releasing the control lock.  The per-buffer lock is released after
- * completing the I/O, re-acquiring the control lock, and updating the shared
- * state.  (Deadlock is not possible here, because we never try to initiate
- * I/O when someone else is already doing I/O on the same buffer.)
- * To wait for I/O to complete, release the control lock, acquire the
- * per-buffer lock in shared mode, immediately release the per-buffer lock,
- * reacquire the control lock, and then recheck state (since arbitrary things
- * could have happened while we didn't have the lock).
- *
- * As with the regular buffer manager, it is possible for another process
- * to re-dirty a page that is currently being written out.  This is handled
- * by re-setting the page's page_dirty flag.
+ * XXX The LRU buffering formerly implemented here has moved to the regular buffer pool; this file now defines SLRU identities, on-disk paths, and segment-file I/O helpers.
  *
  *
  * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
@@ -60,561 +28,81 @@
 #include "storage/fd.h"
 #include "storage/shmem.h"
 
-#define SlruFileName(ctl, path, seg) \
-	snprintf(path, MAXPGPATH, "%s/%04X", (ctl)->Dir, seg)
+#define SlruFileName(rel_id, path, seg)							\
+	snprintf(path, MAXPGPATH, "%s/%04X", defs[(rel_id)].path, seg)
+
+struct SlruDef {
+	const char *name;		/* user-visible SLRU name (used for stats reporting) */
+	const char *path;		/* PGDATA-relative directory holding segment files */
+	bool synchronize;		/* register segments for checkpoint fsync? -- TODO confirm against sync handler */
+};
+
+static const struct SlruDef defs[] = {
+	[SLRU_CLOG_REL_ID] = {
+		.name = "Xact",
+		.path = "pg_xact",
+		.synchronize = true,
+	},
+	[SLRU_SUBTRANS_REL_ID] = {
+		.name = "Subtrans",
+		.path = "pg_subtrans",
+	},
+	[SLRU_MULTIXACT_OFFSET_REL_ID] = {
+		.name = "MultiXactOffset",
+		.path = "pg_multixact/offsets",
+		.synchronize = true,
+	},
+	[SLRU_MULTIXACT_MEMBER_REL_ID] = {
+		.name = "MultiXactMember",
+		.path = "pg_multixact/members",
+		.synchronize = true,
+	},
+	[SLRU_COMMITTS_REL_ID] = {
+		.name = "CommitTs",
+		.path = "pg_commit_ts",
+		.synchronize = true,
+	},
+	[SLRU_SERIAL_REL_ID] = {
+		.name = "Serial",
+		.path = "pg_serial",
+	},
+	[SLRU_NOTIFY_REL_ID] = {
+		.name = "Notify",
+		.path = "pg_notify",
+	},
+};
 
 /*
- * During SimpleLruWriteAll(), we will usually not need to write more than one
- * or two physical files, but we may need to write several pages per file.  We
- * can consolidate the I/O requests by leaving files open until control returns
- * to SimpleLruWriteAll().  This data structure remembers which files are open.
+ * We'll maintain a little cache of recently seen buffers, to try to avoid the
+ * buffer mapping table lookup on repeat access (i.e., the busy end of the CLOG).  One
+ * entry per SLRU relation.
  */
-#define MAX_WRITEALL_BUFFERS	16
+struct SlruRecentBuffer {
+	int			pageno;			/* SLRU page number last accessed */
+	Buffer		recent_buffer;	/* buffer hint for that page; may be stale, must be revalidated */
+};
 
-typedef struct SlruWriteAllData
-{
-	int			num_files;		/* # files actually open */
-	int			fd[MAX_WRITEALL_BUFFERS];	/* their FD's */
-	int			segno[MAX_WRITEALL_BUFFERS];	/* their log seg#s */
-} SlruWriteAllData;
-
-typedef struct SlruWriteAllData *SlruWriteAll;
+static struct SlruRecentBuffer slru_recent_buffers[lengthof(defs)];
 
 /*
- * Populate a file tag describing a segment file.  We only use the segment
- * number, since we can derive everything else we need by having separate
- * sync handler functions for clog, multixact etc.
+ * Populate a file tag identifying an SLRU segment file.
  */
-#define INIT_SLRUFILETAG(a,xx_handler,xx_segno) \
+#define INIT_SLRUFILETAG(a,xx_rel_number,xx_segno) \
 ( \
 	memset(&(a), 0, sizeof(FileTag)), \
-	(a).handler = (xx_handler), \
+	(a).handler = SYNC_HANDLER_SLRU, \
+	(a).rlocator = SlruRelFileLocator(xx_rel_number), \
 	(a).segno = (xx_segno) \
 )
 
-/*
- * Macro to mark a buffer slot "most recently used".  Note multiple evaluation
- * of arguments!
- *
- * The reason for the if-test is that there are often many consecutive
- * accesses to the same page (particularly the latest page).  By suppressing
- * useless increments of cur_lru_count, we reduce the probability that old
- * pages' counts will "wrap around" and make them appear recently used.
- *
- * We allow this code to be executed concurrently by multiple processes within
- * SimpleLruReadPage_ReadOnly().  As long as int reads and writes are atomic,
- * this should not cause any completely-bogus values to enter the computation.
- * However, it is possible for either cur_lru_count or individual
- * page_lru_count entries to be "reset" to lower values than they should have,
- * in case a process is delayed while it executes this macro.  With care in
- * SlruSelectLRUPage(), this does little harm, and in any case the absolute
- * worst possible consequence is a nonoptimal choice of page to evict.  The
- * gain from allowing concurrent reads of SLRU pages seems worth it.
- */
-#define SlruRecentlyUsed(shared, slotno)	\
-	do { \
-		int		new_lru_count = (shared)->cur_lru_count; \
-		if (new_lru_count != (shared)->page_lru_count[slotno]) { \
-			(shared)->cur_lru_count = ++new_lru_count; \
-			(shared)->page_lru_count[slotno] = new_lru_count; \
-		} \
-	} while (0)
-
-/* Saved info for SlruReportIOError */
-typedef enum
-{
-	SLRU_OPEN_FAILED,
-	SLRU_SEEK_FAILED,
-	SLRU_READ_FAILED,
-	SLRU_WRITE_FAILED,
-	SLRU_FSYNC_FAILED,
-	SLRU_CLOSE_FAILED
-} SlruErrorCause;
-
-static SlruErrorCause slru_errcause;
-static int	slru_errno;
-
-
-static void SimpleLruZeroLSNs(SlruCtl ctl, int slotno);
-static void SimpleLruWaitIO(SlruCtl ctl, int slotno);
-static void SlruInternalWritePage(SlruCtl ctl, int slotno, SlruWriteAll fdata);
-static bool SlruPhysicalReadPage(SlruCtl ctl, int pageno, int slotno);
-static bool SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno,
-								  SlruWriteAll fdata);
-static void SlruReportIOError(SlruCtl ctl, int pageno, TransactionId xid);
-static int	SlruSelectLRUPage(SlruCtl ctl, int pageno);
-
-static bool SlruScanDirCbDeleteCutoff(SlruCtl ctl, char *filename,
+static bool SlruScanDirCbDeleteCutoff(Oid rel_id,
+									  SlruPagePrecedesFunction PagePrecedes,
+									  char *filename,
 									  int segpage, void *data);
-static void SlruInternalDeleteSegment(SlruCtl ctl, int segno);
-
-/*
- * Initialization of shared memory
- */
-
-Size
-SimpleLruShmemSize(int nslots, int nlsns)
-{
-	Size		sz;
-
-	/* we assume nslots isn't so large as to risk overflow */
-	sz = MAXALIGN(sizeof(SlruSharedData));
-	sz += MAXALIGN(nslots * sizeof(char *));	/* page_buffer[] */
-	sz += MAXALIGN(nslots * sizeof(SlruPageStatus));	/* page_status[] */
-	sz += MAXALIGN(nslots * sizeof(bool));	/* page_dirty[] */
-	sz += MAXALIGN(nslots * sizeof(int));	/* page_number[] */
-	sz += MAXALIGN(nslots * sizeof(int));	/* page_lru_count[] */
-	sz += MAXALIGN(nslots * sizeof(LWLockPadded));	/* buffer_locks[] */
-
-	if (nlsns > 0)
-		sz += MAXALIGN(nslots * nlsns * sizeof(XLogRecPtr));	/* group_lsn[] */
-
-	return BUFFERALIGN(sz) + BLCKSZ * nslots;
-}
-
-/*
- * Initialize, or attach to, a simple LRU cache in shared memory.
- *
- * ctl: address of local (unshared) control structure.
- * name: name of SLRU.  (This is user-visible, pick with care!)
- * nslots: number of page slots to use.
- * nlsns: number of LSN groups per page (set to zero if not relevant).
- * ctllock: LWLock to use to control access to the shared control structure.
- * subdir: PGDATA-relative subdirectory that will contain the files.
- * tranche_id: LWLock tranche ID to use for the SLRU's per-buffer LWLocks.
- */
-void
-SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns,
-			  LWLock *ctllock, const char *subdir, int tranche_id,
-			  SyncRequestHandler sync_handler)
-{
-	SlruShared	shared;
-	bool		found;
-
-	shared = (SlruShared) ShmemInitStruct(name,
-										  SimpleLruShmemSize(nslots, nlsns),
-										  &found);
-
-	if (!IsUnderPostmaster)
-	{
-		/* Initialize locks and shared memory area */
-		char	   *ptr;
-		Size		offset;
-		int			slotno;
-
-		Assert(!found);
-
-		memset(shared, 0, sizeof(SlruSharedData));
-
-		shared->ControlLock = ctllock;
-
-		shared->num_slots = nslots;
-		shared->lsn_groups_per_page = nlsns;
-
-		shared->cur_lru_count = 0;
-
-		/* shared->latest_page_number will be set later */
-
-		shared->slru_stats_idx = pgstat_get_slru_index(name);
-
-		ptr = (char *) shared;
-		offset = MAXALIGN(sizeof(SlruSharedData));
-		shared->page_buffer = (char **) (ptr + offset);
-		offset += MAXALIGN(nslots * sizeof(char *));
-		shared->page_status = (SlruPageStatus *) (ptr + offset);
-		offset += MAXALIGN(nslots * sizeof(SlruPageStatus));
-		shared->page_dirty = (bool *) (ptr + offset);
-		offset += MAXALIGN(nslots * sizeof(bool));
-		shared->page_number = (int *) (ptr + offset);
-		offset += MAXALIGN(nslots * sizeof(int));
-		shared->page_lru_count = (int *) (ptr + offset);
-		offset += MAXALIGN(nslots * sizeof(int));
-
-		/* Initialize LWLocks */
-		shared->buffer_locks = (LWLockPadded *) (ptr + offset);
-		offset += MAXALIGN(nslots * sizeof(LWLockPadded));
-
-		if (nlsns > 0)
-		{
-			shared->group_lsn = (XLogRecPtr *) (ptr + offset);
-			offset += MAXALIGN(nslots * nlsns * sizeof(XLogRecPtr));
-		}
-
-		ptr += BUFFERALIGN(offset);
-		for (slotno = 0; slotno < nslots; slotno++)
-		{
-			LWLockInitialize(&shared->buffer_locks[slotno].lock,
-							 tranche_id);
-
-			shared->page_buffer[slotno] = ptr;
-			shared->page_status[slotno] = SLRU_PAGE_EMPTY;
-			shared->page_dirty[slotno] = false;
-			shared->page_lru_count[slotno] = 0;
-			ptr += BLCKSZ;
-		}
-
-		/* Should fit to estimated shmem size */
-		Assert(ptr - (char *) shared <= SimpleLruShmemSize(nslots, nlsns));
-	}
-	else
-		Assert(found);
-
-	/*
-	 * Initialize the unshared control struct, including directory path. We
-	 * assume caller set PagePrecedes.
-	 */
-	ctl->shared = shared;
-	ctl->sync_handler = sync_handler;
-	strlcpy(ctl->Dir, subdir, sizeof(ctl->Dir));
-}
-
-/*
- * Initialize (or reinitialize) a page to zeroes.
- *
- * The page is not actually written, just set up in shared memory.
- * The slot number of the new page is returned.
- *
- * Control lock must be held at entry, and will be held at exit.
- */
-int
-SimpleLruZeroPage(SlruCtl ctl, int pageno)
-{
-	SlruShared	shared = ctl->shared;
-	int			slotno;
+static void SlruInternalDeleteSegment(Oid rel_id, int segno);
 
-	/* Find a suitable buffer slot for the page */
-	slotno = SlruSelectLRUPage(ctl, pageno);
-	Assert(shared->page_status[slotno] == SLRU_PAGE_EMPTY ||
-		   (shared->page_status[slotno] == SLRU_PAGE_VALID &&
-			!shared->page_dirty[slotno]) ||
-		   shared->page_number[slotno] == pageno);
-
-	/* Mark the slot as containing this page */
-	shared->page_number[slotno] = pageno;
-	shared->page_status[slotno] = SLRU_PAGE_VALID;
-	shared->page_dirty[slotno] = true;
-	SlruRecentlyUsed(shared, slotno);
-
-	/* Set the buffer to zeroes */
-	MemSet(shared->page_buffer[slotno], 0, BLCKSZ);
-
-	/* Set the LSNs for this new page to zero */
-	SimpleLruZeroLSNs(ctl, slotno);
-
-	/* Assume this page is now the latest active page */
-	shared->latest_page_number = pageno;
-
-	/* update the stats counter of zeroed pages */
-	pgstat_count_slru_page_zeroed(shared->slru_stats_idx);
-
-	return slotno;
-}
-
-/*
- * Zero all the LSNs we store for this slru page.
- *
- * This should be called each time we create a new page, and each time we read
- * in a page from disk into an existing buffer.  (Such an old page cannot
- * have any interesting LSNs, since we'd have flushed them before writing
- * the page in the first place.)
- *
- * This assumes that InvalidXLogRecPtr is bitwise-all-0.
- */
-static void
-SimpleLruZeroLSNs(SlruCtl ctl, int slotno)
-{
-	SlruShared	shared = ctl->shared;
-
-	if (shared->lsn_groups_per_page > 0)
-		MemSet(&shared->group_lsn[slotno * shared->lsn_groups_per_page], 0,
-			   shared->lsn_groups_per_page * sizeof(XLogRecPtr));
-}
-
-/*
- * Wait for any active I/O on a page slot to finish.  (This does not
- * guarantee that new I/O hasn't been started before we return, though.
- * In fact the slot might not even contain the same page anymore.)
- *
- * Control lock must be held at entry, and will be held at exit.
- */
-static void
-SimpleLruWaitIO(SlruCtl ctl, int slotno)
-{
-	SlruShared	shared = ctl->shared;
-
-	/* See notes at top of file */
-	LWLockRelease(shared->ControlLock);
-	LWLockAcquire(&shared->buffer_locks[slotno].lock, LW_SHARED);
-	LWLockRelease(&shared->buffer_locks[slotno].lock);
-	LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE);
-
-	/*
-	 * If the slot is still in an io-in-progress state, then either someone
-	 * already started a new I/O on the slot, or a previous I/O failed and
-	 * neglected to reset the page state.  That shouldn't happen, really, but
-	 * it seems worth a few extra cycles to check and recover from it. We can
-	 * cheaply test for failure by seeing if the buffer lock is still held (we
-	 * assume that transaction abort would release the lock).
-	 */
-	if (shared->page_status[slotno] == SLRU_PAGE_READ_IN_PROGRESS ||
-		shared->page_status[slotno] == SLRU_PAGE_WRITE_IN_PROGRESS)
-	{
-		if (LWLockConditionalAcquire(&shared->buffer_locks[slotno].lock, LW_SHARED))
-		{
-			/* indeed, the I/O must have failed */
-			if (shared->page_status[slotno] == SLRU_PAGE_READ_IN_PROGRESS)
-				shared->page_status[slotno] = SLRU_PAGE_EMPTY;
-			else				/* write_in_progress */
-			{
-				shared->page_status[slotno] = SLRU_PAGE_VALID;
-				shared->page_dirty[slotno] = true;
-			}
-			LWLockRelease(&shared->buffer_locks[slotno].lock);
-		}
-	}
-}
-
-/*
- * Find a page in a shared buffer, reading it in if necessary.
- * The page number must correspond to an already-initialized page.
- *
- * If write_ok is true then it is OK to return a page that is in
- * WRITE_IN_PROGRESS state; it is the caller's responsibility to be sure
- * that modification of the page is safe.  If write_ok is false then we
- * will not return the page until it is not undergoing active I/O.
- *
- * The passed-in xid is used only for error reporting, and may be
- * InvalidTransactionId if no specific xid is associated with the action.
- *
- * Return value is the shared-buffer slot number now holding the page.
- * The buffer's LRU access info is updated.
- *
- * Control lock must be held at entry, and will be held at exit.
- */
-int
-SimpleLruReadPage(SlruCtl ctl, int pageno, bool write_ok,
-				  TransactionId xid)
-{
-	SlruShared	shared = ctl->shared;
-
-	/* Outer loop handles restart if we must wait for someone else's I/O */
-	for (;;)
-	{
-		int			slotno;
-		bool		ok;
-
-		/* See if page already is in memory; if not, pick victim slot */
-		slotno = SlruSelectLRUPage(ctl, pageno);
-
-		/* Did we find the page in memory? */
-		if (shared->page_number[slotno] == pageno &&
-			shared->page_status[slotno] != SLRU_PAGE_EMPTY)
-		{
-			/*
-			 * If page is still being read in, we must wait for I/O.  Likewise
-			 * if the page is being written and the caller said that's not OK.
-			 */
-			if (shared->page_status[slotno] == SLRU_PAGE_READ_IN_PROGRESS ||
-				(shared->page_status[slotno] == SLRU_PAGE_WRITE_IN_PROGRESS &&
-				 !write_ok))
-			{
-				SimpleLruWaitIO(ctl, slotno);
-				/* Now we must recheck state from the top */
-				continue;
-			}
-			/* Otherwise, it's ready to use */
-			SlruRecentlyUsed(shared, slotno);
-
-			/* update the stats counter of pages found in the SLRU */
-			pgstat_count_slru_page_hit(shared->slru_stats_idx);
-
-			return slotno;
-		}
-
-		/* We found no match; assert we selected a freeable slot */
-		Assert(shared->page_status[slotno] == SLRU_PAGE_EMPTY ||
-			   (shared->page_status[slotno] == SLRU_PAGE_VALID &&
-				!shared->page_dirty[slotno]));
-
-		/* Mark the slot read-busy */
-		shared->page_number[slotno] = pageno;
-		shared->page_status[slotno] = SLRU_PAGE_READ_IN_PROGRESS;
-		shared->page_dirty[slotno] = false;
-
-		/* Acquire per-buffer lock (cannot deadlock, see notes at top) */
-		LWLockAcquire(&shared->buffer_locks[slotno].lock, LW_EXCLUSIVE);
-
-		/* Release control lock while doing I/O */
-		LWLockRelease(shared->ControlLock);
-
-		/* Do the read */
-		ok = SlruPhysicalReadPage(ctl, pageno, slotno);
-
-		/* Set the LSNs for this newly read-in page to zero */
-		SimpleLruZeroLSNs(ctl, slotno);
-
-		/* Re-acquire control lock and update page state */
-		LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE);
-
-		Assert(shared->page_number[slotno] == pageno &&
-			   shared->page_status[slotno] == SLRU_PAGE_READ_IN_PROGRESS &&
-			   !shared->page_dirty[slotno]);
-
-		shared->page_status[slotno] = ok ? SLRU_PAGE_VALID : SLRU_PAGE_EMPTY;
-
-		LWLockRelease(&shared->buffer_locks[slotno].lock);
-
-		/* Now it's okay to ereport if we failed */
-		if (!ok)
-			SlruReportIOError(ctl, pageno, xid);
-
-		SlruRecentlyUsed(shared, slotno);
-
-		/* update the stats counter of pages not found in SLRU */
-		pgstat_count_slru_page_read(shared->slru_stats_idx);
-
-		return slotno;
-	}
-}
-
-/*
- * Find a page in a shared buffer, reading it in if necessary.
- * The page number must correspond to an already-initialized page.
- * The caller must intend only read-only access to the page.
- *
- * The passed-in xid is used only for error reporting, and may be
- * InvalidTransactionId if no specific xid is associated with the action.
- *
- * Return value is the shared-buffer slot number now holding the page.
- * The buffer's LRU access info is updated.
- *
- * Control lock must NOT be held at entry, but will be held at exit.
- * It is unspecified whether the lock will be shared or exclusive.
- */
-int
-SimpleLruReadPage_ReadOnly(SlruCtl ctl, int pageno, TransactionId xid)
-{
-	SlruShared	shared = ctl->shared;
-	int			slotno;
-
-	/* Try to find the page while holding only shared lock */
-	LWLockAcquire(shared->ControlLock, LW_SHARED);
-
-	/* See if page is already in a buffer */
-	for (slotno = 0; slotno < shared->num_slots; slotno++)
-	{
-		if (shared->page_number[slotno] == pageno &&
-			shared->page_status[slotno] != SLRU_PAGE_EMPTY &&
-			shared->page_status[slotno] != SLRU_PAGE_READ_IN_PROGRESS)
-		{
-			/* See comments for SlruRecentlyUsed macro */
-			SlruRecentlyUsed(shared, slotno);
-
-			/* update the stats counter of pages found in the SLRU */
-			pgstat_count_slru_page_hit(shared->slru_stats_idx);
-
-			return slotno;
-		}
-	}
-
-	/* No luck, so switch to normal exclusive lock and do regular read */
-	LWLockRelease(shared->ControlLock);
-	LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE);
-
-	return SimpleLruReadPage(ctl, pageno, true, xid);
-}
-
-/*
- * Write a page from a shared buffer, if necessary.
- * Does nothing if the specified slot is not dirty.
- *
- * NOTE: only one write attempt is made here.  Hence, it is possible that
- * the page is still dirty at exit (if someone else re-dirtied it during
- * the write).  However, we *do* attempt a fresh write even if the page
- * is already being written; this is for checkpoints.
- *
- * Control lock must be held at entry, and will be held at exit.
- */
-static void
-SlruInternalWritePage(SlruCtl ctl, int slotno, SlruWriteAll fdata)
-{
-	SlruShared	shared = ctl->shared;
-	int			pageno = shared->page_number[slotno];
-	bool		ok;
-
-	/* If a write is in progress, wait for it to finish */
-	while (shared->page_status[slotno] == SLRU_PAGE_WRITE_IN_PROGRESS &&
-		   shared->page_number[slotno] == pageno)
-	{
-		SimpleLruWaitIO(ctl, slotno);
-	}
-
-	/*
-	 * Do nothing if page is not dirty, or if buffer no longer contains the
-	 * same page we were called for.
-	 */
-	if (!shared->page_dirty[slotno] ||
-		shared->page_status[slotno] != SLRU_PAGE_VALID ||
-		shared->page_number[slotno] != pageno)
-		return;
-
-	/*
-	 * Mark the slot write-busy, and clear the dirtybit.  After this point, a
-	 * transaction status update on this page will mark it dirty again.
-	 */
-	shared->page_status[slotno] = SLRU_PAGE_WRITE_IN_PROGRESS;
-	shared->page_dirty[slotno] = false;
-
-	/* Acquire per-buffer lock (cannot deadlock, see notes at top) */
-	LWLockAcquire(&shared->buffer_locks[slotno].lock, LW_EXCLUSIVE);
-
-	/* Release control lock while doing I/O */
-	LWLockRelease(shared->ControlLock);
-
-	/* Do the write */
-	ok = SlruPhysicalWritePage(ctl, pageno, slotno, fdata);
-
-	/* If we failed, and we're in a flush, better close the files */
-	if (!ok && fdata)
-	{
-		int			i;
-
-		for (i = 0; i < fdata->num_files; i++)
-			CloseTransientFile(fdata->fd[i]);
-	}
-
-	/* Re-acquire control lock and update page state */
-	LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE);
-
-	Assert(shared->page_number[slotno] == pageno &&
-		   shared->page_status[slotno] == SLRU_PAGE_WRITE_IN_PROGRESS);
-
-	/* If we failed to write, mark the page dirty again */
-	if (!ok)
-		shared->page_dirty[slotno] = true;
-
-	shared->page_status[slotno] = SLRU_PAGE_VALID;
-
-	LWLockRelease(&shared->buffer_locks[slotno].lock);
-
-	/* Now it's okay to ereport if we failed */
-	if (!ok)
-		SlruReportIOError(ctl, pageno, InvalidTransactionId);
-
-	/* If part of a checkpoint, count this as a buffer written. */
-	if (fdata)
-		CheckpointStats.ckpt_bufs_written++;
-}
-
-/*
- * Wrapper of SlruInternalWritePage, for external callers.
- * fdata is always passed a NULL here.
- */
-void
-SimpleLruWritePage(SlruCtl ctl, int slotno)
-{
-	SlruInternalWritePage(ctl, slotno, NULL);
-}
+static File slrufile(SMgrRelation reln, BlockNumber blocknum, int mode,
+					 bool missing_ok);
 
 /*
  * Return whether the given page exists on disk.
@@ -623,592 +111,30 @@ SimpleLruWritePage(SlruCtl ctl, int slotno)
  * large enough to contain the given page.
  */
 bool
-SimpleLruDoesPhysicalPageExist(SlruCtl ctl, int pageno)
-{
-	int			segno = pageno / SLRU_PAGES_PER_SEGMENT;
-	int			rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
-	int			offset = rpageno * BLCKSZ;
-	char		path[MAXPGPATH];
-	int			fd;
-	bool		result;
-	off_t		endpos;
-
-	/* update the stats counter of checked pages */
-	pgstat_count_slru_page_exists(ctl->shared->slru_stats_idx);
-
-	SlruFileName(ctl, path, segno);
-
-	fd = OpenTransientFile(path, O_RDONLY | PG_BINARY);
-	if (fd < 0)
-	{
-		/* expected: file doesn't exist */
-		if (errno == ENOENT)
-			return false;
-
-		/* report error normally */
-		slru_errcause = SLRU_OPEN_FAILED;
-		slru_errno = errno;
-		SlruReportIOError(ctl, pageno, 0);
-	}
-
-	if ((endpos = lseek(fd, 0, SEEK_END)) < 0)
-	{
-		slru_errcause = SLRU_SEEK_FAILED;
-		slru_errno = errno;
-		SlruReportIOError(ctl, pageno, 0);
-	}
-
-	result = endpos >= (off_t) (offset + BLCKSZ);
-
-	if (CloseTransientFile(fd) != 0)
-	{
-		slru_errcause = SLRU_CLOSE_FAILED;
-		slru_errno = errno;
-		return false;
-	}
-
-	return result;
-}
-
-/*
- * Physical read of a (previously existing) page into a buffer slot
- *
- * On failure, we cannot just ereport(ERROR) since caller has put state in
- * shared memory that must be undone.  So, we return false and save enough
- * info in static variables to let SlruReportIOError make the report.
- *
- * For now, assume it's not worth keeping a file pointer open across
- * read/write operations.  We could cache one virtual file pointer ...
- */
-static bool
-SlruPhysicalReadPage(SlruCtl ctl, int pageno, int slotno)
+SimpleLruDoesPhysicalPageExist(RelFileNumber rel_id, int pageno)
 {
-	SlruShared	shared = ctl->shared;
-	int			segno = pageno / SLRU_PAGES_PER_SEGMENT;
 	int			rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
 	off_t		offset = rpageno * BLCKSZ;
-	char		path[MAXPGPATH];
-	int			fd;
-
-	SlruFileName(ctl, path, segno);
-
-	/*
-	 * In a crash-and-restart situation, it's possible for us to receive
-	 * commands to set the commit status of transactions whose bits are in
-	 * already-truncated segments of the commit log (see notes in
-	 * SlruPhysicalWritePage).  Hence, if we are InRecovery, allow the case
-	 * where the file doesn't exist, and return zeroes instead.
-	 */
-	fd = OpenTransientFile(path, O_RDONLY | PG_BINARY);
-	if (fd < 0)
-	{
-		if (errno != ENOENT || !InRecovery)
-		{
-			slru_errcause = SLRU_OPEN_FAILED;
-			slru_errno = errno;
-			return false;
-		}
-
-		ereport(LOG,
-				(errmsg("file \"%s\" doesn't exist, reading as zeroes",
-						path)));
-		MemSet(shared->page_buffer[slotno], 0, BLCKSZ);
-		return true;
-	}
-
-	errno = 0;
-	pgstat_report_wait_start(WAIT_EVENT_SLRU_READ);
-	if (pg_pread(fd, shared->page_buffer[slotno], BLCKSZ, offset) != BLCKSZ)
-	{
-		pgstat_report_wait_end();
-		slru_errcause = SLRU_READ_FAILED;
-		slru_errno = errno;
-		CloseTransientFile(fd);
-		return false;
-	}
-	pgstat_report_wait_end();
-
-	if (CloseTransientFile(fd) != 0)
-	{
-		slru_errcause = SLRU_CLOSE_FAILED;
-		slru_errno = errno;
-		return false;
-	}
+	off_t		size;
+	File		file;
+	RelFileLocator rlocator = SlruRelFileLocator(rel_id);
+	SMgrRelation reln = smgropen(rlocator, InvalidBackendId);
 
-	return true;
-}
-
-/*
- * Physical write of a page from a buffer slot
- *
- * On failure, we cannot just ereport(ERROR) since caller has put state in
- * shared memory that must be undone.  So, we return false and save enough
- * info in static variables to let SlruReportIOError make the report.
- *
- * For now, assume it's not worth keeping a file pointer open across
- * independent read/write operations.  We do batch operations during
- * SimpleLruWriteAll, though.
- *
- * fdata is NULL for a standalone write, pointer to open-file info during
- * SimpleLruWriteAll.
- */
-static bool
-SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno, SlruWriteAll fdata)
-{
-	SlruShared	shared = ctl->shared;
-	int			segno = pageno / SLRU_PAGES_PER_SEGMENT;
-	int			rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
-	off_t		offset = rpageno * BLCKSZ;
-	char		path[MAXPGPATH];
-	int			fd = -1;
-
-	/* update the stats counter of written pages */
-	pgstat_count_slru_page_written(shared->slru_stats_idx);
-
-	/*
-	 * Honor the write-WAL-before-data rule, if appropriate, so that we do not
-	 * write out data before associated WAL records.  This is the same action
-	 * performed during FlushBuffer() in the main buffer manager.
-	 */
-	if (shared->group_lsn != NULL)
-	{
-		/*
-		 * We must determine the largest async-commit LSN for the page. This
-		 * is a bit tedious, but since this entire function is a slow path
-		 * anyway, it seems better to do this here than to maintain a per-page
-		 * LSN variable (which'd need an extra comparison in the
-		 * transaction-commit path).
-		 */
-		XLogRecPtr	max_lsn;
-		int			lsnindex,
-					lsnoff;
-
-		lsnindex = slotno * shared->lsn_groups_per_page;
-		max_lsn = shared->group_lsn[lsnindex++];
-		for (lsnoff = 1; lsnoff < shared->lsn_groups_per_page; lsnoff++)
-		{
-			XLogRecPtr	this_lsn = shared->group_lsn[lsnindex++];
-
-			if (max_lsn < this_lsn)
-				max_lsn = this_lsn;
-		}
-
-		if (!XLogRecPtrIsInvalid(max_lsn))
-		{
-			/*
-			 * As noted above, elog(ERROR) is not acceptable here, so if
-			 * XLogFlush were to fail, we must PANIC.  This isn't much of a
-			 * restriction because XLogFlush is just about all critical
-			 * section anyway, but let's make sure.
-			 */
-			START_CRIT_SECTION();
-			XLogFlush(max_lsn);
-			END_CRIT_SECTION();
-		}
-	}
-
-	/*
-	 * During a WriteAll, we may already have the desired file open.
-	 */
-	if (fdata)
-	{
-		int			i;
-
-		for (i = 0; i < fdata->num_files; i++)
-		{
-			if (fdata->segno[i] == segno)
-			{
-				fd = fdata->fd[i];
-				break;
-			}
-		}
-	}
-
-	if (fd < 0)
-	{
-		/*
-		 * If the file doesn't already exist, we should create it.  It is
-		 * possible for this to need to happen when writing a page that's not
-		 * first in its segment; we assume the OS can cope with that. (Note:
-		 * it might seem that it'd be okay to create files only when
-		 * SimpleLruZeroPage is called for the first page of a segment.
-		 * However, if after a crash and restart the REDO logic elects to
-		 * replay the log from a checkpoint before the latest one, then it's
-		 * possible that we will get commands to set transaction status of
-		 * transactions that have already been truncated from the commit log.
-		 * Easiest way to deal with that is to accept references to
-		 * nonexistent files here and in SlruPhysicalReadPage.)
-		 *
-		 * Note: it is possible for more than one backend to be executing this
-		 * code simultaneously for different pages of the same file. Hence,
-		 * don't use O_EXCL or O_TRUNC or anything like that.
-		 */
-		SlruFileName(ctl, path, segno);
-		fd = OpenTransientFile(path, O_RDWR | O_CREAT | PG_BINARY);
-		if (fd < 0)
-		{
-			slru_errcause = SLRU_OPEN_FAILED;
-			slru_errno = errno;
-			return false;
-		}
-
-		if (fdata)
-		{
-			if (fdata->num_files < MAX_WRITEALL_BUFFERS)
-			{
-				fdata->fd[fdata->num_files] = fd;
-				fdata->segno[fdata->num_files] = segno;
-				fdata->num_files++;
-			}
-			else
-			{
-				/*
-				 * In the unlikely event that we exceed MAX_FLUSH_BUFFERS,
-				 * fall back to treating it as a standalone write.
-				 */
-				fdata = NULL;
-			}
-		}
-	}
+	/* update the stats counter of checked pages */
+	pgstat_count_slru_page_exists(rel_id);
 
-	errno = 0;
-	pgstat_report_wait_start(WAIT_EVENT_SLRU_WRITE);
-	if (pg_pwrite(fd, shared->page_buffer[slotno], BLCKSZ, offset) != BLCKSZ)
+	file = slrufile(reln, pageno, O_RDWR, true);
+	if (file < 0)
 	{
-		pgstat_report_wait_end();
-		/* if write didn't set errno, assume problem is no disk space */
-		if (errno == 0)
-			errno = ENOSPC;
-		slru_errcause = SLRU_WRITE_FAILED;
-		slru_errno = errno;
-		if (!fdata)
-			CloseTransientFile(fd);
+		Assert(errno == ENOENT);
 		return false;
 	}
-	pgstat_report_wait_end();
-
-	/* Queue up a sync request for the checkpointer. */
-	if (ctl->sync_handler != SYNC_HANDLER_NONE)
-	{
-		FileTag		tag;
-
-		INIT_SLRUFILETAG(tag, ctl->sync_handler, segno);
-		if (!RegisterSyncRequest(&tag, SYNC_REQUEST, false))
-		{
-			/* No space to enqueue sync request.  Do it synchronously. */
-			pgstat_report_wait_start(WAIT_EVENT_SLRU_SYNC);
-			if (pg_fsync(fd) != 0)
-			{
-				pgstat_report_wait_end();
-				slru_errcause = SLRU_FSYNC_FAILED;
-				slru_errno = errno;
-				CloseTransientFile(fd);
-				return false;
-			}
-			pgstat_report_wait_end();
-		}
-	}
+	size = FileSize(file);
+	if (size < 0)
+		elog(ERROR, "could not get size of file \"%s\": %m",
+			 FilePathName(file));
 
-	/* Close file, unless part of flush request. */
-	if (!fdata)
-	{
-		if (CloseTransientFile(fd) != 0)
-		{
-			slru_errcause = SLRU_CLOSE_FAILED;
-			slru_errno = errno;
-			return false;
-		}
-	}
-
-	return true;
-}
-
-/*
- * Issue the error message after failure of SlruPhysicalReadPage or
- * SlruPhysicalWritePage.  Call this after cleaning up shared-memory state.
- */
-static void
-SlruReportIOError(SlruCtl ctl, int pageno, TransactionId xid)
-{
-	int			segno = pageno / SLRU_PAGES_PER_SEGMENT;
-	int			rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
-	int			offset = rpageno * BLCKSZ;
-	char		path[MAXPGPATH];
-
-	SlruFileName(ctl, path, segno);
-	errno = slru_errno;
-	switch (slru_errcause)
-	{
-		case SLRU_OPEN_FAILED:
-			ereport(ERROR,
-					(errcode_for_file_access(),
-					 errmsg("could not access status of transaction %u", xid),
-					 errdetail("Could not open file \"%s\": %m.", path)));
-			break;
-		case SLRU_SEEK_FAILED:
-			ereport(ERROR,
-					(errcode_for_file_access(),
-					 errmsg("could not access status of transaction %u", xid),
-					 errdetail("Could not seek in file \"%s\" to offset %d: %m.",
-							   path, offset)));
-			break;
-		case SLRU_READ_FAILED:
-			if (errno)
-				ereport(ERROR,
-						(errcode_for_file_access(),
-						 errmsg("could not access status of transaction %u", xid),
-						 errdetail("Could not read from file \"%s\" at offset %d: %m.",
-								   path, offset)));
-			else
-				ereport(ERROR,
-						(errmsg("could not access status of transaction %u", xid),
-						 errdetail("Could not read from file \"%s\" at offset %d: read too few bytes.", path, offset)));
-			break;
-		case SLRU_WRITE_FAILED:
-			if (errno)
-				ereport(ERROR,
-						(errcode_for_file_access(),
-						 errmsg("could not access status of transaction %u", xid),
-						 errdetail("Could not write to file \"%s\" at offset %d: %m.",
-								   path, offset)));
-			else
-				ereport(ERROR,
-						(errmsg("could not access status of transaction %u", xid),
-						 errdetail("Could not write to file \"%s\" at offset %d: wrote too few bytes.",
-								   path, offset)));
-			break;
-		case SLRU_FSYNC_FAILED:
-			ereport(data_sync_elevel(ERROR),
-					(errcode_for_file_access(),
-					 errmsg("could not access status of transaction %u", xid),
-					 errdetail("Could not fsync file \"%s\": %m.",
-							   path)));
-			break;
-		case SLRU_CLOSE_FAILED:
-			ereport(ERROR,
-					(errcode_for_file_access(),
-					 errmsg("could not access status of transaction %u", xid),
-					 errdetail("Could not close file \"%s\": %m.",
-							   path)));
-			break;
-		default:
-			/* can't get here, we trust */
-			elog(ERROR, "unrecognized SimpleLru error cause: %d",
-				 (int) slru_errcause);
-			break;
-	}
-}
-
-/*
- * Select the slot to re-use when we need a free slot.
- *
- * The target page number is passed because we need to consider the
- * possibility that some other process reads in the target page while
- * we are doing I/O to free a slot.  Hence, check or recheck to see if
- * any slot already holds the target page, and return that slot if so.
- * Thus, the returned slot is *either* a slot already holding the pageno
- * (could be any state except EMPTY), *or* a freeable slot (state EMPTY
- * or CLEAN).
- *
- * Control lock must be held at entry, and will be held at exit.
- */
-static int
-SlruSelectLRUPage(SlruCtl ctl, int pageno)
-{
-	SlruShared	shared = ctl->shared;
-
-	/* Outer loop handles restart after I/O */
-	for (;;)
-	{
-		int			slotno;
-		int			cur_count;
-		int			bestvalidslot = 0;	/* keep compiler quiet */
-		int			best_valid_delta = -1;
-		int			best_valid_page_number = 0; /* keep compiler quiet */
-		int			bestinvalidslot = 0;	/* keep compiler quiet */
-		int			best_invalid_delta = -1;
-		int			best_invalid_page_number = 0;	/* keep compiler quiet */
-
-		/* See if page already has a buffer assigned */
-		for (slotno = 0; slotno < shared->num_slots; slotno++)
-		{
-			if (shared->page_number[slotno] == pageno &&
-				shared->page_status[slotno] != SLRU_PAGE_EMPTY)
-				return slotno;
-		}
-
-		/*
-		 * If we find any EMPTY slot, just select that one. Else choose a
-		 * victim page to replace.  We normally take the least recently used
-		 * valid page, but we will never take the slot containing
-		 * latest_page_number, even if it appears least recently used.  We
-		 * will select a slot that is already I/O busy only if there is no
-		 * other choice: a read-busy slot will not be least recently used once
-		 * the read finishes, and waiting for an I/O on a write-busy slot is
-		 * inferior to just picking some other slot.  Testing shows the slot
-		 * we pick instead will often be clean, allowing us to begin a read at
-		 * once.
-		 *
-		 * Normally the page_lru_count values will all be different and so
-		 * there will be a well-defined LRU page.  But since we allow
-		 * concurrent execution of SlruRecentlyUsed() within
-		 * SimpleLruReadPage_ReadOnly(), it is possible that multiple pages
-		 * acquire the same lru_count values.  In that case we break ties by
-		 * choosing the furthest-back page.
-		 *
-		 * Notice that this next line forcibly advances cur_lru_count to a
-		 * value that is certainly beyond any value that will be in the
-		 * page_lru_count array after the loop finishes.  This ensures that
-		 * the next execution of SlruRecentlyUsed will mark the page newly
-		 * used, even if it's for a page that has the current counter value.
-		 * That gets us back on the path to having good data when there are
-		 * multiple pages with the same lru_count.
-		 */
-		cur_count = (shared->cur_lru_count)++;
-		for (slotno = 0; slotno < shared->num_slots; slotno++)
-		{
-			int			this_delta;
-			int			this_page_number;
-
-			if (shared->page_status[slotno] == SLRU_PAGE_EMPTY)
-				return slotno;
-			this_delta = cur_count - shared->page_lru_count[slotno];
-			if (this_delta < 0)
-			{
-				/*
-				 * Clean up in case shared updates have caused cur_count
-				 * increments to get "lost".  We back off the page counts,
-				 * rather than trying to increase cur_count, to avoid any
-				 * question of infinite loops or failure in the presence of
-				 * wrapped-around counts.
-				 */
-				shared->page_lru_count[slotno] = cur_count;
-				this_delta = 0;
-			}
-			this_page_number = shared->page_number[slotno];
-			if (this_page_number == shared->latest_page_number)
-				continue;
-			if (shared->page_status[slotno] == SLRU_PAGE_VALID)
-			{
-				if (this_delta > best_valid_delta ||
-					(this_delta == best_valid_delta &&
-					 ctl->PagePrecedes(this_page_number,
-									   best_valid_page_number)))
-				{
-					bestvalidslot = slotno;
-					best_valid_delta = this_delta;
-					best_valid_page_number = this_page_number;
-				}
-			}
-			else
-			{
-				if (this_delta > best_invalid_delta ||
-					(this_delta == best_invalid_delta &&
-					 ctl->PagePrecedes(this_page_number,
-									   best_invalid_page_number)))
-				{
-					bestinvalidslot = slotno;
-					best_invalid_delta = this_delta;
-					best_invalid_page_number = this_page_number;
-				}
-			}
-		}
-
-		/*
-		 * If all pages (except possibly the latest one) are I/O busy, we'll
-		 * have to wait for an I/O to complete and then retry.  In that
-		 * unhappy case, we choose to wait for the I/O on the least recently
-		 * used slot, on the assumption that it was likely initiated first of
-		 * all the I/Os in progress and may therefore finish first.
-		 */
-		if (best_valid_delta < 0)
-		{
-			SimpleLruWaitIO(ctl, bestinvalidslot);
-			continue;
-		}
-
-		/*
-		 * If the selected page is clean, we're set.
-		 */
-		if (!shared->page_dirty[bestvalidslot])
-			return bestvalidslot;
-
-		/*
-		 * Write the page.
-		 */
-		SlruInternalWritePage(ctl, bestvalidslot, NULL);
-
-		/*
-		 * Now loop back and try again.  This is the easiest way of dealing
-		 * with corner cases such as the victim page being re-dirtied while we
-		 * wrote it.
-		 */
-	}
-}
-
-/*
- * Write dirty pages to disk during checkpoint or database shutdown.  Flushing
- * is deferred until the next call to ProcessSyncRequests(), though we do fsync
- * the containing directory here to make sure that newly created directory
- * entries are on disk.
- */
-void
-SimpleLruWriteAll(SlruCtl ctl, bool allow_redirtied)
-{
-	SlruShared	shared = ctl->shared;
-	SlruWriteAllData fdata;
-	int			slotno;
-	int			pageno = 0;
-	int			i;
-	bool		ok;
-
-	/* update the stats counter of flushes */
-	pgstat_count_slru_flush(shared->slru_stats_idx);
-
-	/*
-	 * Find and write dirty pages
-	 */
-	fdata.num_files = 0;
-
-	LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE);
-
-	for (slotno = 0; slotno < shared->num_slots; slotno++)
-	{
-		SlruInternalWritePage(ctl, slotno, &fdata);
-
-		/*
-		 * In some places (e.g. checkpoints), we cannot assert that the slot
-		 * is clean now, since another process might have re-dirtied it
-		 * already.  That's okay.
-		 */
-		Assert(allow_redirtied ||
-			   shared->page_status[slotno] == SLRU_PAGE_EMPTY ||
-			   (shared->page_status[slotno] == SLRU_PAGE_VALID &&
-				!shared->page_dirty[slotno]));
-	}
-
-	LWLockRelease(shared->ControlLock);
-
-	/*
-	 * Now close any files that were open
-	 */
-	ok = true;
-	for (i = 0; i < fdata.num_files; i++)
-	{
-		if (CloseTransientFile(fdata.fd[i]) != 0)
-		{
-			slru_errcause = SLRU_CLOSE_FAILED;
-			slru_errno = errno;
-			pageno = fdata.segno[i] * SLRU_PAGES_PER_SEGMENT;
-			ok = false;
-		}
-	}
-	if (!ok)
-		SlruReportIOError(ctl, pageno, InvalidTransactionId);
-
-	/* Ensure that directory entries for new files are on disk. */
-	if (ctl->sync_handler != SYNC_HANDLER_NONE)
-		fsync_fname(ctl->Dir, true);
+	return size >= offset + BLCKSZ;
 }
 
 /*
@@ -1223,75 +149,14 @@ SimpleLruWriteAll(SlruCtl ctl, bool allow_redirtied)
  * after it has accrued freshly-written data.
  */
 void
-SimpleLruTruncate(SlruCtl ctl, int cutoffPage)
+SimpleLruTruncate(Oid rel_id, SlruPagePrecedesFunction PagePrecedes, int cutoffPage)
 {
-	SlruShared	shared = ctl->shared;
-	int			slotno;
-
 	/* update the stats counter of truncates */
-	pgstat_count_slru_truncate(shared->slru_stats_idx);
-
-	/*
-	 * Scan shared memory and remove any pages preceding the cutoff page, to
-	 * ensure we won't rewrite them later.  (Since this is normally called in
-	 * or just after a checkpoint, any dirty pages should have been flushed
-	 * already ... we're just being extra careful here.)
-	 */
-	LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE);
-
-restart:;
-
-	/*
-	 * While we are holding the lock, make an important safety check: the
-	 * current endpoint page must not be eligible for removal.
-	 */
-	if (ctl->PagePrecedes(shared->latest_page_number, cutoffPage))
-	{
-		LWLockRelease(shared->ControlLock);
-		ereport(LOG,
-				(errmsg("could not truncate directory \"%s\": apparent wraparound",
-						ctl->Dir)));
-		return;
-	}
-
-	for (slotno = 0; slotno < shared->num_slots; slotno++)
-	{
-		if (shared->page_status[slotno] == SLRU_PAGE_EMPTY)
-			continue;
-		if (!ctl->PagePrecedes(shared->page_number[slotno], cutoffPage))
-			continue;
-
-		/*
-		 * If page is clean, just change state to EMPTY (expected case).
-		 */
-		if (shared->page_status[slotno] == SLRU_PAGE_VALID &&
-			!shared->page_dirty[slotno])
-		{
-			shared->page_status[slotno] = SLRU_PAGE_EMPTY;
-			continue;
-		}
-
-		/*
-		 * Hmm, we have (or may have) I/O operations acting on the page, so
-		 * we've got to wait for them to finish and then start again. This is
-		 * the same logic as in SlruSelectLRUPage.  (XXX if page is dirty,
-		 * wouldn't it be OK to just discard it without writing it?
-		 * SlruMayDeleteSegment() uses a stricter qualification, so we might
-		 * not delete this page in the end; even if we don't delete it, we
-		 * won't have cause to read its data again.  For now, keep the logic
-		 * the same as it was.)
-		 */
-		if (shared->page_status[slotno] == SLRU_PAGE_VALID)
-			SlruInternalWritePage(ctl, slotno, NULL);
-		else
-			SimpleLruWaitIO(ctl, slotno);
-		goto restart;
-	}
-
-	LWLockRelease(shared->ControlLock);
+	pgstat_count_slru_truncate(rel_id);
 
 	/* Now we can remove the old segment(s) */
-	(void) SlruScanDirectory(ctl, SlruScanDirCbDeleteCutoff, &cutoffPage);
+	(void) SlruScanDirectory(rel_id, PagePrecedes, SlruScanDirCbDeleteCutoff,
+							 &cutoffPage);
 }
 
 /*
@@ -1301,21 +166,21 @@ restart:;
  * they either can't yet contain anything, or have already been cleaned out.
  */
 static void
-SlruInternalDeleteSegment(SlruCtl ctl, int segno)
+SlruInternalDeleteSegment(Oid rel_id, int segno)
 {
 	char		path[MAXPGPATH];
 
 	/* Forget any fsync requests queued for this segment. */
-	if (ctl->sync_handler != SYNC_HANDLER_NONE)
+	if (defs[rel_id].synchronize)
 	{
 		FileTag		tag;
 
-		INIT_SLRUFILETAG(tag, ctl->sync_handler, segno);
+		INIT_SLRUFILETAG(tag, rel_id, segno);
 		RegisterSyncRequest(&tag, SYNC_FORGET_REQUEST, true);
 	}
 
 	/* Unlink the file. */
-	SlruFileName(ctl, path, segno);
+	SlruFileName(rel_id, path, segno);
 	ereport(DEBUG2, (errmsg_internal("removing file \"%s\"", path)));
 	unlink(path);
 }
@@ -1324,54 +189,15 @@ SlruInternalDeleteSegment(SlruCtl ctl, int segno)
  * Delete an individual SLRU segment, identified by the segment number.
  */
 void
-SlruDeleteSegment(SlruCtl ctl, int segno)
+SlruDeleteSegment(RelFileNumber rel_number, int segno)
 {
-	SlruShared	shared = ctl->shared;
-	int			slotno;
-	bool		did_write;
+	RelFileLocator rlocator = SlruRelFileLocator(rel_number);
 
 	/* Clean out any possibly existing references to the segment. */
-	LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE);
-restart:
-	did_write = false;
-	for (slotno = 0; slotno < shared->num_slots; slotno++)
-	{
-		int			pagesegno = shared->page_number[slotno] / SLRU_PAGES_PER_SEGMENT;
-
-		if (shared->page_status[slotno] == SLRU_PAGE_EMPTY)
-			continue;
-
-		/* not the segment we're looking for */
-		if (pagesegno != segno)
-			continue;
-
-		/* If page is clean, just change state to EMPTY (expected case). */
-		if (shared->page_status[slotno] == SLRU_PAGE_VALID &&
-			!shared->page_dirty[slotno])
-		{
-			shared->page_status[slotno] = SLRU_PAGE_EMPTY;
-			continue;
-		}
+	for (int i = 0; i < SLRU_PAGES_PER_SEGMENT; ++i)
+		DiscardBuffer(rlocator, MAIN_FORKNUM, segno * SLRU_PAGES_PER_SEGMENT + i);
 
-		/* Same logic as SimpleLruTruncate() */
-		if (shared->page_status[slotno] == SLRU_PAGE_VALID)
-			SlruInternalWritePage(ctl, slotno, NULL);
-		else
-			SimpleLruWaitIO(ctl, slotno);
-
-		did_write = true;
-	}
-
-	/*
-	 * Be extra careful and re-check. The IO functions release the control
-	 * lock, so new pages could have been read in.
-	 */
-	if (did_write)
-		goto restart;
-
-	SlruInternalDeleteSegment(ctl, segno);
-
-	LWLockRelease(shared->ControlLock);
+	SlruInternalDeleteSegment(rel_number, segno);
 }
 
 /*
@@ -1388,19 +214,21 @@ restart:
  * first>=cutoff && last>=cutoff: no; every page of this segment is too young
  */
 static bool
-SlruMayDeleteSegment(SlruCtl ctl, int segpage, int cutoffPage)
+SlruMayDeleteSegment(SlruPagePrecedesFunction PagePrecedes,
+					 int segpage, int cutoffPage)
 {
 	int			seg_last_page = segpage + SLRU_PAGES_PER_SEGMENT - 1;
 
 	Assert(segpage % SLRU_PAGES_PER_SEGMENT == 0);
 
-	return (ctl->PagePrecedes(segpage, cutoffPage) &&
-			ctl->PagePrecedes(seg_last_page, cutoffPage));
+	return (PagePrecedes(segpage, cutoffPage) &&
+			PagePrecedes(seg_last_page, cutoffPage));
 }
 
 #ifdef USE_ASSERT_CHECKING
 static void
-SlruPagePrecedesTestOffset(SlruCtl ctl, int per_page, uint32 offset)
+SlruPagePrecedesTestOffset(SlruPagePrecedesFunction PagePrecedes,
+						   int per_page, uint32 offset)
 {
 	TransactionId lhs,
 				rhs;
@@ -1425,19 +253,19 @@ SlruPagePrecedesTestOffset(SlruCtl ctl, int per_page, uint32 offset)
 	Assert(!TransactionIdPrecedes(rhs, lhs + 1));
 	Assert(!TransactionIdFollowsOrEquals(lhs, rhs));
 	Assert(!TransactionIdFollowsOrEquals(rhs, lhs));
-	Assert(!ctl->PagePrecedes(lhs / per_page, lhs / per_page));
-	Assert(!ctl->PagePrecedes(lhs / per_page, rhs / per_page));
-	Assert(!ctl->PagePrecedes(rhs / per_page, lhs / per_page));
-	Assert(!ctl->PagePrecedes((lhs - per_page) / per_page, rhs / per_page));
-	Assert(ctl->PagePrecedes(rhs / per_page, (lhs - 3 * per_page) / per_page));
-	Assert(ctl->PagePrecedes(rhs / per_page, (lhs - 2 * per_page) / per_page));
-	Assert(ctl->PagePrecedes(rhs / per_page, (lhs - 1 * per_page) / per_page)
+	Assert(!PagePrecedes(lhs / per_page, lhs / per_page));
+	Assert(!PagePrecedes(lhs / per_page, rhs / per_page));
+	Assert(!PagePrecedes(rhs / per_page, lhs / per_page));
+	Assert(!PagePrecedes((lhs - per_page) / per_page, rhs / per_page));
+	Assert(PagePrecedes(rhs / per_page, (lhs - 3 * per_page) / per_page));
+	Assert(PagePrecedes(rhs / per_page, (lhs - 2 * per_page) / per_page));
+	Assert(PagePrecedes(rhs / per_page, (lhs - 1 * per_page) / per_page)
 		   || (1U << 31) % per_page != 0);	/* See CommitTsPagePrecedes() */
-	Assert(ctl->PagePrecedes((lhs + 1 * per_page) / per_page, rhs / per_page)
+	Assert(PagePrecedes((lhs + 1 * per_page) / per_page, rhs / per_page)
 		   || (1U << 31) % per_page != 0);
-	Assert(ctl->PagePrecedes((lhs + 2 * per_page) / per_page, rhs / per_page));
-	Assert(ctl->PagePrecedes((lhs + 3 * per_page) / per_page, rhs / per_page));
-	Assert(!ctl->PagePrecedes(rhs / per_page, (lhs + per_page) / per_page));
+	Assert(PagePrecedes((lhs + 2 * per_page) / per_page, rhs / per_page));
+	Assert(PagePrecedes((lhs + 3 * per_page) / per_page, rhs / per_page));
+	Assert(!PagePrecedes(rhs / per_page, (lhs + per_page) / per_page));
 
 	/*
 	 * GetNewTransactionId() has assigned the last XID it can safely use, and
@@ -1450,7 +278,7 @@ SlruPagePrecedesTestOffset(SlruCtl ctl, int per_page, uint32 offset)
 	oldestXact = newestXact + 1;
 	oldestXact -= 1U << 31;
 	oldestPage = oldestXact / per_page;
-	Assert(!SlruMayDeleteSegment(ctl,
+	Assert(!SlruMayDeleteSegment(PagePrecedes,
 								 (newestPage -
 								  newestPage % SLRU_PAGES_PER_SEGMENT),
 								 oldestPage));
@@ -1466,7 +294,7 @@ SlruPagePrecedesTestOffset(SlruCtl ctl, int per_page, uint32 offset)
 	oldestXact = newestXact + 1;
 	oldestXact -= 1U << 31;
 	oldestPage = oldestXact / per_page;
-	Assert(!SlruMayDeleteSegment(ctl,
+	Assert(!SlruMayDeleteSegment(PagePrecedes,
 								 (newestPage -
 								  newestPage % SLRU_PAGES_PER_SEGMENT),
 								 oldestPage));
@@ -1482,12 +310,12 @@ SlruPagePrecedesTestOffset(SlruCtl ctl, int per_page, uint32 offset)
  * do not apply to them.)
  */
 void
-SlruPagePrecedesUnitTests(SlruCtl ctl, int per_page)
+SlruPagePrecedesUnitTests(SlruPagePrecedesFunction PagePrecedes, int per_page)
 {
 	/* Test first, middle and last entries of a page. */
-	SlruPagePrecedesTestOffset(ctl, per_page, 0);
-	SlruPagePrecedesTestOffset(ctl, per_page, per_page / 2);
-	SlruPagePrecedesTestOffset(ctl, per_page, per_page - 1);
+	SlruPagePrecedesTestOffset(PagePrecedes, per_page, 0);
+	SlruPagePrecedesTestOffset(PagePrecedes, per_page, per_page / 2);
+	SlruPagePrecedesTestOffset(PagePrecedes, per_page, per_page - 1);
 }
 #endif
 
@@ -1497,11 +325,12 @@ SlruPagePrecedesUnitTests(SlruCtl ctl, int per_page)
  *		one containing the page passed as "data".
  */
 bool
-SlruScanDirCbReportPresence(SlruCtl ctl, char *filename, int segpage, void *data)
+SlruScanDirCbReportPresence(Oid rel_id, SlruPagePrecedesFunction PagePrecedes,
+							char *filename, int segpage, void *data)
 {
 	int			cutoffPage = *(int *) data;
 
-	if (SlruMayDeleteSegment(ctl, segpage, cutoffPage))
+	if (SlruMayDeleteSegment(PagePrecedes, segpage, cutoffPage))
 		return true;			/* found one; don't iterate any more */
 
 	return false;				/* keep going */
@@ -1512,12 +341,18 @@ SlruScanDirCbReportPresence(SlruCtl ctl, char *filename, int segpage, void *data
  *		This callback deletes segments prior to the one passed in as "data".
  */
 static bool
-SlruScanDirCbDeleteCutoff(SlruCtl ctl, char *filename, int segpage, void *data)
+SlruScanDirCbDeleteCutoff(RelFileNumber rel_number, SlruPagePrecedesFunction PagePrecedes,
+						  char *filename, int segpage, void *data)
 {
+	RelFileLocator rlocator = SlruRelFileLocator(rel_number);
 	int			cutoffPage = *(int *) data;
 
-	if (SlruMayDeleteSegment(ctl, segpage, cutoffPage))
-		SlruInternalDeleteSegment(ctl, segpage / SLRU_PAGES_PER_SEGMENT);
+	if (SlruMayDeleteSegment(PagePrecedes, segpage, cutoffPage))
+	{
+		for (int i = 0; i < SLRU_PAGES_PER_SEGMENT; ++i)
+			DiscardBuffer(rlocator, MAIN_FORKNUM, segpage + i);
+		SlruInternalDeleteSegment(rel_number, segpage / SLRU_PAGES_PER_SEGMENT);
+	}
 
 	return false;				/* keep going */
 }
@@ -1527,9 +362,10 @@ SlruScanDirCbDeleteCutoff(SlruCtl ctl, char *filename, int segpage, void *data)
  *		This callback deletes all segments.
  */
 bool
-SlruScanDirCbDeleteAll(SlruCtl ctl, char *filename, int segpage, void *data)
+SlruScanDirCbDeleteAll(Oid rel_id, SlruPagePrecedesFunction PagePrecedes,
+					   char *filename, int segpage, void *data)
 {
-	SlruInternalDeleteSegment(ctl, segpage / SLRU_PAGES_PER_SEGMENT);
+	SlruInternalDeleteSegment(rel_id, segpage / SLRU_PAGES_PER_SEGMENT);
 
 	return false;				/* keep going */
 }
@@ -1550,16 +386,20 @@ SlruScanDirCbDeleteAll(SlruCtl ctl, char *filename, int segpage, void *data)
  * Note that no locking is applied.
  */
 bool
-SlruScanDirectory(SlruCtl ctl, SlruScanCallback callback, void *data)
+SlruScanDirectory(Oid rel_id, SlruPagePrecedesFunction PagePrecedes,
+				  SlruScanCallback callback, void *data)
 {
 	bool		retval = false;
 	DIR		   *cldir;
 	struct dirent *clde;
 	int			segno;
 	int			segpage;
+	const char *path;
 
-	cldir = AllocateDir(ctl->Dir);
-	while ((clde = ReadDir(cldir, ctl->Dir)) != NULL)
+	path = defs[rel_id].path;
+
+	cldir = AllocateDir(path);
+	while ((clde = ReadDir(cldir, path)) != NULL)
 	{
 		size_t		len;
 
@@ -1572,8 +412,8 @@ SlruScanDirectory(SlruCtl ctl, SlruScanCallback callback, void *data)
 			segpage = segno * SLRU_PAGES_PER_SEGMENT;
 
 			elog(DEBUG2, "SlruScanDirectory invoking callback on %s/%s",
-				 ctl->Dir, clde->d_name);
-			retval = callback(ctl, clde->d_name, segpage, data);
+				 path, clde->d_name);
+			retval = callback(rel_id, PagePrecedes, clde->d_name, segpage, data);
 			if (retval)
 				break;
 		}
@@ -1583,30 +423,294 @@ SlruScanDirectory(SlruCtl ctl, SlruScanCallback callback, void *data)
 	return retval;
 }
 
+void
+CheckPointSLRU(void)
+{
+	/* Ensure that directory entries for new files are on disk. */
+	for (int i = 0; i < lengthof(defs); ++i)
+	{
+		if (defs[i].synchronize)
+			fsync_fname(defs[i].path, true);
+	}
+}
+
 /*
- * Individual SLRUs (clog, ...) have to provide a sync.c handler function so
- * that they can provide the correct "SlruCtl" (otherwise we don't know how to
- * build the path), but they just forward to this common implementation that
- * performs the fsync.
+ * Read a buffer.  Buffer is pinned on return.
  */
-int
-SlruSyncFileTag(SlruCtl ctl, const FileTag *ftag, char *path)
+Buffer
+ReadSlruBuffer(RelFileNumber rel_number, int pageno)
+{
+	RelFileLocator rlocator = SlruRelFileLocator(rel_number);
+	Buffer		buffer;
+	bool		hit;
+
+	/* Try to avoid doing a buffer mapping table lookup for repeated access. */
+	buffer = slru_recent_buffers[rel_number].recent_buffer;
+	if (slru_recent_buffers[rel_number].pageno == pageno &&
+		BufferIsValid(buffer) &&
+		ReadRecentBuffer(rlocator, MAIN_FORKNUM, pageno, buffer))
+	{
+		pgstat_count_slru_page_hit(rel_number);
+		return buffer;
+	}
+
+	/* Regular lookup. */
+	buffer = ReadBufferWithoutRelcacheWithHit(rlocator, MAIN_FORKNUM, pageno,
+											  RBM_NORMAL, &hit);
+
+	/* Remember where this page is for next time. */
+	slru_recent_buffers[rel_number].pageno = pageno;
+	slru_recent_buffers[rel_number].recent_buffer = buffer;
+
+	if (hit)
+		pgstat_count_slru_page_hit(rel_number);
+
+	return buffer;
+}
+
+/*
+ * Zero-initialize a buffer.  Buffer is pinned and exclusively locked on return.
+ */
+Buffer
+ZeroSlruBuffer(RelFileNumber rel_number, int pageno)
+{
+	RelFileLocator rlocator = SlruRelFileLocator(rel_number);
+	Buffer	buffer;
+
+	buffer = ReadBufferWithoutRelcache(rlocator, MAIN_FORKNUM, pageno,
+									   RBM_ZERO_AND_LOCK, NULL, true);
+
+	/* Remember where this page is for next time. */
+	slru_recent_buffers[rel_number].pageno = pageno;
+	slru_recent_buffers[rel_number].recent_buffer = buffer;
+
+	pgstat_count_slru_page_zeroed(rel_number);
+
+	return buffer;
+}
+
+Oid
+SlruRelIdByName(const char *name)
 {
-	int			fd;
-	int			save_errno;
-	int			result;
+	for (int i = 0; i < lengthof(defs); ++i)
+		if (strcmp(defs[i].name, name) == 0)
+			return i;
+
+	elog(ERROR, "unknown SLRU \"%s\"", name);
+}
 
-	SlruFileName(ctl, path, ftag->segno);
+const char *
+SlruName(Oid rel_id)
+{
+	if (rel_id >= SLRU_NUM_RELS)
+		elog(ERROR, "invalid SLRU rel ID %u", rel_id);
 
-	fd = OpenTransientFile(path, O_RDWR | PG_BINARY);
-	if (fd < 0)
+	return defs[rel_id].name;
+}
+
+int
+slrusyncfiletag(const FileTag *ftag, char *path)
+{
+	SMgrRelation reln;
+	File		file;
+
+	reln = smgropen(ftag->rlocator, InvalidBackendId);
+	file = slrufile(reln, ftag->segno * SLRU_PAGES_PER_SEGMENT, O_RDWR, true);
+	if (file < 0)
+	{
+		/* Path is reported here so the caller can make an error message */
+		SlruFileName(ftag->rlocator.relNumber, path, ftag->segno);
 		return -1;
+	}
+
+	pgstat_count_slru_flush(reln->smgr_rlocator.locator.relNumber);
+
+	return FileSync(file, WAIT_EVENT_SLRU_SYNC);
+}
+
+static File
+slrufile(SMgrRelation reln, BlockNumber blocknum, int mode, bool missing_ok)
+{
+	int			segment = blocknum / SLRU_PAGES_PER_SEGMENT;
+	char		path[MAXPGPATH];
+
+	Assert(reln->smgr_rlocator.locator.dbOid == SLRU_DB_ID);
+	Assert(reln->smgr_rlocator.locator.relNumber < lengthof(defs));
+	Assert(defs[reln->smgr_rlocator.locator.relNumber].path != NULL);
+
+	/* Do we have the right file open already? */
+	if (reln->slru_file_segment == segment)
+	{
+		/* XXX How can we invalidate this if the SLRU wraps around?! */
+		Assert(reln->slru_file != -1);
+		return reln->slru_file;
+	}
+
+	/* Close the current file, if we have one open. */
+	if (reln->slru_file_segment != -1)
+	{
+		Assert(reln->slru_file != -1);
+		FileClose(reln->slru_file);
+		reln->slru_file = -1;
+		reln->slru_file_segment = -1;
+	}
+
+	/* Open the file we want. */
+	SlruFileName(reln->smgr_rlocator.locator.relNumber, path, segment);
+	reln->slru_file = PathNameOpenFile(path, mode);
+	if (reln->slru_file >= 0)
+		reln->slru_file_segment = segment;
+	else if (!(missing_ok && errno == ENOENT))
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("could not open file \"%s\": %m",
+						path)));
+
+	/*
+	 * XXX That error message doesn't contain the xid; then again, the
+	 * previous SLRU error codes were all weird about xids anyway; maybe
+	 * clog.c, notify.c et al should install an error context callback that
+	 * reports SLRU-specific details in a more natural format
+	 */
+
+	return reln->slru_file;
+}
+
+void
+slruopen(SMgrRelation reln)
+{
+	reln->slru_file = -1;
+	reln->slru_file_segment = -1;
+
+	/*
+	 * Unlink from the list of SMgrRelations that get closed at end of
+	 * transaction: this one isn't owned by a Relation, so it must survive.
+	 */
+	dlist_delete(&reln->node);
+}
+
+void
+slruclose(SMgrRelation reln, ForkNumber forknum)
+{
+	if (reln->slru_file != -1)
+		FileClose(reln->slru_file);
+	reln->slru_file = -1;
+	reln->slru_file_segment = -1;
+}
+
+void
+slruwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
+		  char *buffer, bool skipFsync)
+{
+	File		file;
+	RelFileNumber rel_number;
+	off_t		offset;
+	int			rc;
+
+	file = slrufile(reln, blocknum, O_RDWR | O_CREAT, false);
+	offset = (blocknum % SLRU_PAGES_PER_SEGMENT) * BLCKSZ;
+
+	rc = FileWrite(file, buffer, BLCKSZ, offset, WAIT_EVENT_SLRU_WRITE);
+	if (rc < 0)
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("could not write %d bytes to file \"%s\" at offset %d: %m",
+						BLCKSZ,
+						FilePathName(file),
+						(int) offset)));
+	if (rc < BLCKSZ)
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("could not write %d bytes to file \"%s\" at offset %d, only %d bytes written",
+						BLCKSZ,
+						FilePathName(file),
+						(int) offset,
+						rc)));
+
+	rel_number = reln->smgr_rlocator.locator.relNumber;
+	if (defs[rel_number].synchronize)
+	{
+		FileTag			tag;
+
+		/* Tell checkpointer to synchronize this file. */
+		INIT_SLRUFILETAG(tag, rel_number, blocknum / SLRU_PAGES_PER_SEGMENT);
+		if (!RegisterSyncRequest(&tag, SYNC_REQUEST, false))
+		{
+			/* Queue full.  Do it synchronously. */
+			if (FileSync(file, WAIT_EVENT_SLRU_SYNC) < 0)
+				ereport(data_sync_elevel(ERROR),
+						(errcode_for_file_access(),
+						 errmsg("could not synchronize file \"%s\": %m",
+								FilePathName(file))));
+		}
+	}
+
+	pgstat_count_slru_page_written(reln->smgr_rlocator.locator.relNumber);
+}
+
+void
+slruread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
+		 char *buffer)
+{
+	File file;
+	off_t offset;
+	int rc;
+
+	file = slrufile(reln, blocknum, O_RDWR, false);
+	offset = (blocknum % SLRU_PAGES_PER_SEGMENT) * BLCKSZ;
+
+	rc = FileRead(file, buffer, BLCKSZ, offset, WAIT_EVENT_SLRU_READ);
+	if (rc < 0)
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("could not read %d bytes from file \"%s\" at offset %d: %m",
+						BLCKSZ,
+						FilePathName(file),
+						(int) offset)));
+	if (rc < BLCKSZ)
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("could not read %d bytes from file \"%s\" at offset %d, only %d bytes read",
+						BLCKSZ,
+						FilePathName(file),
+						(int) offset,
+						rc)));
+
+	pgstat_count_slru_page_read(reln->smgr_rlocator.locator.relNumber);
+}
 
-	result = pg_fsync(fd);
-	save_errno = errno;
+void
+slruwriteback(SMgrRelation reln, ForkNumber forknum,
+			  BlockNumber blocknum, BlockNumber nblocks)
+{
+	RelFileNumber rel_number;
 
-	CloseTransientFile(fd);
+	/* No point in flushing data we won't be fsyncing. */
+	rel_number = reln->smgr_rlocator.locator.relNumber;
+	if (!defs[rel_number].synchronize)
+		return;
 
-	errno = save_errno;
-	return result;
+	while (nblocks > 0)
+	{
+		File		file;
+		BlockNumber blocknum_in_this_file;
+		BlockNumber nflush;
+
+		/* Range of blocks to flush that fall within a single segment file. */
+		blocknum_in_this_file = blocknum % SLRU_PAGES_PER_SEGMENT;
+		nflush = Min(nblocks, SLRU_PAGES_PER_SEGMENT - blocknum_in_this_file);
+
+		/* The file might have been unlinked already, so tolerate missing. */
+		file = slrufile(reln, blocknum, O_RDWR, true);
+		if (file < 0)
+			return;
+
+		FileWriteback(file,
+					  BLCKSZ * blocknum_in_this_file,
+					  BLCKSZ * nflush,
+					  WAIT_EVENT_SLRU_FLUSH);
+
+		nblocks -= nflush;
+		blocknum += nflush;
+	}
 }
diff --git a/src/backend/access/transam/subtrans.c b/src/backend/access/transam/subtrans.c
index 66d35481552..98c4caea8d8 100644
--- a/src/backend/access/transam/subtrans.c
+++ b/src/backend/access/transam/subtrans.c
@@ -32,6 +32,7 @@
 #include "access/subtrans.h"
 #include "access/transam.h"
 #include "pg_trace.h"
+#include "storage/bufmgr.h"
 #include "utils/snapmgr.h"
 
 
@@ -55,15 +56,7 @@
 #define TransactionIdToEntry(xid) ((xid) % (TransactionId) SUBTRANS_XACTS_PER_PAGE)
 
 
-/*
- * Link to shared-memory data structures for SUBTRANS control
- */
-static SlruCtlData SubTransCtlData;
-
-#define SubTransCtl  (&SubTransCtlData)
-
-
-static int	ZeroSUBTRANSPage(int pageno);
+static Buffer ZeroSUBTRANSPage(int pageno);
 static bool SubTransPagePrecedes(int page1, int page2);
 
 
@@ -75,16 +68,15 @@ SubTransSetParent(TransactionId xid, TransactionId parent)
 {
 	int			pageno = TransactionIdToPage(xid);
 	int			entryno = TransactionIdToEntry(xid);
-	int			slotno;
 	TransactionId *ptr;
+	Buffer		buffer;
 
 	Assert(TransactionIdIsValid(parent));
 	Assert(TransactionIdFollows(xid, parent));
 
-	LWLockAcquire(SubtransSLRULock, LW_EXCLUSIVE);
-
-	slotno = SimpleLruReadPage(SubTransCtl, pageno, true, xid);
-	ptr = (TransactionId *) SubTransCtl->shared->page_buffer[slotno];
+	buffer = ReadSlruBuffer(SLRU_SUBTRANS_REL_ID, pageno);
+	LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+	ptr = (TransactionId *) BufferGetPage(buffer);
 	ptr += entryno;
 
 	/*
@@ -96,10 +88,10 @@ SubTransSetParent(TransactionId xid, TransactionId parent)
 	{
 		Assert(*ptr == InvalidTransactionId);
 		*ptr = parent;
-		SubTransCtl->shared->page_dirty[slotno] = true;
+		MarkBufferDirty(buffer);
 	}
 
-	LWLockRelease(SubtransSLRULock);
+	UnlockReleaseBuffer(buffer);
 }
 
 /*
@@ -110,9 +102,9 @@ SubTransGetParent(TransactionId xid)
 {
 	int			pageno = TransactionIdToPage(xid);
 	int			entryno = TransactionIdToEntry(xid);
-	int			slotno;
 	TransactionId *ptr;
 	TransactionId parent;
+	Buffer		buffer;
 
 	/* Can't ask about stuff that might not be around anymore */
 	Assert(TransactionIdFollowsOrEquals(xid, TransactionXmin));
@@ -121,15 +113,14 @@ SubTransGetParent(TransactionId xid)
 	if (!TransactionIdIsNormal(xid))
 		return InvalidTransactionId;
 
-	/* lock is acquired by SimpleLruReadPage_ReadOnly */
+	buffer = ReadSlruBuffer(SLRU_SUBTRANS_REL_ID, pageno);
 
-	slotno = SimpleLruReadPage_ReadOnly(SubTransCtl, pageno, xid);
-	ptr = (TransactionId *) SubTransCtl->shared->page_buffer[slotno];
+	ptr = (TransactionId *) BufferGetPage(buffer);
 	ptr += entryno;
 
 	parent = *ptr;
 
-	LWLockRelease(SubtransSLRULock);
+	ReleaseBuffer(buffer);
 
 	return parent;
 }
@@ -177,26 +168,6 @@ SubTransGetTopmostTransaction(TransactionId xid)
 	return previousXid;
 }
 
-
-/*
- * Initialization of shared memory for SUBTRANS
- */
-Size
-SUBTRANSShmemSize(void)
-{
-	return SimpleLruShmemSize(NUM_SUBTRANS_BUFFERS, 0);
-}
-
-void
-SUBTRANSShmemInit(void)
-{
-	SubTransCtl->PagePrecedes = SubTransPagePrecedes;
-	SimpleLruInit(SubTransCtl, "Subtrans", NUM_SUBTRANS_BUFFERS, 0,
-				  SubtransSLRULock, "pg_subtrans",
-				  LWTRANCHE_SUBTRANS_BUFFER, SYNC_HANDLER_NONE);
-	SlruPagePrecedesUnitTests(SubTransCtl, SUBTRANS_XACTS_PER_PAGE);
-}
-
 /*
  * This func must be called ONCE on system install.  It creates
  * the initial SUBTRANS segment.  (The SUBTRANS directory is assumed to
@@ -210,18 +181,16 @@ SUBTRANSShmemInit(void)
 void
 BootStrapSUBTRANS(void)
 {
-	int			slotno;
+	Buffer		buffer;
 
-	LWLockAcquire(SubtransSLRULock, LW_EXCLUSIVE);
+	SlruPagePrecedesUnitTests(SubTransPagePrecedes, SUBTRANS_XACTS_PER_PAGE);
 
 	/* Create and zero the first page of the subtrans log */
-	slotno = ZeroSUBTRANSPage(0);
+	buffer = ZeroSUBTRANSPage(0);
 
 	/* Make sure it's written out */
-	SimpleLruWritePage(SubTransCtl, slotno);
-	Assert(!SubTransCtl->shared->page_dirty[slotno]);
-
-	LWLockRelease(SubtransSLRULock);
+	FlushOneBuffer(buffer);
+	UnlockReleaseBuffer(buffer);
 }
 
 /*
@@ -232,10 +201,15 @@ BootStrapSUBTRANS(void)
  *
  * Control lock must be held at entry, and will be held at exit.
  */
-static int
+static Buffer
 ZeroSUBTRANSPage(int pageno)
 {
-	return SimpleLruZeroPage(SubTransCtl, pageno);
+	Buffer		buffer;
+
+	buffer = ZeroSlruBuffer(SLRU_SUBTRANS_REL_ID, pageno);
+	MarkBufferDirty(buffer);
+
+	return buffer;
 }
 
 /*
@@ -258,7 +232,6 @@ StartupSUBTRANS(TransactionId oldestActiveXID)
 	 * Whenever we advance into a new page, ExtendSUBTRANS will likewise zero
 	 * the new page without regard to whatever was previously on disk.
 	 */
-	LWLockAcquire(SubtransSLRULock, LW_EXCLUSIVE);
 
 	startPage = TransactionIdToPage(oldestActiveXID);
 	nextXid = ShmemVariableCache->nextXid;
@@ -266,36 +239,15 @@ StartupSUBTRANS(TransactionId oldestActiveXID)
 
 	while (startPage != endPage)
 	{
-		(void) ZeroSUBTRANSPage(startPage);
+		UnlockReleaseBuffer(ZeroSUBTRANSPage(startPage));
 		startPage++;
 		/* must account for wraparound */
 		if (startPage > TransactionIdToPage(MaxTransactionId))
 			startPage = 0;
 	}
-	(void) ZeroSUBTRANSPage(startPage);
-
-	LWLockRelease(SubtransSLRULock);
+	UnlockReleaseBuffer(ZeroSUBTRANSPage(startPage));
 }
 
-/*
- * Perform a checkpoint --- either during shutdown, or on-the-fly
- */
-void
-CheckPointSUBTRANS(void)
-{
-	/*
-	 * Write dirty SUBTRANS pages to disk
-	 *
-	 * This is not actually necessary from a correctness point of view. We do
-	 * it merely to improve the odds that writing of dirty pages is done by
-	 * the checkpoint process and not by backends.
-	 */
-	TRACE_POSTGRESQL_SUBTRANS_CHECKPOINT_START(true);
-	SimpleLruWriteAll(SubTransCtl, true);
-	TRACE_POSTGRESQL_SUBTRANS_CHECKPOINT_DONE(true);
-}
-
-
 /*
  * Make sure that SUBTRANS has room for a newly-allocated XID.
  *
@@ -319,12 +271,8 @@ ExtendSUBTRANS(TransactionId newestXact)
 
 	pageno = TransactionIdToPage(newestXact);
 
-	LWLockAcquire(SubtransSLRULock, LW_EXCLUSIVE);
-
 	/* Zero the page */
-	ZeroSUBTRANSPage(pageno);
-
-	LWLockRelease(SubtransSLRULock);
+	UnlockReleaseBuffer(ZeroSUBTRANSPage(pageno));
 }
 
 
@@ -350,7 +298,7 @@ TruncateSUBTRANS(TransactionId oldestXact)
 	TransactionIdRetreat(oldestXact);
 	cutoffPage = TransactionIdToPage(oldestXact);
 
-	SimpleLruTruncate(SubTransCtl, cutoffPage);
+	SimpleLruTruncate(SLRU_SUBTRANS_REL_ID, SubTransPagePrecedes, cutoffPage);
 }
 
 
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 15ab8d90d4b..9b484382f80 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -51,6 +51,7 @@
 #include "access/heaptoast.h"
 #include "access/multixact.h"
 #include "access/rewriteheap.h"
+#include "access/slru.h"
 #include "access/subtrans.h"
 #include "access/timeline.h"
 #include "access/transam.h"
@@ -1459,7 +1460,7 @@ WaitXLogInsertionsToFinish(XLogRecPtr upto)
 	 */
 	if (upto > reservedUpto)
 	{
-		ereport(LOG,
+		ereport(PANIC,
 				(errmsg("request to flush past end of generated WAL; request %X/%X, current position %X/%X",
 						LSN_FORMAT_ARGS(upto), LSN_FORMAT_ARGS(reservedUpto))));
 		upto = reservedUpto;
@@ -4501,6 +4502,7 @@ BootStrapXLOG(void)
 	uint64		sysidentifier;
 	struct timeval tv;
 	pg_crc32c	crc;
+	ResourceOwner resowner;
 
 	/* allow ordinary WAL segment creation, like StartupXLOG() would */
 	LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
@@ -4641,11 +4643,15 @@ BootStrapXLOG(void)
 	/* some additional ControlFile fields are set in WriteControlFile() */
 	WriteControlFile();
 
-	/* Bootstrap the commit log, too */
+	/* Bootstrap the commit log too */
+	resowner = ResourceOwnerCreate(NULL, "bootstrap resowner");
+	CurrentResourceOwner = resowner;
 	BootStrapCLOG();
 	BootStrapCommitTs();
 	BootStrapSUBTRANS();
 	BootStrapMultiXact();
+	CurrentResourceOwner = NULL;
+	ResourceOwnerDelete(resowner);
 
 	pfree(buffer);
 
@@ -6851,15 +6857,12 @@ CheckPointGuts(XLogRecPtr checkPointRedo, int flags)
 	CheckPointSnapBuild();
 	CheckPointLogicalRewriteHeap();
 	CheckPointReplicationOrigin();
+	CheckPointPredicate();
+	CheckPointSLRU();
 
-	/* Write out all dirty data in SLRUs and the main buffer pool */
+	/* Write out all dirty data in the buffer pool */
 	TRACE_POSTGRESQL_BUFFER_CHECKPOINT_START(flags);
 	CheckpointStats.ckpt_write_t = GetCurrentTimestamp();
-	CheckPointCLOG();
-	CheckPointCommitTs();
-	CheckPointSUBTRANS();
-	CheckPointMultiXact();
-	CheckPointPredicate();
 	CheckPointBuffers(flags);
 
 	/* Perform all queued up fsyncs */
diff --git a/src/backend/commands/async.c b/src/backend/commands/async.c
index 3e1b92df030..c4249bfa861 100644
--- a/src/backend/commands/async.c
+++ b/src/backend/commands/async.c
@@ -141,6 +141,7 @@
 #include "libpq/libpq.h"
 #include "libpq/pqformat.h"
 #include "miscadmin.h"
+#include "storage/bufmgr.h"
 #include "storage/ipc.h"
 #include "storage/lmgr.h"
 #include "storage/proc.h"
@@ -305,12 +306,6 @@ static AsyncQueueControl *asyncQueueControl;
 #define QUEUE_NEXT_LISTENER(i)		(asyncQueueControl->backend[i].nextListener)
 #define QUEUE_BACKEND_POS(i)		(asyncQueueControl->backend[i].pos)
 
-/*
- * The SLRU buffer area through which we access the notification queue
- */
-static SlruCtlData NotifyCtlData;
-
-#define NotifyCtl					(&NotifyCtlData)
 #define QUEUE_PAGESIZE				BLCKSZ
 #define QUEUE_FULL_WARN_INTERVAL	5000	/* warn at most once every 5s */
 
@@ -521,8 +516,6 @@ AsyncShmemSize(void)
 	size = mul_size(MaxBackends + 1, sizeof(QueueBackendStatus));
 	size = add_size(size, offsetof(AsyncQueueControl, backend));
 
-	size = add_size(size, SimpleLruShmemSize(NUM_NOTIFY_BUFFERS, 0));
-
 	return size;
 }
 
@@ -565,20 +558,13 @@ AsyncShmemInit(void)
 		}
 	}
 
-	/*
-	 * Set up SLRU management of the pg_notify data.
-	 */
-	NotifyCtl->PagePrecedes = asyncQueuePagePrecedes;
-	SimpleLruInit(NotifyCtl, "Notify", NUM_NOTIFY_BUFFERS, 0,
-				  NotifySLRULock, "pg_notify", LWTRANCHE_NOTIFY_BUFFER,
-				  SYNC_HANDLER_NONE);
-
 	if (!found)
 	{
 		/*
 		 * During start or reboot, clean out the pg_notify directory.
 		 */
-		(void) SlruScanDirectory(NotifyCtl, SlruScanDirCbDeleteAll, NULL);
+		(void) SlruScanDirectory(SLRU_NOTIFY_REL_ID, asyncQueuePagePrecedes,
+								 SlruScanDirCbDeleteAll, NULL);
 	}
 }
 
@@ -1411,10 +1397,7 @@ asyncQueueAddEntries(ListCell *nextNotify)
 	QueuePosition queue_head;
 	int			pageno;
 	int			offset;
-	int			slotno;
-
-	/* We hold both NotifyQueueLock and NotifySLRULock during this operation */
-	LWLockAcquire(NotifySLRULock, LW_EXCLUSIVE);
+	Buffer		buffer;
 
 	/*
 	 * We work with a local copy of QUEUE_HEAD, which we write back to shared
@@ -1439,13 +1422,17 @@ asyncQueueAddEntries(ListCell *nextNotify)
 	 */
 	pageno = QUEUE_POS_PAGE(queue_head);
 	if (QUEUE_POS_IS_ZERO(queue_head))
-		slotno = SimpleLruZeroPage(NotifyCtl, pageno);
+	{
+		buffer = ZeroSlruBuffer(SLRU_NOTIFY_REL_ID, pageno);
+	}
 	else
-		slotno = SimpleLruReadPage(NotifyCtl, pageno, true,
-								   InvalidTransactionId);
+	{
+		buffer = ReadSlruBuffer(SLRU_NOTIFY_REL_ID, pageno);
+		LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+	}
 
 	/* Note we mark the page dirty before writing in it */
-	NotifyCtl->shared->page_dirty[slotno] = true;
+	MarkBufferDirty(buffer);
 
 	while (nextNotify != NULL)
 	{
@@ -1476,7 +1463,7 @@ asyncQueueAddEntries(ListCell *nextNotify)
 		}
 
 		/* Now copy qe into the shared buffer page */
-		memcpy(NotifyCtl->shared->page_buffer[slotno] + offset,
+		memcpy(BufferGetPage(buffer) + offset,
 			   &qe,
 			   qe.length);
 
@@ -1491,7 +1478,10 @@ asyncQueueAddEntries(ListCell *nextNotify)
 			 * asyncQueueIsFull() ensured that there is room to create this
 			 * page without overrunning the queue.
 			 */
-			slotno = SimpleLruZeroPage(NotifyCtl, QUEUE_POS_PAGE(queue_head));
+			UnlockReleaseBuffer(buffer);
+			buffer = ZeroSlruBuffer(SLRU_NOTIFY_REL_ID,
+									QUEUE_POS_PAGE(queue_head));
+			MarkBufferDirty(buffer);
 
 			/*
 			 * If the new page address is a multiple of QUEUE_CLEANUP_DELAY,
@@ -1505,12 +1495,11 @@ asyncQueueAddEntries(ListCell *nextNotify)
 			break;
 		}
 	}
+	UnlockReleaseBuffer(buffer);
 
 	/* Success, so update the global QUEUE_HEAD */
 	QUEUE_HEAD = queue_head;
 
-	LWLockRelease(NotifySLRULock);
-
 	return nextNotify;
 }
 
@@ -1983,17 +1972,16 @@ asyncQueueReadAllNotifications(void)
 		{
 			int			curpage = QUEUE_POS_PAGE(pos);
 			int			curoffset = QUEUE_POS_OFFSET(pos);
-			int			slotno;
 			int			copysize;
+			Buffer		buffer;
 
 			/*
-			 * We copy the data from SLRU into a local buffer, so as to avoid
-			 * holding the NotifySLRULock while we are examining the entries
-			 * and possibly transmitting them to our frontend.  Copy only the
-			 * part of the page we will actually inspect.
+			 * We copy the data into a local buffer, so as to avoid holding a
+			 * buffer pin while we are examining the entries and possibly
+			 * transmitting them to our frontend.  Copy only the part of the
+			 * page we will actually inspect.
 			 */
-			slotno = SimpleLruReadPage_ReadOnly(NotifyCtl, curpage,
-												InvalidTransactionId);
+			buffer = ReadSlruBuffer(SLRU_NOTIFY_REL_ID, curpage);
 			if (curpage == QUEUE_POS_PAGE(head))
 			{
 				/* we only want to read as far as head */
@@ -2007,10 +1995,9 @@ asyncQueueReadAllNotifications(void)
 				copysize = QUEUE_PAGESIZE - curoffset;
 			}
 			memcpy(page_buffer.buf + curoffset,
-				   NotifyCtl->shared->page_buffer[slotno] + curoffset,
+				   BufferGetPage(buffer) + curoffset,
 				   copysize);
-			/* Release lock that we got from SimpleLruReadPage_ReadOnly() */
-			LWLockRelease(NotifySLRULock);
+			ReleaseBuffer(buffer);
 
 			/*
 			 * Process messages up to the stop position, end of page, or an
@@ -2207,7 +2194,7 @@ asyncQueueAdvanceTail(void)
 		 * SimpleLruTruncate() will ask for NotifySLRULock but will also
 		 * release the lock again.
 		 */
-		SimpleLruTruncate(NotifyCtl, newtailpage);
+		SimpleLruTruncate(SLRU_NOTIFY_REL_ID, asyncQueuePagePrecedes, newtailpage);
 
 		/*
 		 * Update QUEUE_STOP_PAGE.  This changes asyncQueueIsFull()'s verdict
diff --git a/src/backend/postmaster/checkpointer.c b/src/backend/postmaster/checkpointer.c
index 5fc076fc149..f9c31143ae0 100644
--- a/src/backend/postmaster/checkpointer.c
+++ b/src/backend/postmaster/checkpointer.c
@@ -1172,6 +1172,24 @@ CompactCheckpointerRequestQueue(void)
 	/* must hold CheckpointerCommLock in exclusive mode */
 	Assert(LWLockHeldByMe(CheckpointerCommLock));
 
+	/*
+	 * XXX FIXME
+	 * If we were unlucky enough to be reached from
+	 * TransactionIdSetTreeStatus() while trying to read in a CLOG page and
+	 * having to write out some other dirty page and then finding the sync
+	 * request queue full, we're now running in a critical section and we're
+	 * not allowed to allocate any memory below.
+	 *
+	 * XXX Without this, 027_stream_regress.pl aborts below occasionally, due
+	 * to pressure on its very small shared_buffers and thus also sync request
+	 * queue.
+	 */
+	if (CritSectionCount > 0)
+	{
+		elog(LOG, "CompactCheckpointerRequestQueue() critical section, returning");
+		return false;
+	}
+
 	/* Initialize skip_slot array */
 	skip_slot = palloc0(sizeof(bool) * CheckpointerShmem->num_requests);
 
diff --git a/src/backend/storage/buffer/buf_init.c b/src/backend/storage/buffer/buf_init.c
index 2862e9e412c..719f27a86a4 100644
--- a/src/backend/storage/buffer/buf_init.c
+++ b/src/backend/storage/buffer/buf_init.c
@@ -20,6 +20,7 @@
 
 BufferDescPadded *BufferDescriptors;
 char	   *BufferBlocks;
+XLogRecPtr *BufferExternalLSNs;
 ConditionVariableMinimallyPadded *BufferIOCVArray;
 WritebackContext BackendWritebackContext;
 CkptSortItem *CkptBufferIds;
@@ -69,9 +70,11 @@ InitBufferPool(void)
 {
 	bool		foundBufs,
 				foundDescs,
+				foundLSNs,
 				foundIOCV,
 				foundBufCkpt;
 
+
 	/* Align descriptors to a cacheline boundary. */
 	BufferDescriptors = (BufferDescPadded *)
 		ShmemInitStruct("Buffer Descriptors",
@@ -88,6 +91,11 @@ InitBufferPool(void)
 						NBuffers * sizeof(ConditionVariableMinimallyPadded),
 						&foundIOCV);
 
+	BufferExternalLSNs = (XLogRecPtr *)
+		ShmemInitStruct("Buffer External LSNs",
+						NBuffers * sizeof(XLogRecPtr),
+						&foundLSNs);
+
 	/*
 	 * The array used to sort to-be-checkpointed buffer ids is located in
 	 * shared memory, to avoid having to allocate significant amounts of
@@ -99,10 +107,10 @@ InitBufferPool(void)
 		ShmemInitStruct("Checkpoint BufferIds",
 						NBuffers * sizeof(CkptSortItem), &foundBufCkpt);
 
-	if (foundDescs || foundBufs || foundIOCV || foundBufCkpt)
+	if (foundDescs || foundBufs || foundIOCV || foundBufCkpt || foundLSNs)
 	{
 		/* should find all of these, or none of them */
-		Assert(foundDescs && foundBufs && foundIOCV && foundBufCkpt);
+		Assert(foundDescs && foundBufs && foundIOCV && foundBufCkpt && foundLSNs);
 		/* note: this path is only taken in EXEC_BACKEND case */
 	}
 	else
@@ -133,6 +141,8 @@ InitBufferPool(void)
 							 LWTRANCHE_BUFFER_CONTENT);
 
 			ConditionVariableInit(BufferDescriptorGetIOCV(buf));
+
+			BufferExternalLSNs[i] = InvalidXLogRecPtr;
 		}
 
 		/* Correct last entry of linked list */
@@ -166,6 +176,9 @@ BufferShmemSize(void)
 	/* size of data pages */
 	size = add_size(size, mul_size(NBuffers, BLCKSZ));
 
+	/* size of external LSNs */
+	size = add_size(size, mul_size(NBuffers, sizeof(XLogRecPtr)));
+
 	/* size of stuff controlled by freelist.c */
 	size = add_size(size, StrategyShmemSize());
 
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index b7488b5d89e..709949fdb87 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -57,10 +57,21 @@
 #include "utils/resowner_private.h"
 #include "utils/timestamp.h"
 
+/*
+ * XXX Ideally we'd switch to standard pages for SLRU data, but in the
+ * meantime we need some way to identify buffers that hold raw data (no
+ * invasive LSN, no checksums).
+ */
+#define BufferHasStandardPage(bufHdr)			\
+	((bufHdr)->tag.rlocator.dbOid != 9)
+
+#define BufferHasExternalLSN(bufHdr)			\
+	!BufferHasStandardPage(bufHdr)
 
 /* Note: these two macros only work on shared buffers, not local ones! */
 #define BufHdrGetBlock(bufHdr)	((Block) (BufferBlocks + ((Size) (bufHdr)->buf_id) * BLCKSZ))
-#define BufferGetLSN(bufHdr)	(PageGetLSN(BufHdrGetBlock(bufHdr)))
+#define BufferGetLSN(bufHdr) \
+	(BufferHasExternalLSN(bufHdr) ? BufferGetExternalLSN(bufHdr) : PageGetLSN(BufHdrGetBlock(bufHdr)))
 
 /* Note: this macro only works on local buffers, not shared ones! */
 #define LocalBufHdrGetBlock(bufHdr) \
@@ -809,6 +820,21 @@ ReadBufferWithoutRelcache(RelFileLocator rlocator, ForkNumber forkNum,
 							 mode, strategy, &hit);
 }
 
+/*
+ * Like ReadBufferWithoutRelcache, but returns the hit flag.
+ * XXX Merge
+ */
+Buffer
+ReadBufferWithoutRelcacheWithHit(RelFileLocator rlocator, ForkNumber forkNum,
+								 BlockNumber blockNum, ReadBufferMode mode,
+								 bool *hit)
+{
+	SMgrRelation smgr = smgropen(rlocator, InvalidBackendId);
+
+	return ReadBuffer_common(smgr, RELPERSISTENCE_PERMANENT, forkNum, blockNum,
+							 mode, NULL, hit);
+}
+
 
 /*
  * ReadBuffer_common -- common logic for all ReadBuffer variants
@@ -1029,7 +1055,8 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 			}
 
 			/* check for garbage data */
-			if (!PageIsVerifiedExtended((Page) bufBlock, blockNum,
+			if (BufferHasStandardPage(bufHdr) &&
+				!PageIsVerifiedExtended((Page) bufBlock, blockNum,
 										PIV_LOG_WARNING | PIV_REPORT_STAT))
 			{
 				if (mode == RBM_ZERO_ON_ERROR || zero_damaged_pages)
@@ -1450,6 +1477,9 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 
 	LWLockRelease(newPartitionLock);
 
+	if (BufferHasExternalLSN(buf))
+		BufferSetExternalLSN(buf, InvalidXLogRecPtr);
+
 	/*
 	 * Buffer contents are currently invalid.  Try to obtain the right to
 	 * start I/O.  If StartBufferIO returns false, then someone else managed
@@ -1570,6 +1600,84 @@ retry:
 	StrategyFreeBuffer(buf);
 }
 
+/*
+ * DiscardBuffer -- drop a buffer from pool.
+ *
+ * If the buffer isn't present in shared buffers, nothing happens.  If it is
+ * present and not pinned, it is discarded without making any attempt to write
+ * it back out to the operating system.  If I/O is in progress, we wait for it
+ * to complete.  If it is pinned, an error is raised (some other backend
+ * must still be interested in it, so it's an error to discard it).
+ */
+void
+DiscardBuffer(RelFileLocator rlocator, ForkNumber forkNum, BlockNumber blockNum)
+{
+	SMgrRelation smgr = smgropen(rlocator, InvalidBackendId);
+	BufferTag	tag;			/* identity of target block */
+	uint32		hash;			/* hash value for tag */
+	LWLock	   *partitionLock;	/* buffer partition lock for it */
+	int			buf_id;
+	BufferDesc *bufHdr;
+	uint32		buf_state;
+
+	/* create a tag so we can lookup the buffer */
+	INIT_BUFFERTAG(tag, smgr->smgr_rlocator.locator, forkNum, blockNum);
+
+	/* determine its hash code and partition lock ID */
+	hash = BufTableHashCode(&tag);
+	partitionLock = BufMappingPartitionLock(hash);
+
+ retry:
+	/* see if the block is in the buffer pool */
+	LWLockAcquire(partitionLock, LW_SHARED);
+	buf_id = BufTableLookup(&tag, hash);
+	LWLockRelease(partitionLock);
+
+	/* didn't find it, so nothing to do */
+	if (buf_id < 0)
+		return;
+
+	/* take the buffer header lock */
+	bufHdr = GetBufferDescriptor(buf_id);
+	buf_state = LockBufHdr(bufHdr);
+
+	/*
+	 * The buffer might have been evicted after we released the partition lock
+	 * and before we acquired the buffer header lock.  If so, the buffer we've
+	 * locked might contain some other data which we shouldn't touch.  If the
+	 * buffer hasn't been recycled, we proceed to invalidate it.
+	 */
+	if (RelFileLocatorEquals(bufHdr->tag.rlocator, rlocator) &&
+		bufHdr->tag.blockNum == blockNum &&
+		bufHdr->tag.forkNum == forkNum)
+	{
+		if (buf_state & BM_IO_IN_PROGRESS)
+		{
+			UnlockBufHdr(bufHdr, buf_state);
+			WaitIO(bufHdr);
+			goto retry;
+		}
+		else if (BUF_STATE_GET_REFCOUNT(buf_state) == 0)
+		{
+			/* Nobody has it pinned, so we can immediately invalidate it. */
+			InvalidateBuffer(bufHdr);	/* releases spinlock */
+		}
+		else
+		{
+			/*
+			 * XXX: Is it OK to say that the contract for DiscardBuffer() is
+			 * that the caller is asserting that no one else could be
+			 * interested in this buffer, and therefore it's a programming
+			 * error or corruption if you reach this case?
+			 */
+			UnlockBufHdr(bufHdr, buf_state);
+			elog(ERROR, "cannot discard buffer that is pinned");
+		}
+	}
+	else
+		UnlockBufHdr(bufHdr, buf_state);
+}
+
 /*
  * MarkBufferDirty
  *
@@ -2900,7 +3008,10 @@ FlushBuffer(BufferDesc *buf, SMgrRelation reln)
 	 * buffer, other processes might be updating hint bits in it, so we must
 	 * copy the page to private storage if we do checksumming.
 	 */
-	bufToWrite = PageSetChecksumCopy((Page) bufBlock, buf->tag.blockNum);
+	if (BufferHasStandardPage(buf))
+		bufToWrite = PageSetChecksumCopy((Page) bufBlock, buf->tag.blockNum);
+	else
+		bufToWrite = bufBlock;
 
 	if (track_io_timing)
 		INSTR_TIME_SET_CURRENT(io_start);
@@ -3029,7 +3140,10 @@ BufferGetLSNAtomic(Buffer buffer)
 	Assert(BufferIsPinned(buffer));
 
 	buf_state = LockBufHdr(bufHdr);
-	lsn = PageGetLSN(page);
+	if (BufferHasStandardPage(bufHdr))
+		lsn = PageGetLSN(page);
+	else
+		lsn = BufferGetExternalLSN(bufHdr);
 	UnlockBufHdr(bufHdr, buf_state);
 
 	return lsn;
@@ -3541,7 +3655,8 @@ FlushRelationBuffers(Relation rel)
 				errcallback.previous = error_context_stack;
 				error_context_stack = &errcallback;
 
-				PageSetChecksumInplace(localpage, bufHdr->tag.blockNum);
+				if (BufferHasStandardPage(bufHdr))
+					PageSetChecksumInplace(localpage, bufHdr->tag.blockNum);
 
 				smgrwrite(RelationGetSmgr(rel),
 						  bufHdr->tag.forkNum,
@@ -5011,3 +5126,29 @@ TestForOldSnapshot_impl(Snapshot snapshot, Relation relation)
 				(errcode(ERRCODE_SNAPSHOT_TOO_OLD),
 				 errmsg("snapshot too old")));
 }
+
+/*
+ * Check if a buffer tag is currently mapped.
+ *
+ * XXX Dubious semantics; needed only for multixact's handling for
+ * inconsistent states.
+ */
+bool
+BufferProbe(RelFileLocator rlocator, ForkNumber forkNum, BlockNumber blockNum)
+{
+	BufferTag	tag;
+	uint32		hash;
+	LWLock	   *partitionLock;
+	int			buf_id;
+
+	INIT_BUFFERTAG(tag, rlocator, forkNum, blockNum);
+
+	hash = BufTableHashCode(&tag);
+	partitionLock = BufMappingPartitionLock(hash);
+
+	LWLockAcquire(partitionLock, LW_SHARED);
+	buf_id = BufTableLookup(&tag, hash);
+	LWLockRelease(partitionLock);
+
+	return buf_id >= 0;
+}
diff --git a/src/backend/storage/ipc/ipci.c b/src/backend/storage/ipc/ipci.c
index 1a6f5270518..c063a201a4f 100644
--- a/src/backend/storage/ipc/ipci.c
+++ b/src/backend/storage/ipc/ipci.c
@@ -116,9 +116,7 @@ CalculateShmemSize(int *num_semaphores)
 	size = add_size(size, XLogPrefetchShmemSize());
 	size = add_size(size, XLOGShmemSize());
 	size = add_size(size, XLogRecoveryShmemSize());
-	size = add_size(size, CLOGShmemSize());
 	size = add_size(size, CommitTsShmemSize());
-	size = add_size(size, SUBTRANSShmemSize());
 	size = add_size(size, TwoPhaseShmemSize());
 	size = add_size(size, BackgroundWorkerShmemSize());
 	size = add_size(size, MultiXactShmemSize());
@@ -240,9 +238,7 @@ CreateSharedMemoryAndSemaphores(void)
 	XLOGShmemInit();
 	XLogPrefetchShmemInit();
 	XLogRecoveryShmemInit();
-	CLOGShmemInit();
 	CommitTsShmemInit();
-	SUBTRANSShmemInit();
 	MultiXactShmemInit();
 	InitBufferPool();
 
diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c
index 38317edaf96..d1e1add7484 100644
--- a/src/backend/storage/lmgr/lwlock.c
+++ b/src/backend/storage/lmgr/lwlock.c
@@ -129,20 +129,6 @@ extern slock_t *ShmemLock;
 extern const char *const IndividualLWLockNames[];	/* in lwlocknames.c */
 
 static const char *const BuiltinTrancheNames[] = {
-	/* LWTRANCHE_XACT_BUFFER: */
-	"XactBuffer",
-	/* LWTRANCHE_COMMITTS_BUFFER: */
-	"CommitTSBuffer",
-	/* LWTRANCHE_SUBTRANS_BUFFER: */
-	"SubtransBuffer",
-	/* LWTRANCHE_MULTIXACTOFFSET_BUFFER: */
-	"MultiXactOffsetBuffer",
-	/* LWTRANCHE_MULTIXACTMEMBER_BUFFER: */
-	"MultiXactMemberBuffer",
-	/* LWTRANCHE_NOTIFY_BUFFER: */
-	"NotifyBuffer",
-	/* LWTRANCHE_SERIAL_BUFFER: */
-	"SerialBuffer",
 	/* LWTRANCHE_WAL_INSERT: */
 	"WALInsert",
 	/* LWTRANCHE_BUFFER_CONTENT: */
diff --git a/src/backend/storage/lmgr/lwlocknames.txt b/src/backend/storage/lmgr/lwlocknames.txt
index 6c7cf6c2956..fbd2976a600 100644
--- a/src/backend/storage/lmgr/lwlocknames.txt
+++ b/src/backend/storage/lmgr/lwlocknames.txt
@@ -17,10 +17,10 @@ WALWriteLock						8
 ControlFileLock						9
 # 10 was CheckpointLock
 XactSLRULock						11
-SubtransSLRULock					12
+# 12 was SubtransSLRULock
 MultiXactGenLock					13
-MultiXactOffsetSLRULock				14
-MultiXactMemberSLRULock				15
+# 14 was MultiXactOffsetSLRULock
+# 15 was MultiXactMemberSLRULock
 RelCacheInitLock					16
 CheckpointerCommLock				17
 TwoPhaseStateLock					18
@@ -31,7 +31,7 @@ AutovacuumLock						22
 AutovacuumScheduleLock				23
 SyncScanLock						24
 RelationMappingLock					25
-NotifySLRULock						26
+# 26 was NotifySLRULock
 NotifyQueueLock						27
 SerializableXactHashLock			28
 SerializableFinishedListLock		29
@@ -43,7 +43,7 @@ DynamicSharedMemoryControlLock		34
 AutoFileLock						35
 ReplicationSlotAllocationLock		36
 ReplicationSlotControlLock			37
-CommitTsSLRULock					38
+# 38 was CommitTsSLRULock
 CommitTsLock						39
 ReplicationOriginLock				40
 MultiXactTruncationLock				41
diff --git a/src/backend/storage/lmgr/predicate.c b/src/backend/storage/lmgr/predicate.c
index 5136da6ea36..7c200bc8745 100644
--- a/src/backend/storage/lmgr/predicate.c
+++ b/src/backend/storage/lmgr/predicate.c
@@ -312,14 +312,6 @@
 	((targethash) ^ ((uint32) PointerGetDatum((predicatelocktag)->myXact)) \
 	 << LOG2_NUM_PREDICATELOCK_PARTITIONS)
 
-
-/*
- * The SLRU buffer area through which we access the old xids.
- */
-static SlruCtlData SerialSlruCtlData;
-
-#define SerialSlruCtl			(&SerialSlruCtlData)
-
 #define SERIAL_PAGESIZE			BLCKSZ
 #define SERIAL_ENTRYSIZE			sizeof(SerCommitSeqNo)
 #define SERIAL_ENTRIESPERPAGE	(SERIAL_PAGESIZE / SERIAL_ENTRYSIZE)
@@ -331,8 +323,8 @@ static SlruCtlData SerialSlruCtlData;
 
 #define SerialNextPage(page) (((page) >= SERIAL_MAX_PAGE) ? 0 : (page) + 1)
 
-#define SerialValue(slotno, xid) (*((SerCommitSeqNo *) \
-	(SerialSlruCtl->shared->page_buffer[slotno] + \
+#define SerialValue(buffer, xid) (*((SerCommitSeqNo *) \
+	(BufferGetPage(buffer) + \
 	((((uint32) (xid)) % SERIAL_ENTRIESPERPAGE) * SERIAL_ENTRYSIZE))))
 
 #define SerialPage(xid)	(((uint32) (xid)) / SERIAL_ENTRIESPERPAGE)
@@ -867,17 +859,10 @@ SerialInit(void)
 {
 	bool		found;
 
-	/*
-	 * Set up SLRU management of the pg_serial data.
-	 */
-	SerialSlruCtl->PagePrecedes = SerialPagePrecedesLogically;
-	SimpleLruInit(SerialSlruCtl, "Serial",
-				  NUM_SERIAL_BUFFERS, 0, SerialSLRULock, "pg_serial",
-				  LWTRANCHE_SERIAL_BUFFER, SYNC_HANDLER_NONE);
 #ifdef USE_ASSERT_CHECKING
 	SerialPagePrecedesLogicallyUnitTests();
 #endif
-	SlruPagePrecedesUnitTests(SerialSlruCtl, SERIAL_ENTRIESPERPAGE);
+	SlruPagePrecedesUnitTests(SerialPagePrecedesLogically, SERIAL_ENTRIESPERPAGE);
 
 	/*
 	 * Create or attach to the SerialControl structure.
@@ -907,9 +892,9 @@ SerialAdd(TransactionId xid, SerCommitSeqNo minConflictCommitSeqNo)
 {
 	TransactionId tailXid;
 	int			targetPage;
-	int			slotno;
 	int			firstZeroPage;
 	bool		isNewPage;
+	Buffer		buffer;
 
 	Assert(TransactionIdIsValid(xid));
 
@@ -954,16 +939,22 @@ SerialAdd(TransactionId xid, SerCommitSeqNo minConflictCommitSeqNo)
 		/* Initialize intervening pages. */
 		while (firstZeroPage != targetPage)
 		{
-			(void) SimpleLruZeroPage(SerialSlruCtl, firstZeroPage);
+			buffer = ZeroSlruBuffer(SLRU_SERIAL_REL_ID, firstZeroPage);
+			MarkBufferDirty(buffer);
+			UnlockReleaseBuffer(buffer);
 			firstZeroPage = SerialNextPage(firstZeroPage);
 		}
-		slotno = SimpleLruZeroPage(SerialSlruCtl, targetPage);
+		buffer = ZeroSlruBuffer(SLRU_SERIAL_REL_ID, targetPage);
 	}
 	else
-		slotno = SimpleLruReadPage(SerialSlruCtl, targetPage, true, xid);
+	{
+		buffer = ReadSlruBuffer(SLRU_SERIAL_REL_ID, targetPage);
+		LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+	}
 
-	SerialValue(slotno, xid) = minConflictCommitSeqNo;
-	SerialSlruCtl->shared->page_dirty[slotno] = true;
+	SerialValue(buffer, xid) = minConflictCommitSeqNo;
+	MarkBufferDirty(buffer);
+	UnlockReleaseBuffer(buffer);
 
 	LWLockRelease(SerialSLRULock);
 }
@@ -979,7 +970,7 @@ SerialGetMinConflictCommitSeqNo(TransactionId xid)
 	TransactionId headXid;
 	TransactionId tailXid;
 	SerCommitSeqNo val;
-	int			slotno;
+	Buffer		buffer;
 
 	Assert(TransactionIdIsValid(xid));
 
@@ -1001,9 +992,9 @@ SerialGetMinConflictCommitSeqNo(TransactionId xid)
 	 * The following function must be called without holding SerialSLRULock,
 	 * but will return with that lock held, which must then be released.
 	 */
-	slotno = SimpleLruReadPage_ReadOnly(SerialSlruCtl,
-										SerialPage(xid), xid);
-	val = SerialValue(slotno, xid);
+	buffer = ReadSlruBuffer(SLRU_SERIAL_REL_ID, SerialPage(xid));
+	val = SerialValue(buffer, xid);
+	ReleaseBuffer(buffer);
 	LWLockRelease(SerialSLRULock);
 	return val;
 }
@@ -1122,19 +1113,7 @@ CheckPointPredicate(void)
 	LWLockRelease(SerialSLRULock);
 
 	/* Truncate away pages that are no longer required */
-	SimpleLruTruncate(SerialSlruCtl, tailPage);
-
-	/*
-	 * Write dirty SLRU pages to disk
-	 *
-	 * This is not actually necessary from a correctness point of view. We do
-	 * it merely as a debugging aid.
-	 *
-	 * We're doing this after the truncation to avoid writing pages right
-	 * before deleting the file in which they sit, which would be completely
-	 * pointless.
-	 */
-	SimpleLruWriteAll(SerialSlruCtl, true);
+	SimpleLruTruncate(SLRU_SERIAL_REL_ID, SerialPagePrecedesLogically, tailPage);
 }
 
 /*------------------------------------------------------------------------*/
@@ -1396,7 +1375,6 @@ PredicateLockShmemSize(void)
 
 	/* Shared memory structures for SLRU tracking of old committed xids. */
 	size = add_size(size, sizeof(SerialControlData));
-	size = add_size(size, SimpleLruShmemSize(NUM_SERIAL_BUFFERS, 0));
 
 	return size;
 }
diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c
index c1a5febcbfd..bda3f02fccd 100644
--- a/src/backend/storage/smgr/smgr.c
+++ b/src/backend/storage/smgr/smgr.c
@@ -17,6 +17,7 @@
  */
 #include "postgres.h"
 
+#include "access/slru.h"
 #include "access/xlogutils.h"
 #include "lib/ilist.h"
 #include "storage/bufmgr.h"
@@ -82,6 +83,14 @@ static const f_smgr smgrsw[] = {
 		.smgr_nblocks = mdnblocks,
 		.smgr_truncate = mdtruncate,
 		.smgr_immedsync = mdimmedsync,
+	},
+	/* "SLRU" storage */
+	{
+		.smgr_open = slruopen,
+		.smgr_close = slruclose,
+		.smgr_read = slruread,
+		.smgr_write = slruwrite,
+		.smgr_writeback = slruwriteback,
 	}
 };
 
@@ -176,13 +185,18 @@ smgropen(RelFileLocator rlocator, BackendId backend)
 		reln->smgr_targblock = InvalidBlockNumber;
 		for (int i = 0; i <= MAX_FORKNUM; ++i)
 			reln->smgr_cached_nblocks[i] = InvalidBlockNumber;
-		reln->smgr_which = 0;	/* we only have md.c at present */
 
-		/* implementation-specific initialization */
-		smgrsw[reln->smgr_which].smgr_open(reln);
+		/* XXX find some elegant way to do this, or something better */
+		if (rlocator.dbOid == SLRU_DB_ID)
+			reln->smgr_which = 1;	/* slru.c */
+		else
+			reln->smgr_which = 0;	/* md.c */
 
 		/* it has no owner yet */
 		dlist_push_tail(&unowned_relns, &reln->node);
+
+		/* implementation-specific initialization */
+		smgrsw[reln->smgr_which].smgr_open(reln);
 	}
 
 	return reln;
diff --git a/src/backend/storage/sync/sync.c b/src/backend/storage/sync/sync.c
index 9d6a9e91090..6015e8e79b4 100644
--- a/src/backend/storage/sync/sync.c
+++ b/src/backend/storage/sync/sync.c
@@ -18,9 +18,7 @@
 #include <fcntl.h>
 #include <sys/file.h>
 
-#include "access/commit_ts.h"
-#include "access/clog.h"
-#include "access/multixact.h"
+#include "access/slru.h"
 #include "access/xlog.h"
 #include "access/xlogutils.h"
 #include "commands/tablespace.h"
@@ -106,21 +104,9 @@ static const SyncOps syncsw[] = {
 		.sync_unlinkfiletag = mdunlinkfiletag,
 		.sync_filetagmatches = mdfiletagmatches
 	},
-	/* pg_xact */
-	[SYNC_HANDLER_CLOG] = {
-		.sync_syncfiletag = clogsyncfiletag
-	},
-	/* pg_commit_ts */
-	[SYNC_HANDLER_COMMIT_TS] = {
-		.sync_syncfiletag = committssyncfiletag
-	},
-	/* pg_multixact/offsets */
-	[SYNC_HANDLER_MULTIXACT_OFFSET] = {
-		.sync_syncfiletag = multixactoffsetssyncfiletag
-	},
-	/* pg_multixact/members */
-	[SYNC_HANDLER_MULTIXACT_MEMBER] = {
-		.sync_syncfiletag = multixactmemberssyncfiletag
+	/* SLRU */
+	[SYNC_HANDLER_SLRU] = {
+		.sync_syncfiletag = slrusyncfiletag
 	}
 };
 
diff --git a/src/backend/utils/activity/pgstat_slru.c b/src/backend/utils/activity/pgstat_slru.c
index 28ef736735f..d3f5d558e71 100644
--- a/src/backend/utils/activity/pgstat_slru.c
+++ b/src/backend/utils/activity/pgstat_slru.c
@@ -31,7 +31,7 @@ static void pgstat_reset_slru_counter_internal(int index, TimestampTz ts);
  * SLRU counters are reported within critical sections so we use static memory
  * in order to avoid memory allocation.
  */
-static PgStat_SLRUStats pending_SLRUStats[SLRU_NUM_ELEMENTS];
+static PgStat_SLRUStats pending_SLRUStats[SLRU_NUM_RELS];
 bool		have_slrustats = false;
 
 
@@ -110,37 +110,21 @@ pgstat_fetch_slru(void)
 }
 
 /*
- * Returns SLRU name for an index. The index may be above SLRU_NUM_ELEMENTS,
- * in which case this returns NULL. This allows writing code that does not
- * know the number of entries in advance.
+ * Returns SLRU name for an index.
  */
 const char *
 pgstat_get_slru_name(int slru_idx)
 {
-	if (slru_idx < 0 || slru_idx >= SLRU_NUM_ELEMENTS)
-		return NULL;
-
-	return slru_names[slru_idx];
+	return SlruName(slru_idx);
 }
 
 /*
- * Determine index of entry for a SLRU with a given name. If there's no exact
- * match, returns index of the last "other" entry used for SLRUs defined in
- * external projects.
+ * Determine index of entry for a SLRU with a given name.
  */
 int
 pgstat_get_slru_index(const char *name)
 {
-	int			i;
-
-	for (i = 0; i < SLRU_NUM_ELEMENTS; i++)
-	{
-		if (strcmp(slru_names[i], name) == 0)
-			return i;
-	}
-
-	/* return index of the last entry (which is the "other" one) */
-	return (SLRU_NUM_ELEMENTS - 1);
+	return SlruRelIdByName(name);
 }
 
 /*
@@ -166,7 +150,7 @@ pgstat_slru_flush(bool nowait)
 	else if (!LWLockConditionalAcquire(&stats_shmem->lock, LW_EXCLUSIVE))
 		return true;
 
-	for (i = 0; i < SLRU_NUM_ELEMENTS; i++)
+	for (i = 0; i < SLRU_NUM_RELS; i++)
 	{
 		PgStat_SLRUStats *sharedent = &stats_shmem->stats[i];
 		PgStat_SLRUStats *pendingent = &pending_SLRUStats[i];
@@ -195,7 +179,7 @@ pgstat_slru_flush(bool nowait)
 void
 pgstat_slru_reset_all_cb(TimestampTz ts)
 {
-	for (int i = 0; i < SLRU_NUM_ELEMENTS; i++)
+	for (int i = 0; i < SLRU_NUM_RELS; i++)
 		pgstat_reset_slru_counter_internal(i, ts);
 }
 
@@ -213,8 +197,7 @@ pgstat_slru_snapshot_cb(void)
 }
 
 /*
- * Returns pointer to entry with counters for given SLRU (based on the name
- * stored in SlruCtl as lwlock tranche name).
+ * Returns pointer to entry with counters for given SLRU.
  */
 static inline PgStat_SLRUStats *
 get_slru_entry(int slru_idx)
@@ -227,7 +210,7 @@ get_slru_entry(int slru_idx)
 	 */
 	Assert(IsUnderPostmaster || !IsPostmasterEnvironment);
 
-	Assert((slru_idx >= 0) && (slru_idx < SLRU_NUM_ELEMENTS));
+	Assert((slru_idx >= 0) && (slru_idx < SLRU_NUM_RELS));
 
 	have_slrustats = true;
 
diff --git a/src/backend/utils/activity/wait_event.c b/src/backend/utils/activity/wait_event.c
index da57a93034e..836e7a9b482 100644
--- a/src/backend/utils/activity/wait_event.c
+++ b/src/backend/utils/activity/wait_event.c
@@ -660,8 +660,8 @@ pgstat_get_wait_io(WaitEventIO w)
 		case WAIT_EVENT_REPLICATION_SLOT_WRITE:
 			event_name = "ReplicationSlotWrite";
 			break;
-		case WAIT_EVENT_SLRU_FLUSH_SYNC:
-			event_name = "SLRUFlushSync";
+		case WAIT_EVENT_SLRU_FLUSH:
+			event_name = "SLRUFlush";
 			break;
 		case WAIT_EVENT_SLRU_READ:
 			event_name = "SLRURead";
diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c
index d9e2a793829..f376e5b0a68 100644
--- a/src/backend/utils/adt/pgstatfuncs.c
+++ b/src/backend/utils/adt/pgstatfuncs.c
@@ -15,6 +15,7 @@
 #include "postgres.h"
 
 #include "access/htup_details.h"
+#include "access/slru.h"
 #include "access/xlog.h"
 #include "access/xlogprefetcher.h"
 #include "catalog/pg_authid.h"
@@ -1813,7 +1814,7 @@ pg_stat_get_slru(PG_FUNCTION_ARGS)
 	/* request SLRU stats from the cumulative stats system */
 	stats = pgstat_fetch_slru();
 
-	for (i = 0;; i++)
+	for (i = 0; i < SLRU_NUM_RELS; i++)
 	{
 		/* for each row */
 		Datum		values[PG_STAT_GET_SLRU_COLS] = {0};
@@ -1821,10 +1822,7 @@ pg_stat_get_slru(PG_FUNCTION_ARGS)
 		PgStat_SLRUStats stat;
 		const char *name;
 
-		name = pgstat_get_slru_name(i);
-
-		if (!name)
-			break;
+		name = SlruName(i);
 
 		stat = stats[i];
 
diff --git a/src/backend/utils/mmgr/mcxt.c b/src/backend/utils/mmgr/mcxt.c
index e12be1b9bd8..7373538e2f7 100644
--- a/src/backend/utils/mmgr/mcxt.c
+++ b/src/backend/utils/mmgr/mcxt.c
@@ -865,7 +865,9 @@ MemoryContextAlloc(MemoryContext context, Size size)
 	void	   *ret;
 
 	AssertArg(MemoryContextIsValid(context));
-	AssertNotInCriticalSection(context);
+	// XXX horrible hack: while pinning clog buffers during commit, resowner
+	// stuff allocates!
+	//AssertNotInCriticalSection(context);
 
 	if (!AllocSizeIsValid(size))
 		elog(ERROR, "invalid memory alloc request size %zu", size);
@@ -1072,7 +1074,13 @@ palloc(Size size)
 	MemoryContext context = CurrentMemoryContext;
 
 	AssertArg(MemoryContextIsValid(context));
-	AssertNotInCriticalSection(context);
+	/*
+	 * XXX:TM I commented this out for now, because otherwise it can fail
+	 * while writing back md.c buffers due to cache pressure while reading in
+	 * CLOG buffers during commit, in a critical section (md.c allocates
+	 * memory to build paths...); FIXME!
+	 */
+	//AssertNotInCriticalSection(context);
 
 	if (!AllocSizeIsValid(size))
 		elog(ERROR, "invalid memory alloc request size %zu", size);
diff --git a/src/include/access/clog.h b/src/include/access/clog.h
index 543f2e2643a..af16a2afcb9 100644
--- a/src/include/access/clog.h
+++ b/src/include/access/clog.h
@@ -40,18 +40,12 @@ extern void TransactionIdSetTreeStatus(TransactionId xid, int nsubxids,
 									   TransactionId *subxids, XidStatus status, XLogRecPtr lsn);
 extern XidStatus TransactionIdGetStatus(TransactionId xid, XLogRecPtr *lsn);
 
-extern Size CLOGShmemBuffers(void);
-extern Size CLOGShmemSize(void);
-extern void CLOGShmemInit(void);
 extern void BootStrapCLOG(void);
 extern void StartupCLOG(void);
 extern void TrimCLOG(void);
-extern void CheckPointCLOG(void);
 extern void ExtendCLOG(TransactionId newestXact);
 extern void TruncateCLOG(TransactionId oldestXact, Oid oldestxid_datoid);
 
-extern int	clogsyncfiletag(const FileTag *ftag, char *path);
-
 /* XLOG stuff */
 #define CLOG_ZEROPAGE		0x00
 #define CLOG_TRUNCATE		0x10
diff --git a/src/include/access/commit_ts.h b/src/include/access/commit_ts.h
index 7662f8e1a9c..4e986fc023d 100644
--- a/src/include/access/commit_ts.h
+++ b/src/include/access/commit_ts.h
@@ -27,7 +27,6 @@ extern bool TransactionIdGetCommitTsData(TransactionId xid,
 extern TransactionId GetLatestCommitTsData(TimestampTz *ts,
 										   RepOriginId *nodeid);
 
-extern Size CommitTsShmemBuffers(void);
 extern Size CommitTsShmemSize(void);
 extern void CommitTsShmemInit(void);
 extern void BootStrapCommitTs(void);
@@ -41,8 +40,6 @@ extern void SetCommitTsLimit(TransactionId oldestXact,
 							 TransactionId newestXact);
 extern void AdvanceOldestCommitTsXid(TransactionId oldestXact);
 
-extern int	committssyncfiletag(const FileTag *ftag, char *path);
-
 /* XLOG stuff */
 #define COMMIT_TS_ZEROPAGE		0x00
 #define COMMIT_TS_TRUNCATE		0x10
diff --git a/src/include/access/multixact.h b/src/include/access/multixact.h
index a5600a320ae..a6bbe53e1bb 100644
--- a/src/include/access/multixact.h
+++ b/src/include/access/multixact.h
@@ -118,9 +118,6 @@ extern bool MultiXactIdPrecedes(MultiXactId multi1, MultiXactId multi2);
 extern bool MultiXactIdPrecedesOrEquals(MultiXactId multi1,
 										MultiXactId multi2);
 
-extern int	multixactoffsetssyncfiletag(const FileTag *ftag, char *path);
-extern int	multixactmemberssyncfiletag(const FileTag *ftag, char *path);
-
 extern void AtEOXact_MultiXact(void);
 extern void AtPrepare_MultiXact(void);
 extern void PostPrepare_MultiXact(TransactionId xid);
diff --git a/src/include/access/slru.h b/src/include/access/slru.h
index 130c41c8632..ed85dd019d9 100644
--- a/src/include/access/slru.h
+++ b/src/include/access/slru.h
@@ -1,7 +1,7 @@
 /*-------------------------------------------------------------------------
  *
  * slru.h
- *		Simple LRU buffering for transaction status logfiles
+ *		Buffering for transaction status logfiles
  *
  * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
@@ -14,9 +14,34 @@
 #define SLRU_H
 
 #include "access/xlogdefs.h"
+#include "catalog/pg_tablespace_d.h"
+#include "storage/buf.h"
 #include "storage/lwlock.h"
+#include "storage/smgr.h"
 #include "storage/sync.h"
 
+/* Pseudo database ID used for SLRU data. */
+#define SLRU_DB_ID 9
+
+/* Pseudo relation IDs used by each cache. */
+#define SLRU_CLOG_REL_ID 0
+#define SLRU_MULTIXACT_OFFSET_REL_ID 1
+#define SLRU_MULTIXACT_MEMBER_REL_ID 2
+#define SLRU_SUBTRANS_REL_ID 3
+#define SLRU_SERIAL_REL_ID 4
+#define SLRU_COMMITTS_REL_ID 5
+#define SLRU_NOTIFY_REL_ID 6
+#define SLRU_NUM_RELS 7
+
+typedef bool (*SlruPagePrecedesFunction) (int, int);
+
+static inline RelFileLocator
+SlruRelFileLocator(RelFileNumber relNumber)
+{
+	RelFileLocator rlocator = {DEFAULTTABLESPACE_OID, SLRU_DB_ID, relNumber};
+	return rlocator;
+}
+
 
 /*
  * Define SLRU segment size.  A page is the same BLCKSZ as is used everywhere
@@ -33,142 +58,54 @@
  */
 #define SLRU_PAGES_PER_SEGMENT	32
 
-/*
- * Page status codes.  Note that these do not include the "dirty" bit.
- * page_dirty can be true only in the VALID or WRITE_IN_PROGRESS states;
- * in the latter case it implies that the page has been re-dirtied since
- * the write started.
- */
-typedef enum
-{
-	SLRU_PAGE_EMPTY,			/* buffer is not in use */
-	SLRU_PAGE_READ_IN_PROGRESS, /* page is being read in */
-	SLRU_PAGE_VALID,			/* page is valid and not being written */
-	SLRU_PAGE_WRITE_IN_PROGRESS /* page is being written out */
-} SlruPageStatus;
-
-/*
- * Shared-memory state
- */
-typedef struct SlruSharedData
-{
-	LWLock	   *ControlLock;
-
-	/* Number of buffers managed by this SLRU structure */
-	int			num_slots;
-
-	/*
-	 * Arrays holding info for each buffer slot.  Page number is undefined
-	 * when status is EMPTY, as is page_lru_count.
-	 */
-	char	  **page_buffer;
-	SlruPageStatus *page_status;
-	bool	   *page_dirty;
-	int		   *page_number;
-	int		   *page_lru_count;
-	LWLockPadded *buffer_locks;
-
-	/*
-	 * Optional array of WAL flush LSNs associated with entries in the SLRU
-	 * pages.  If not zero/NULL, we must flush WAL before writing pages (true
-	 * for pg_xact, false for multixact, pg_subtrans, pg_notify).  group_lsn[]
-	 * has lsn_groups_per_page entries per buffer slot, each containing the
-	 * highest LSN known for a contiguous group of SLRU entries on that slot's
-	 * page.
-	 */
-	XLogRecPtr *group_lsn;
-	int			lsn_groups_per_page;
-
-	/*----------
-	 * We mark a page "most recently used" by setting
-	 *		page_lru_count[slotno] = ++cur_lru_count;
-	 * The oldest page is therefore the one with the highest value of
-	 *		cur_lru_count - page_lru_count[slotno]
-	 * The counts will eventually wrap around, but this calculation still
-	 * works as long as no page's age exceeds INT_MAX counts.
-	 *----------
-	 */
-	int			cur_lru_count;
-
-	/*
-	 * latest_page_number is the page number of the current end of the log;
-	 * this is not critical data, since we use it only to avoid swapping out
-	 * the latest page.
-	 */
-	int			latest_page_number;
-
-	/* SLRU's index for statistics purposes (might not be unique) */
-	int			slru_stats_idx;
-} SlruSharedData;
-
-typedef SlruSharedData *SlruShared;
-
-/*
- * SlruCtlData is an unshared structure that points to the active information
- * in shared memory.
- */
-typedef struct SlruCtlData
-{
-	SlruShared	shared;
-
-	/*
-	 * Which sync handler function to use when handing sync requests over to
-	 * the checkpointer.  SYNC_HANDLER_NONE to disable fsync (eg pg_notify).
-	 */
-	SyncRequestHandler sync_handler;
-
-	/*
-	 * Decide whether a page is "older" for truncation and as a hint for
-	 * evicting pages in LRU order.  Return true if every entry of the first
-	 * argument is older than every entry of the second argument.  Note that
-	 * !PagePrecedes(a,b) && !PagePrecedes(b,a) need not imply a==b; it also
-	 * arises when some entries are older and some are not.  For SLRUs using
-	 * SimpleLruTruncate(), this must use modular arithmetic.  (For others,
-	 * the behavior of this callback has no functional implications.)  Use
-	 * SlruPagePrecedesUnitTests() in SLRUs meeting its criteria.
-	 */
-	bool		(*PagePrecedes) (int, int);
-
-	/*
-	 * Dir is set during SimpleLruInit and does not change thereafter. Since
-	 * it's always the same, it doesn't need to be in shared memory.
-	 */
-	char		Dir[64];
-} SlruCtlData;
-
-typedef SlruCtlData *SlruCtl;
-
-
-extern Size SimpleLruShmemSize(int nslots, int nlsns);
-extern void SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns,
-						  LWLock *ctllock, const char *subdir, int tranche_id,
-						  SyncRequestHandler sync_handler);
-extern int	SimpleLruZeroPage(SlruCtl ctl, int pageno);
-extern int	SimpleLruReadPage(SlruCtl ctl, int pageno, bool write_ok,
-							  TransactionId xid);
-extern int	SimpleLruReadPage_ReadOnly(SlruCtl ctl, int pageno,
-									   TransactionId xid);
-extern void SimpleLruWritePage(SlruCtl ctl, int slotno);
-extern void SimpleLruWriteAll(SlruCtl ctl, bool allow_redirtied);
 #ifdef USE_ASSERT_CHECKING
-extern void SlruPagePrecedesUnitTests(SlruCtl ctl, int per_page);
+extern void SlruPagePrecedesUnitTests(SlruPagePrecedesFunction PagePrecedes,
+									  int per_page);
 #else
 #define SlruPagePrecedesUnitTests(ctl, per_page) do {} while (0)
 #endif
-extern void SimpleLruTruncate(SlruCtl ctl, int cutoffPage);
-extern bool SimpleLruDoesPhysicalPageExist(SlruCtl ctl, int pageno);
+extern void SimpleLruTruncate(Oid rel_id, SlruPagePrecedesFunction PagePrecedes,
+							  int cutoffPage);
+extern bool SimpleLruDoesPhysicalPageExist(Oid rel_id, int pageno);
 
-typedef bool (*SlruScanCallback) (SlruCtl ctl, char *filename, int segpage,
+typedef bool (*SlruScanCallback) (Oid rel_id,
+								  SlruPagePrecedesFunction PagePrecedes,
+								  char *filename, int segpage,
 								  void *data);
-extern bool SlruScanDirectory(SlruCtl ctl, SlruScanCallback callback, void *data);
-extern void SlruDeleteSegment(SlruCtl ctl, int segno);
-
-extern int	SlruSyncFileTag(SlruCtl ctl, const FileTag *ftag, char *path);
+extern bool SlruScanDirectory(Oid rel_id, SlruPagePrecedesFunction PagePrecedes,
+							  SlruScanCallback callback, void *data);
+extern void SlruDeleteSegment(Oid rel_id, int segno);
 
 /* SlruScanDirectory public callbacks */
-extern bool SlruScanDirCbReportPresence(SlruCtl ctl, char *filename,
+extern bool SlruScanDirCbReportPresence(Oid rel_id,
+										SlruPagePrecedesFunction PagePrecedes,
+										char *filename,
 										int segpage, void *data);
-extern bool SlruScanDirCbDeleteAll(SlruCtl ctl, char *filename, int segpage,
+extern bool SlruScanDirCbDeleteAll(Oid rel_id, SlruPagePrecedesFunction PagePrecedes,
+								   char *filename, int segpage,
 								   void *data);
 
+extern void CheckPointSLRU(void);
+
+/* Buffer access */
+extern Buffer ReadSlruBuffer(Oid rel_id, int pageno);
+extern Buffer ZeroSlruBuffer(Oid rel_id, int pageno);
+
+/* Interfaces used by the stats view */
+extern Oid SlruRelIdByName(const char *name);
+extern const char *SlruName(Oid rel_id);
+
+/* Sync callback */
+extern int	slrusyncfiletag(const FileTag *ftag, char *path);
+
+/* SMGR callbacks */
+extern void slruopen(SMgrRelation reln);
+extern void slruclose(SMgrRelation reln, ForkNumber forknum);
+extern void slruread(SMgrRelation reln, ForkNumber forknum,
+					 BlockNumber blocknum, char *buffer);
+extern void slruwrite(SMgrRelation reln, ForkNumber forknum,
+					  BlockNumber blocknum, char *buffer, bool skipFsync);
+extern void slruwriteback(SMgrRelation reln, ForkNumber forknum,
+						  BlockNumber blocknum, BlockNumber nblocks);
+
 #endif							/* SLRU_H */
diff --git a/src/include/access/subtrans.h b/src/include/access/subtrans.h
index f94e116640b..4685a05bc92 100644
--- a/src/include/access/subtrans.h
+++ b/src/include/access/subtrans.h
@@ -18,11 +18,8 @@ extern void SubTransSetParent(TransactionId xid, TransactionId parent);
 extern TransactionId SubTransGetParent(TransactionId xid);
 extern TransactionId SubTransGetTopmostTransaction(TransactionId xid);
 
-extern Size SUBTRANSShmemSize(void);
-extern void SUBTRANSShmemInit(void);
 extern void BootStrapSUBTRANS(void);
 extern void StartupSUBTRANS(TransactionId oldestActiveXID);
-extern void CheckPointSUBTRANS(void);
 extern void ExtendSUBTRANS(TransactionId newestXact);
 extern void TruncateSUBTRANS(TransactionId oldestXact);
 
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index ac28f813b4e..bb7099fc809 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -691,5 +691,4 @@ extern PGDLLIMPORT SessionEndType pgStatSessionEndCause;
 /* updated directly by backends and background processes */
 extern PGDLLIMPORT PgStat_WalStats PendingWalStats;
 
-
 #endif							/* PGSTAT_H */
diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h
index 69e45900bae..f071539281a 100644
--- a/src/include/storage/buf_internals.h
+++ b/src/include/storage/buf_internals.h
@@ -230,6 +230,11 @@ typedef union BufferDescPadded
 #define BufferDescriptorGetContentLock(bdesc) \
 	((LWLock*) (&(bdesc)->content_lock))
 
+#define BufferGetExternalLSN(bufHdr) \
+	BufferExternalLSNs[(bufHdr)->buf_id]
+#define BufferSetExternalLSN(bufHdr, lsn) \
+	BufferExternalLSNs[(bufHdr)->buf_id] = (lsn)
+
 extern PGDLLIMPORT ConditionVariableMinimallyPadded *BufferIOCVArray;
 
 /*
@@ -276,6 +281,7 @@ typedef struct WritebackContext
 
 /* in buf_init.c */
 extern PGDLLIMPORT BufferDescPadded *BufferDescriptors;
+extern PGDLLIMPORT XLogRecPtr *BufferExternalLSNs;
 extern PGDLLIMPORT WritebackContext BackendWritebackContext;
 
 /* in localbuf.c */
diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h
index bf8cce7ccf6..b7c7a1eae9d 100644
--- a/src/include/storage/bufmgr.h
+++ b/src/include/storage/bufmgr.h
@@ -116,12 +116,21 @@ extern Buffer ReadBufferWithoutRelcache(RelFileLocator rlocator,
 										ForkNumber forkNum, BlockNumber blockNum,
 										ReadBufferMode mode, BufferAccessStrategy strategy,
 										bool permanent);
+extern Buffer ReadBufferWithoutRelcacheWithHit(RelFileLocator rlocator,
+											   ForkNumber forkNum, BlockNumber blockNum,
+											   ReadBufferMode mode, bool *hit);
 extern void ReleaseBuffer(Buffer buffer);
 extern void UnlockReleaseBuffer(Buffer buffer);
 extern void MarkBufferDirty(Buffer buffer);
 extern void IncrBufferRefCount(Buffer buffer);
 extern Buffer ReleaseAndReadBuffer(Buffer buffer, Relation relation,
 								   BlockNumber blockNum);
+extern void DiscardBuffer(RelFileLocator rlocator,
+						  ForkNumber forkNum,
+						  BlockNumber blockNum);
+extern bool BufferProbe(RelFileLocator rlocator,
+						ForkNumber forkNum,
+						BlockNumber blockNum);
 
 extern void InitBufferPool(void);
 extern void InitBufferPoolAccess(void);
diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h
index e03d317eeac..d71bf58de22 100644
--- a/src/include/storage/lwlock.h
+++ b/src/include/storage/lwlock.h
@@ -166,14 +166,7 @@ extern void LWLockInitialize(LWLock *lock, int tranche_id);
  */
 typedef enum BuiltinTrancheIds
 {
-	LWTRANCHE_XACT_BUFFER = NUM_INDIVIDUAL_LWLOCKS,
-	LWTRANCHE_COMMITTS_BUFFER,
-	LWTRANCHE_SUBTRANS_BUFFER,
-	LWTRANCHE_MULTIXACTOFFSET_BUFFER,
-	LWTRANCHE_MULTIXACTMEMBER_BUFFER,
-	LWTRANCHE_NOTIFY_BUFFER,
-	LWTRANCHE_SERIAL_BUFFER,
-	LWTRANCHE_WAL_INSERT,
+	LWTRANCHE_WAL_INSERT = NUM_INDIVIDUAL_LWLOCKS,
 	LWTRANCHE_BUFFER_CONTENT,
 	LWTRANCHE_REPLICATION_ORIGIN_STATE,
 	LWTRANCHE_REPLICATION_SLOT_IO,
diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h
index a07715356ba..7398c0d2c9f 100644
--- a/src/include/storage/smgr.h
+++ b/src/include/storage/smgr.h
@@ -16,6 +16,7 @@
 
 #include "lib/ilist.h"
 #include "storage/block.h"
+#include "storage/fd.h"
 #include "storage/relfilelocator.h"
 
 /*
@@ -68,6 +69,13 @@ typedef struct SMgrRelationData
 	int			md_num_open_segs[MAX_FORKNUM + 1];
 	struct _MdfdVec *md_seg_fds[MAX_FORKNUM + 1];
 
+	/*
+	 * for slru.c; we'll just cache one File for now.
+	 * XXX Unionize with md stuff somehow so we don't waste space
+	 */
+	int			slru_file_segment;
+	File		slru_file;
+
 	/* if unowned, list link in list of all unowned SMgrRelations */
 	dlist_node	node;
 } SMgrRelationData;
diff --git a/src/include/storage/sync.h b/src/include/storage/sync.h
index 049af878dec..45778b185bc 100644
--- a/src/include/storage/sync.h
+++ b/src/include/storage/sync.h
@@ -35,11 +35,7 @@ typedef enum SyncRequestType
 typedef enum SyncRequestHandler
 {
 	SYNC_HANDLER_MD = 0,
-	SYNC_HANDLER_CLOG,
-	SYNC_HANDLER_COMMIT_TS,
-	SYNC_HANDLER_MULTIXACT_OFFSET,
-	SYNC_HANDLER_MULTIXACT_MEMBER,
-	SYNC_HANDLER_NONE
+	SYNC_HANDLER_SLRU
 } SyncRequestHandler;
 
 /*
diff --git a/src/include/utils/pgstat_internal.h b/src/include/utils/pgstat_internal.h
index 9303d05427f..eec150af73f 100644
--- a/src/include/utils/pgstat_internal.h
+++ b/src/include/utils/pgstat_internal.h
@@ -14,6 +14,7 @@
 #define PGSTAT_INTERNAL_H
 
 
+#include "access/slru.h"
 #include "common/hashfn.h"
 #include "lib/dshash.h"
 #include "lib/ilist.h"
@@ -260,28 +261,6 @@ typedef struct PgStat_KindInfo
 } PgStat_KindInfo;
 
 
-/*
- * List of SLRU names that we keep stats for.  There is no central registry of
- * SLRUs, so we use this fixed list instead.  The "other" entry is used for
- * all SLRUs without an explicit entry (e.g. SLRUs in extensions).
- *
- * This is only defined here so that SLRU_NUM_ELEMENTS is known for later type
- * definitions.
- */
-static const char *const slru_names[] = {
-	"CommitTs",
-	"MultiXactMember",
-	"MultiXactOffset",
-	"Notify",
-	"Serial",
-	"Subtrans",
-	"Xact",
-	"other"						/* has to be last */
-};
-
-#define SLRU_NUM_ELEMENTS	lengthof(slru_names)
-
-
 /* ----------
  * Types and definitions for different kinds of fixed-amount stats.
  *
@@ -333,7 +312,7 @@ typedef struct PgStatShared_SLRU
 {
 	/* lock protects ->stats */
 	LWLock		lock;
-	PgStat_SLRUStats stats[SLRU_NUM_ELEMENTS];
+	PgStat_SLRUStats stats[SLRU_NUM_RELS];
 } PgStatShared_SLRU;
 
 typedef struct PgStatShared_Wal
@@ -442,7 +421,7 @@ typedef struct PgStat_Snapshot
 
 	PgStat_CheckpointerStats checkpointer;
 
-	PgStat_SLRUStats slru[SLRU_NUM_ELEMENTS];
+	PgStat_SLRUStats slru[SLRU_NUM_RELS];
 
 	PgStat_WalStats wal;
 
diff --git a/src/include/utils/wait_event.h b/src/include/utils/wait_event.h
index c3ade011206..60c221a5729 100644
--- a/src/include/utils/wait_event.h
+++ b/src/include/utils/wait_event.h
@@ -203,7 +203,7 @@ typedef enum
 	WAIT_EVENT_REPLICATION_SLOT_RESTORE_SYNC,
 	WAIT_EVENT_REPLICATION_SLOT_SYNC,
 	WAIT_EVENT_REPLICATION_SLOT_WRITE,
-	WAIT_EVENT_SLRU_FLUSH_SYNC,
+	WAIT_EVENT_SLRU_FLUSH,
 	WAIT_EVENT_SLRU_READ,
 	WAIT_EVENT_SLRU_SYNC,
 	WAIT_EVENT_SLRU_WRITE,
-- 
2.30.2

