buffer manager refactoring to isolate ARC algorithm

From: Tom Lane <tgl(at)sss(dot)pgh(dot)pa(dot)us>
To: pgsql-patches(at)postgreSQL(dot)org
Subject: buffer manager refactoring to isolate ARC algorithm
Date: 2005-02-03 20:24:31
Message-ID: 3097.1107462271@sss.pgh.pa.us
Views: Raw Message | Whole Thread | Download mbox | Resend email
Thread:
Lists: pgsql-patches

I'm planning to apply the attached patch to hide the ARC-related data
structures in freelist.c. There are no data structure or algorithm
changes, just removal of ARC details from the shared header
buf_internals.h, plus minor refactoring to hide the knowledge that ARC
wants a twice-normal-size buffer lookup hashtable. This should allow us
to make the ARC-or-not changes within freelist.c only.

I did leave the "bufNext" fields of BufferDesc as-is. Strictly speaking
this list is an internal data structure of freelist.c, but moving these
fields out of BufferDesc would require a larger change than seems
worthwhile, especially seeing that an LRU algorithm could make use of
the same list.

Any objections?

regards, tom lane

*** src/backend/storage/buffer/buf_init.c.orig Fri Dec 31 17:46:08 2004
--- src/backend/storage/buffer/buf_init.c Thu Feb 3 14:51:18 2005
***************
*** 73,79 ****
* aborts, it should only unpin the buffers exactly the number of times it
* has pinned them, so that it will not blow away buffers of another
* backend.
- *
*/


--- 73,78 ----
***************
*** 120,133 ****
block = BufferBlocks;

/*
! * link the buffers into a single linked list. This will become
! * the LIFO list of unused buffers returned by
! * StrategyGetBuffer().
*/
for (i = 0; i < NBuffers; block += BLCKSZ, buf++, i++)
{
Assert(ShmemIsValid((unsigned long) block));

buf->bufNext = i + 1;

CLEAR_BUFFERTAG(buf->tag);
--- 119,135 ----
block = BufferBlocks;

/*
! * Initialize all the buffer headers.
*/
for (i = 0; i < NBuffers; block += BLCKSZ, buf++, i++)
{
Assert(ShmemIsValid((unsigned long) block));

+ /*
+ * The bufNext fields link together all totally-unused buffers.
+ * Subsequent management of this list is done by
+ * StrategyGetBuffer().
+ */
buf->bufNext = i + 1;

CLEAR_BUFFERTAG(buf->tag);
***************
*** 142,148 ****
buf->wait_backend_id = 0;
}

! /* Correct last entry */
BufferDescriptors[NBuffers - 1].bufNext = -1;

LWLockRelease(BufMgrLock);
--- 144,150 ----
buf->wait_backend_id = 0;
}

! /* Correct last entry of linked list */
BufferDescriptors[NBuffers - 1].bufNext = -1;

LWLockRelease(BufMgrLock);
***************
*** 178,184 ****

/*
* Convert shmem offsets into addresses as seen by this process. This
! * is just to speed up the BufferGetBlock() macro.
*/
for (i = 0; i < NBuffers; i++)
BufferBlockPointers[i] = (Block) MAKE_PTR(BufferDescriptors[i].data);
--- 180,187 ----

/*
* Convert shmem offsets into addresses as seen by this process. This
! * is just to speed up the BufferGetBlock() macro. It is OK to do this
! * without any lock since the data pointers never change.
*/
for (i = 0; i < NBuffers; i++)
BufferBlockPointers[i] = (Block) MAKE_PTR(BufferDescriptors[i].data);
***************
*** 201,214 ****
/* size of data pages */
size += NBuffers * MAXALIGN(BLCKSZ);

! /* size of buffer hash table */
! size += hash_estimate_size(NBuffers * 2, sizeof(BufferLookupEnt));
!
! /* size of the shared replacement strategy control block */
! size += MAXALIGN(sizeof(BufferStrategyControl));
!
! /* size of the ARC directory blocks */
! size += MAXALIGN(NBuffers * 2 * sizeof(BufferStrategyCDB));

return size;
}
--- 204,211 ----
/* size of data pages */
size += NBuffers * MAXALIGN(BLCKSZ);

! /* size of stuff controlled by freelist.c */
! size += StrategyShmemSize();

return size;
}
*** src/backend/storage/buffer/buf_table.c.orig Fri Dec 31 17:46:08 2004
--- src/backend/storage/buffer/buf_table.c Thu Feb 3 14:40:17 2005
***************
*** 1,11 ****
/*-------------------------------------------------------------------------
*
* buf_table.c
! * routines for finding buffers in the buffer pool.
*
! * NOTE: these days, what this table actually provides is a mapping from
! * BufferTags to CDB indexes, not directly to buffers. The function names
! * are thus slight misnomers.
*
* Note: all routines in this file assume that the BufMgrLock is held
* by the caller, so no synchronization is needed.
--- 1,11 ----
/*-------------------------------------------------------------------------
*
* buf_table.c
! * routines for mapping BufferTags to buffer indexes.
*
! * NOTE: this module is called only by freelist.c, and the "buffer IDs"
! * it deals with are whatever freelist.c needs them to be; they may not be
! * directly equivalent to Buffer numbers.
*
* Note: all routines in this file assume that the BufMgrLock is held
* by the caller, so no synchronization is needed.
***************
*** 26,37 ****
#include "storage/bufmgr.h"


static HTAB *SharedBufHash;


/*
* Initialize shmem hash table for mapping buffers
! * size is the desired hash table size (2*NBuffers for ARC algorithm)
*/
void
InitBufTable(int size)
--- 26,54 ----
#include "storage/bufmgr.h"


+ /* entry for buffer lookup hashtable */
+ typedef struct
+ {
+ BufferTag key; /* Tag of a disk page */
+ int id; /* Associated buffer ID */
+ } BufferLookupEnt;
+
static HTAB *SharedBufHash;


/*
+ * Estimate space needed for mapping hashtable
+ * size is the desired hash table size (possibly more than NBuffers)
+ */
+ int
+ BufTableShmemSize(int size)
+ {
+ return hash_estimate_size(size, sizeof(BufferLookupEnt));
+ }
+
+ /*
* Initialize shmem hash table for mapping buffers
! * size is the desired hash table size (possibly more than NBuffers)
*/
void
InitBufTable(int size)
***************
*** 56,62 ****

/*
* BufTableLookup
! * Lookup the given BufferTag; return CDB index, or -1 if not found
*/
int
BufTableLookup(BufferTag *tagPtr)
--- 73,79 ----

/*
* BufTableLookup
! * Lookup the given BufferTag; return buffer ID, or -1 if not found
*/
int
BufTableLookup(BufferTag *tagPtr)
***************
*** 76,85 ****

/*
* BufTableInsert
! * Insert a hashtable entry for given tag and CDB index
*/
void
! BufTableInsert(BufferTag *tagPtr, int cdb_id)
{
BufferLookupEnt *result;
bool found;
--- 93,102 ----

/*
* BufTableInsert
! * Insert a hashtable entry for given tag and buffer ID
*/
void
! BufTableInsert(BufferTag *tagPtr, int buf_id)
{
BufferLookupEnt *result;
bool found;
***************
*** 92,106 ****
(errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("out of shared memory")));

! if (found) /* found something else in the table? */
elog(ERROR, "shared buffer hash table corrupted");

! result->id = cdb_id;
}

/*
* BufTableDelete
! * Delete the hashtable entry for given tag
*/
void
BufTableDelete(BufferTag *tagPtr)
--- 109,123 ----
(errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("out of shared memory")));

! if (found) /* found something already in the table? */
elog(ERROR, "shared buffer hash table corrupted");

! result->id = buf_id;
}

/*
* BufTableDelete
! * Delete the hashtable entry for given tag (which must exist)
*/
void
BufTableDelete(BufferTag *tagPtr)
*** src/backend/storage/buffer/freelist.c.orig Fri Dec 31 17:46:08 2004
--- src/backend/storage/buffer/freelist.c Thu Feb 3 14:45:50 2005
***************
*** 25,30 ****
--- 25,75 ----
#include "storage/bufmgr.h"


+ /*
+ * Definitions for the buffer replacement strategy
+ */
+ #define STRAT_LIST_UNUSED (-1)
+ #define STRAT_LIST_B1 0
+ #define STRAT_LIST_T1 1
+ #define STRAT_LIST_T2 2
+ #define STRAT_LIST_B2 3
+ #define STRAT_NUM_LISTS 4
+
+ /*
+ * The Cache Directory Block (CDB) of the Adaptive Replacement Cache (ARC)
+ */
+ typedef struct
+ {
+ int prev; /* list links */
+ int next;
+ short list; /* ID of list it is currently in */
+ bool t1_vacuum; /* t => present only because of VACUUM */
+ TransactionId t1_xid; /* the xid this entry went onto T1 */
+ BufferTag buf_tag; /* page identifier */
+ int buf_id; /* currently assigned data buffer, or -1 */
+ } BufferStrategyCDB;
+
+ /*
+ * The shared ARC control information.
+ */
+ typedef struct
+ {
+ int target_T1_size; /* What T1 size are we aiming for */
+ int listUnusedCDB; /* All unused StrategyCDB */
+ int listHead[STRAT_NUM_LISTS]; /* ARC lists B1, T1, T2
+ * and B2 */
+ int listTail[STRAT_NUM_LISTS];
+ int listSize[STRAT_NUM_LISTS];
+ Buffer listFreeBuffers; /* List of unused buffers */
+
+ long num_lookup; /* Some hit statistics */
+ long num_hit[STRAT_NUM_LISTS];
+ time_t stat_report;
+
+ /* Array of CDB's starts here */
+ BufferStrategyCDB cdb[1]; /* VARIABLE SIZE ARRAY */
+ } BufferStrategyControl;
+
/* GUC variable: time in seconds between statistics reports */
int DebugSharedBuffers = 0;

***************
*** 811,816 ****
--- 856,883 ----
return num_buffer_dirty;
}

+
+ /*
+ * StrategyShmemSize
+ *
+ * estimate the size of shared memory used by the freelist-related structures.
+ */
+ int
+ StrategyShmemSize(void)
+ {
+ int size = 0;
+
+ /* size of CDB lookup hash table */
+ size += BufTableShmemSize(NBuffers * 2);
+
+ /* size of the shared replacement strategy control block */
+ size += MAXALIGN(sizeof(BufferStrategyControl));
+
+ /* size of the ARC directory blocks */
+ size += MAXALIGN(NBuffers * 2 * sizeof(BufferStrategyCDB));
+
+ return size;
+ }

/*
* StrategyInitialize -- initialize the buffer cache replacement
*** src/include/storage/buf_internals.h.orig Fri Dec 31 17:47:02 2004
--- src/include/storage/buf_internals.h Thu Feb 3 14:51:11 2005
***************
*** 17,24 ****

#include "storage/backendid.h"
#include "storage/buf.h"
- #include "storage/lmgr.h"
#include "storage/lwlock.h"


/*
--- 17,25 ----

#include "storage/backendid.h"
#include "storage/buf.h"
#include "storage/lwlock.h"
+ #include "storage/shmem.h"
+ #include "utils/rel.h"


/*
***************
*** 40,49 ****
* Buffer tag identifies which disk block the buffer contains.
*
* Note: the BufferTag data must be sufficient to determine where to write the
! * block, even during a "blind write" with no relcache entry. It's possible
! * that the backend flushing the buffer doesn't even believe the relation is
! * visible yet (its xact may have started before the xact that created the
! * rel). The storage manager must be able to cope anyway.
*
* Note: if there's any pad bytes in the struct, INIT_BUFFERTAG will have
* to be fixed to zero them, since this struct is used as a hash key.
--- 41,50 ----
* Buffer tag identifies which disk block the buffer contains.
*
* Note: the BufferTag data must be sufficient to determine where to write the
! * block, without reference to pg_class or pg_tablespace entries. It's
! * possible that the backend flushing the buffer doesn't even believe the
! * relation is visible yet (its xact may have started before the xact that
! * created the rel). The storage manager must be able to cope anyway.
*
* Note: if there's any pad bytes in the struct, INIT_BUFFERTAG will have
* to be fixed to zero them, since this struct is used as a hash key.
***************
*** 107,164 ****

#define BufferDescriptorGetBuffer(bdesc) ((bdesc)->buf_id + 1)

- /* entry for buffer lookup hashtable */
- typedef struct
- {
- BufferTag key; /* Tag of a disk page */
- int id; /* CDB id of associated CDB */
- } BufferLookupEnt;

! /*
! * Definitions for the buffer replacement strategy
! */
! #define STRAT_LIST_UNUSED (-1)
! #define STRAT_LIST_B1 0
! #define STRAT_LIST_T1 1
! #define STRAT_LIST_T2 2
! #define STRAT_LIST_B2 3
! #define STRAT_NUM_LISTS 4
!
! /*
! * The Cache Directory Block (CDB) of the Adaptive Replacement Cache (ARC)
! */
! typedef struct
! {
! int prev; /* list links */
! int next;
! short list; /* ID of list it is currently in */
! bool t1_vacuum; /* t => present only because of VACUUM */
! TransactionId t1_xid; /* the xid this entry went onto T1 */
! BufferTag buf_tag; /* page identifier */
! int buf_id; /* currently assigned data buffer, or -1 */
! } BufferStrategyCDB;
!
! /*
! * The shared ARC control information.
! */
! typedef struct
! {
! int target_T1_size; /* What T1 size are we aiming for */
! int listUnusedCDB; /* All unused StrategyCDB */
! int listHead[STRAT_NUM_LISTS]; /* ARC lists B1, T1, T2
! * and B2 */
! int listTail[STRAT_NUM_LISTS];
! int listSize[STRAT_NUM_LISTS];
! Buffer listFreeBuffers; /* List of unused buffers */
!
! long num_lookup; /* Some hit statistics */
! long num_hit[STRAT_NUM_LISTS];
! time_t stat_report;
!
! /* Array of CDB's starts here */
! BufferStrategyCDB cdb[1]; /* VARIABLE SIZE ARRAY */
! } BufferStrategyControl;


/* counters in buf_init.c */
extern long int ReadBufferCount;
--- 108,119 ----

#define BufferDescriptorGetBuffer(bdesc) ((bdesc)->buf_id + 1)


! /* in bufmgr.c */
! extern BufferDesc *BufferDescriptors;

+ /* in localbuf.c */
+ extern BufferDesc *LocalBufferDescriptors;

/* counters in buf_init.c */
extern long int ReadBufferCount;
***************
*** 170,180 ****


/*
! * Bufmgr Interface:
*/

- /* Internal routines: only called by bufmgr */
-
/* freelist.c */
extern BufferDesc *StrategyBufferLookup(BufferTag *tagPtr, bool recheck,
int *cdb_found_index);
--- 125,133 ----


/*
! * Internal routines: only called by bufmgr
*/

/* freelist.c */
extern BufferDesc *StrategyBufferLookup(BufferTag *tagPtr, bool recheck,
int *cdb_found_index);
***************
*** 185,204 ****
extern void StrategyHintVacuum(bool vacuum_active);
extern int StrategyDirtyBufferList(BufferDesc **buffers, BufferTag *buftags,
int max_buffers);
extern void StrategyInitialize(bool init);

/* buf_table.c */
extern void InitBufTable(int size);
extern int BufTableLookup(BufferTag *tagPtr);
! extern void BufTableInsert(BufferTag *tagPtr, int cdb_id);
extern void BufTableDelete(BufferTag *tagPtr);

- /* bufmgr.c */
- extern BufferDesc *BufferDescriptors;
-
/* localbuf.c */
- extern BufferDesc *LocalBufferDescriptors;
-
extern BufferDesc *LocalBufferAlloc(Relation reln, BlockNumber blockNum,
bool *foundPtr);
extern void WriteLocalBuffer(Buffer buffer, bool release);
--- 138,154 ----
extern void StrategyHintVacuum(bool vacuum_active);
extern int StrategyDirtyBufferList(BufferDesc **buffers, BufferTag *buftags,
int max_buffers);
+ extern int StrategyShmemSize(void);
extern void StrategyInitialize(bool init);

/* buf_table.c */
+ extern int BufTableShmemSize(int size);
extern void InitBufTable(int size);
extern int BufTableLookup(BufferTag *tagPtr);
! extern void BufTableInsert(BufferTag *tagPtr, int buf_id);
extern void BufTableDelete(BufferTag *tagPtr);

/* localbuf.c */
extern BufferDesc *LocalBufferAlloc(Relation reln, BlockNumber blockNum,
bool *foundPtr);
extern void WriteLocalBuffer(Buffer buffer, bool release);

Browse pgsql-patches by date

  From Date Subject
Next Message Neil Conway 2005-02-04 02:06:06 Re: Refactoring lock.c
Previous Message Martin Pitt 2005-02-03 16:43:21 Re: libpq API incompatibility between 7.4 and 8.0