Index: src/backend/storage/buffer/buf_init.c =================================================================== RCS file: /home/pgsql/CvsRoot/pgsql-server/src/backend/storage/buffer/buf_init.c,v retrieving revision 1.54 diff -c -b -r1.54 buf_init.c *** src/backend/storage/buffer/buf_init.c 2003/08/04 02:40:03 1.54 --- src/backend/storage/buffer/buf_init.c 2003/11/03 13:43:55 *************** *** 48,56 **** int ShowPinTrace = 0; int Data_Descriptors; - int Free_List_Descriptor; - int Lookup_List_Descriptor; - int Num_Descriptors; BufferDesc *BufferDescriptors; Block *BufferBlockPointers; --- 48,53 ---- *************** *** 133,141 **** int i; Data_Descriptors = NBuffers; - Free_List_Descriptor = Data_Descriptors; - Lookup_List_Descriptor = Data_Descriptors + 1; - Num_Descriptors = Data_Descriptors + 1; /* * It's probably not really necessary to grab the lock --- if there's --- 130,135 ---- *************** *** 156,162 **** BufferDescriptors = (BufferDesc *) ShmemInitStruct("Buffer Descriptors", ! Num_Descriptors * sizeof(BufferDesc), &foundDescs); BufferBlocks = (char *) ShmemInitStruct("Buffer Blocks", --- 150,156 ---- BufferDescriptors = (BufferDesc *) ShmemInitStruct("Buffer Descriptors", ! Data_Descriptors * sizeof(BufferDesc), &foundDescs); BufferBlocks = (char *) ShmemInitStruct("Buffer Blocks", *************** *** 176,191 **** block = BufferBlocks; /* ! * link the buffers into a circular, doubly-linked list to ! * initialize free list, and initialize the buffer headers. Still ! * don't know anything about replacement strategy in this file. */ for (i = 0; i < Data_Descriptors; block += BLCKSZ, buf++, i++) { Assert(ShmemIsValid((unsigned long) block)); ! buf->freeNext = i + 1; ! buf->freePrev = i - 1; CLEAR_BUFFERTAG(&(buf->tag)); buf->buf_id = i; --- 170,183 ---- block = BufferBlocks; /* ! * link the buffers into a single linked list. This will become the ! * LiFo list of unused buffers returned by StragegyGetBuffer(). */ for (i = 0; i < Data_Descriptors; block += BLCKSZ, buf++, i++) { Assert(ShmemIsValid((unsigned long) block)); ! buf->bufNext = i + 1; CLEAR_BUFFERTAG(&(buf->tag)); buf->buf_id = i; *************** *** 199,212 **** buf->wait_backend_id = 0; } ! /* close the circular queue */ ! BufferDescriptors[0].freePrev = Data_Descriptors - 1; ! BufferDescriptors[Data_Descriptors - 1].freeNext = 0; } /* Init other shared buffer-management stuff */ ! InitBufTable(); ! InitFreeList(!foundDescs); LWLockRelease(BufMgrLock); } --- 191,202 ---- buf->wait_backend_id = 0; } ! /* Correct last entry */ ! BufferDescriptors[Data_Descriptors - 1].bufNext = -1; } /* Init other shared buffer-management stuff */ ! StrategyInitialize(!foundDescs); LWLockRelease(BufMgrLock); } Index: src/backend/storage/buffer/buf_table.c =================================================================== RCS file: /home/pgsql/CvsRoot/pgsql-server/src/backend/storage/buffer/buf_table.c,v retrieving revision 1.29 diff -c -b -r1.29 buf_table.c *** src/backend/storage/buffer/buf_table.c 2003/08/04 02:40:03 1.29 --- src/backend/storage/buffer/buf_table.c 2003/11/03 13:45:13 *************** *** 38,44 **** * Initialize shmem hash table for mapping buffers */ void ! InitBufTable(void) { HASHCTL info; --- 38,44 ---- * Initialize shmem hash table for mapping buffers */ void ! InitBufTable(int size) { HASHCTL info; *************** *** 50,56 **** info.hash = tag_hash; SharedBufHash = ShmemInitHash("Shared Buffer Lookup Table", ! NBuffers, NBuffers, &info, HASH_ELEM | HASH_FUNCTION); --- 50,56 ---- info.hash = tag_hash; SharedBufHash = ShmemInitHash("Shared Buffer Lookup Table", ! size, size, &info, HASH_ELEM | HASH_FUNCTION); *************** *** 58,136 **** elog(FATAL, "could not initialize shared buffer hash table"); } ! BufferDesc * BufTableLookup(BufferTag *tagPtr) { BufferLookupEnt *result; if (tagPtr->blockNum == P_NEW) ! return NULL; result = (BufferLookupEnt *) hash_search(SharedBufHash, (void *) tagPtr, HASH_FIND, NULL); if (!result) ! return NULL; ! return &(BufferDescriptors[result->id]); } /* * BufTableDelete */ bool ! BufTableDelete(BufferDesc *buf) { BufferLookupEnt *result; ! ! /* ! * buffer not initialized or has been removed from table already. ! * BM_DELETED keeps us from removing buffer twice. ! */ ! if (buf->flags & BM_DELETED) ! return TRUE; ! ! buf->flags |= BM_DELETED; result = (BufferLookupEnt *) ! hash_search(SharedBufHash, (void *) &(buf->tag), HASH_REMOVE, NULL); ! if (!result) /* shouldn't happen */ ! elog(ERROR, "shared buffer hash table corrupted"); ! /* ! * Clear the buffer's tag. This doesn't matter for the hash table, ! * since the buffer is already removed from it, but it ensures that ! * sequential searches through the buffer table won't think the buffer ! * is still valid for its old page. ! */ ! buf->tag.rnode.relNode = InvalidOid; ! buf->tag.rnode.tblNode = InvalidOid; return TRUE; } bool ! BufTableInsert(BufferDesc *buf) { BufferLookupEnt *result; - bool found; - - /* cannot insert it twice */ - Assert(buf->flags & BM_DELETED); - buf->flags &= ~(BM_DELETED); result = (BufferLookupEnt *) ! hash_search(SharedBufHash, (void *) &(buf->tag), HASH_ENTER, &found); ! if (!result) ! ereport(ERROR, ! (errcode(ERRCODE_OUT_OF_MEMORY), ! errmsg("out of shared memory"))); ! ! if (found) /* found something else in the table? */ elog(ERROR, "shared buffer hash table corrupted"); - result->id = buf->buf_id; return TRUE; } --- 58,120 ---- elog(FATAL, "could not initialize shared buffer hash table"); } ! /* ! * BufTableLookup ! */ ! int BufTableLookup(BufferTag *tagPtr) { BufferLookupEnt *result; if (tagPtr->blockNum == P_NEW) ! return -1; result = (BufferLookupEnt *) hash_search(SharedBufHash, (void *) tagPtr, HASH_FIND, NULL); if (!result) ! return -1; ! return result->id; } /* * BufTableDelete */ bool ! BufTableInsert(BufferTag *tagPtr, Buffer buf_id) { BufferLookupEnt *result; ! bool found; result = (BufferLookupEnt *) ! hash_search(SharedBufHash, (void *) tagPtr, HASH_ENTER, &found); ! if (!result) ! ereport(ERROR, ! (errcode(ERRCODE_OUT_OF_MEMORY), ! errmsg("out of shared memory"))); ! if (found) /* found something else in the table? */ ! elog(ERROR, "shared buffer hash table corrupted"); + result->id = buf_id; return TRUE; } + /* + * BufTableDelete + */ bool ! BufTableDelete(BufferTag *tagPtr) { BufferLookupEnt *result; result = (BufferLookupEnt *) ! hash_search(SharedBufHash, (void *) tagPtr, HASH_REMOVE, NULL); ! if (!result) /* shouldn't happen */ elog(ERROR, "shared buffer hash table corrupted"); return TRUE; } Index: src/backend/storage/buffer/bufmgr.c =================================================================== RCS file: /home/pgsql/CvsRoot/pgsql-server/src/backend/storage/buffer/bufmgr.c,v retrieving revision 1.141 diff -c -b -r1.141 bufmgr.c *** src/backend/storage/buffer/bufmgr.c 2003/09/25 06:58:01 1.141 --- src/backend/storage/buffer/bufmgr.c 2003/11/03 13:50:31 *************** *** 260,271 **** if (status == SM_FAIL) { /* IO Failed. cleanup the data structures and go home */ - if (!BufTableDelete(bufHdr)) - { - LWLockRelease(BufMgrLock); - elog(FATAL, "buffer table broken after I/O error"); - } /* remember that BufferAlloc() pinned the buffer */ UnpinBuffer(bufHdr); --- 260,267 ---- if (status == SM_FAIL) { /* IO Failed. cleanup the data structures and go home */ + StrategyInvalidateBuffer(bufHdr); /* remember that BufferAlloc() pinned the buffer */ UnpinBuffer(bufHdr); *************** *** 318,324 **** INIT_BUFFERTAG(&newTag, reln, blockNum); /* see if the block is in the buffer pool already */ ! buf = BufTableLookup(&newTag); if (buf != NULL) { /* --- 314,320 ---- INIT_BUFFERTAG(&newTag, reln, blockNum); /* see if the block is in the buffer pool already */ ! buf = StrategyBufferLookup(&newTag, false); if (buf != NULL) { /* *************** *** 379,385 **** inProgress = FALSE; for (buf = (BufferDesc *) NULL; buf == (BufferDesc *) NULL;) { ! buf = GetFreeBuffer(); /* GetFreeBuffer will abort if it can't find a free buffer */ Assert(buf); --- 375,381 ---- inProgress = FALSE; for (buf = (BufferDesc *) NULL; buf == (BufferDesc *) NULL;) { ! buf = StrategyGetBuffer(); /* GetFreeBuffer will abort if it can't find a free buffer */ Assert(buf); *************** *** 492,498 **** * we haven't gotten around to insert the new tag into the * buffer table. So we need to check here. -ay 3/95 */ ! buf2 = BufTableLookup(&newTag); if (buf2 != NULL) { /* --- 488,494 ---- * we haven't gotten around to insert the new tag into the * buffer table. So we need to check here. -ay 3/95 */ ! buf2 = StrategyBufferLookup(&newTag, true); if (buf2 != NULL) { /* *************** *** 535,563 **** */ /* ! * Change the name of the buffer in the lookup table: ! * ! * Need to update the lookup table before the read starts. If someone ! * comes along looking for the buffer while we are reading it in, we ! * don't want them to allocate a new buffer. For the same reason, we ! * didn't want to erase the buf table entry for the buffer we were ! * writing back until now, either. */ ! ! if (!BufTableDelete(buf)) ! { ! LWLockRelease(BufMgrLock); ! elog(FATAL, "buffer wasn't in the buffer hash table"); ! } ! INIT_BUFFERTAG(&(buf->tag), reln, blockNum); - if (!BufTableInsert(buf)) - { - LWLockRelease(BufMgrLock); - elog(FATAL, "buffer in buffer hash table twice"); - } - /* * Buffer contents are currently invalid. Have to mark IO IN PROGRESS * so no one fiddles with them until the read completes. If this --- 531,542 ---- */ /* ! * Tell the buffer replacement strategy that we are replacing the ! * buffer content. Then rename the buffer. */ ! StrategyReplaceBuffer(buf, reln, blockNum); INIT_BUFFERTAG(&(buf->tag), reln, blockNum); /* * Buffer contents are currently invalid. Have to mark IO IN PROGRESS * so no one fiddles with them until the read completes. If this *************** *** 959,967 **** if (isCommit) elog(WARNING, ! "buffer refcount leak: [%03d] (freeNext=%d, freePrev=%d, " "rel=%u/%u, blockNum=%u, flags=0x%x, refcount=%d %ld)", ! i, buf->freeNext, buf->freePrev, buf->tag.rnode.tblNode, buf->tag.rnode.relNode, buf->tag.blockNum, buf->flags, buf->refcount, PrivateRefCount[i]); --- 938,946 ---- if (isCommit) elog(WARNING, ! "buffer refcount leak: [%03d] (bufNext=%d, " "rel=%u/%u, blockNum=%u, flags=0x%x, refcount=%d %ld)", ! i, buf->bufNext, buf->tag.rnode.tblNode, buf->tag.rnode.relNode, buf->tag.blockNum, buf->flags, buf->refcount, PrivateRefCount[i]); *************** *** 1229,1235 **** /* * And mark the buffer as no longer occupied by this rel. */ ! BufTableDelete(bufHdr); } } --- 1208,1214 ---- /* * And mark the buffer as no longer occupied by this rel. */ ! StrategyInvalidateBuffer(bufHdr); } } *************** *** 1295,1301 **** /* * And mark the buffer as no longer occupied by this page. */ ! BufTableDelete(bufHdr); } } --- 1274,1280 ---- /* * And mark the buffer as no longer occupied by this page. */ ! StrategyInvalidateBuffer(bufHdr); } } *************** *** 1543,1549 **** return -2; } if (bufHdr->tag.blockNum >= firstDelBlock) ! BufTableDelete(bufHdr); } } --- 1522,1528 ---- return -2; } if (bufHdr->tag.blockNum >= firstDelBlock) ! StrategyInvalidateBuffer(bufHdr); } } Index: src/backend/storage/buffer/freelist.c =================================================================== RCS file: /home/pgsql/CvsRoot/pgsql-server/src/backend/storage/buffer/freelist.c,v retrieving revision 1.31 diff -c -b -r1.31 freelist.c *** src/backend/storage/buffer/freelist.c 2003/08/04 02:40:03 1.31 --- src/backend/storage/buffer/freelist.c 2003/11/03 14:26:49 *************** *** 1,8 **** /*------------------------------------------------------------------------- * * freelist.c ! * routines for manipulating the buffer pool's replacement strategy ! * freelist. * * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California --- 1,7 ---- /*------------------------------------------------------------------------- * * freelist.c ! * routines for manipulating the buffer pool's replacement strategy. * * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California *************** *** 32,85 **** #include "storage/ipc.h" #include "storage/proc.h" ! static BufferDesc *SharedFreeList; /* ! * State-checking macros */ ! #define IsInQueue(bf) \ ! ( \ ! AssertMacro((bf->freeNext != INVALID_DESCRIPTOR)), \ ! AssertMacro((bf->freePrev != INVALID_DESCRIPTOR)), \ ! AssertMacro((bf->flags & BM_FREE)) \ ! ) ! #define IsNotInQueue(bf) \ ! ( \ ! AssertMacro((bf->freeNext == INVALID_DESCRIPTOR)), \ ! AssertMacro((bf->freePrev == INVALID_DESCRIPTOR)), \ ! AssertMacro(! (bf->flags & BM_FREE)) \ ! ) /* ! * AddBufferToFreelist * ! * In theory, this is the only routine that needs to be changed ! * if the buffer replacement strategy changes. Just change ! * the manner in which buffers are added to the freelist queue. ! * Currently, they are added on an LRU basis. */ ! static void ! AddBufferToFreelist(BufferDesc *bf) { ! #ifdef BMTRACE ! _bm_trace(bf->tag.relId.dbId, bf->tag.relId.relId, bf->tag.blockNum, ! BufferDescriptorGetBuffer(bf), BMT_DEALLOC); ! #endif /* BMTRACE */ ! IsNotInQueue(bf); ! /* change bf so it points to inFrontOfNew and its successor */ ! bf->freePrev = SharedFreeList->freePrev; ! bf->freeNext = Free_List_Descriptor; ! /* insert new into chain */ ! BufferDescriptors[bf->freeNext].freePrev = bf->buf_id; ! BufferDescriptors[bf->freePrev].freeNext = bf->buf_id; } #undef PinBuffer /* --- 31,612 ---- #include "storage/ipc.h" #include "storage/proc.h" + #define STRAT_LIST_UNUSED -1 + #define STRAT_LIST_B1 0 + #define STRAT_LIST_T1 1 + #define STRAT_LIST_T2 2 + #define STRAT_LIST_B2 3 + #define STRAT_NUM_LISTS 4 ! #ifndef MAX ! #define MAX(a,b) (((a) > (b)) ? (a) : (b)) ! #endif ! #ifndef MIN ! #define MIN(a,b) (((a) < (b)) ? (a) : (b)) ! #endif ! ! /* ! * The Cache Directory Block (CDB) of the Adaptive Replacement Cache (ARC) ! */ ! typedef struct bufstratcdb ! { ! int prev; ! int next; ! int list; ! BufferTag buf_tag; ! Buffer buf_id; ! } BufferStrategyCDB; ! ! /* ! * The shared ARC control information. ! */ ! typedef struct bufstratcontrol ! { ! ! int target_T1_size; /* What T1 size are we aiming for */ ! int listUnusedCDB; /* All unused StrategyCDB */ ! int listHead[STRAT_NUM_LISTS]; /* ARC lists B1, T1, T2 and B2 */ ! int listTail[STRAT_NUM_LISTS]; ! int listSize[STRAT_NUM_LISTS]; ! Buffer listFreeBuffers; /* List of unused buffers */ ! ! BufferStrategyCDB cdb[1]; /* The cache directory */ ! } BufferStrategyControl; ! ! ! static BufferStrategyControl *StrategyControl = NULL; ! static BufferStrategyCDB *StrategyCDB = NULL; ! ! static int strategy_cdb_found; ! static int strategy_cdb_replace; ! static int strategy_get_from; ! ! ! #define T1_TARGET StrategyControl->target_T1_size ! #define B1_LENGTH StrategyControl->listSize[STRAT_LIST_B1] ! #define T1_LENGTH StrategyControl->listSize[STRAT_LIST_T1] ! #define T2_LENGTH StrategyControl->listSize[STRAT_LIST_T2] ! #define B2_LENGTH StrategyControl->listSize[STRAT_LIST_B2] ! ! ! /* ! * Macro to remove a CDB from whichever list it currently is on ! */ ! #define STRAT_LIST_REMOVE(cdb) \ ! AssertMacro((cdb)->list >= 0 && (cdb)->list < STRAT_NUM_LISTS); \ ! if ((cdb)->prev < 0) \ ! StrategyControl->listHead[(cdb)->list] = (cdb)->next; \ ! else \ ! StrategyCDB[(cdb)->prev].next = (cdb)->next; \ ! if ((cdb)->next < 0) \ ! StrategyControl->listTail[(cdb)->list] = (cdb)->prev; \ ! else \ ! StrategyCDB[(cdb)->next].prev = (cdb)->prev; \ ! StrategyControl->listSize[(cdb)->list]--; \ ! (cdb)->list = STRAT_LIST_UNUSED; ! ! /* ! * Macro to add a CDB to the tail of a list (MRU position) ! */ ! #define STRAT_MRU_INSERT(cdb,l) \ ! AssertMacro((cdb)->list == STRAT_LIST_UNUSED); \ ! if (StrategyControl->listTail[(l)] < 0) \ ! { \ ! (cdb)->prev = (cdb)->next = -1; \ ! StrategyControl->listHead[(l)] = \ ! StrategyControl->listTail[(l)] = \ ! ((cdb) - StrategyCDB); \ ! } \ ! else \ ! { \ ! (cdb)->next = -1; \ ! (cdb)->prev = StrategyControl->listTail[(l)]; \ ! StrategyCDB[StrategyControl->listTail[(l)]].next = \ ! ((cdb) - StrategyCDB); \ ! StrategyControl->listTail[(l)] = \ ! ((cdb) - StrategyCDB); \ ! } \ ! StrategyControl->listSize[(l)]++; \ ! (cdb)->list = (l); ! ! ! /* ! * StrategyBufferLookup ! * ! * Lookup a page request in the cache directory. A buffer is only ! * returned for a T1 or T2 cache hit. B1 and B2 hits are only ! * remembered here to later affect the behaviour. ! */ ! BufferDesc * ! StrategyBufferLookup(BufferTag *tagPtr, bool recheck) ! { ! BufferStrategyCDB *cdb; ! ! /* ! * Lookup the block in the shared hash table ! */ ! strategy_cdb_found = BufTableLookup(tagPtr); ! ! /* ! * Handle CDB lookup miss ! */ ! if (strategy_cdb_found < 0) ! { ! if (!recheck) ! { ! /* ! * This is an initial lookup and we have a complete ! * cache miss (block found nowhere). This means we ! * remember according to the current T1 size and the ! * target T1 size from where we take a block if we ! * need one later. ! */ ! if (T1_LENGTH >= MAX(1, T1_TARGET)) ! strategy_get_from = STRAT_LIST_T1; ! else ! strategy_get_from = STRAT_LIST_T2; ! } ! ! /* report cache miss */ ! return NULL; ! } ! ! /* ! * We found a CDB ! */ ! cdb = &StrategyCDB[strategy_cdb_found]; ! ! /* ! * If this is a T1 or T2 hit, we simply move the CDB to the ! * T2 MRU position and return the found buffer. ! */ ! if (cdb->list == STRAT_LIST_T1 || cdb->list == STRAT_LIST_T2) ! { ! STRAT_LIST_REMOVE(cdb); ! STRAT_MRU_INSERT(cdb, STRAT_LIST_T2); ! ! return &BufferDescriptors[cdb->buf_id]; ! } ! ! /* ! * In the case of a recheck we don't care about B1 or B2 hits here. ! * The bufmgr does this call only to make sure noone faulted in the ! * block while we where busy flushing another. Now for this really ! * to end up as a B1 or B2 cache hit, we must have been flushing for ! * quite some time as the block not only must have been read, but ! * also traveled through the queue and evicted from the T cache again ! * already. ! */ ! if (recheck) ! return NULL; ! ! /* ! * Adjust the target size of the T1 cache depending on if this is ! * a B1 or B2 hit. ! */ ! switch (cdb->list) ! { ! case STRAT_LIST_B1: ! /* ! * B1 hit means that the T1 cache is probably too ! * small. Adjust the T1 target size and continue ! * below. ! */ ! T1_TARGET = MIN(T1_TARGET + MAX(B2_LENGTH / B1_LENGTH, 1), ! Data_Descriptors); ! break; ! ! case STRAT_LIST_B2: ! /* ! * B2 hit means that the T2 cache is probably too ! * small. Adjust the T1 target size and continue ! * below. ! */ ! T1_TARGET = MAX(T1_TARGET - MAX(B1_LENGTH / B2_LENGTH, 1), 0); ! break; ! ! default: ! elog(ERROR, "Buffer hash table corrupted - CDB on list %d found", ! cdb->list); ! } ! ! /* ! * Decide where to take from if we will be out of ! * free blocks later in StrategyGetBuffer(). ! */ ! if (T1_LENGTH >= MAX(1, T1_TARGET)) ! strategy_get_from = STRAT_LIST_T1; ! else ! strategy_get_from = STRAT_LIST_T2; ! ! /* ! * Even if we had seen the block in the past, it's data is ! * not currently in memory ... cache miss to the bufmgr. ! */ ! return NULL; ! } ! ! ! /* ! * StrategyGetBuffer ! * ! * Called by the bufmgr to get the next candidate buffer to use in ! * BufferAlloc(). The only hard requirement BufferAlloc() has is that ! * this buffer must not currently be pinned. ! */ ! BufferDesc * ! StrategyGetBuffer(void) ! { ! int cdb_id; ! BufferDesc *buf; ! ! if (StrategyControl->listFreeBuffers < 0) ! { ! /* We don't have a free buffer, must take one from T1 or T2 */ ! ! if (strategy_get_from == STRAT_LIST_T1) ! { ! /* ! * We should take the first unpinned buffer from T1. ! */ ! cdb_id = StrategyControl->listHead[STRAT_LIST_T1]; ! while (cdb_id >= 0) ! { ! buf = &BufferDescriptors[StrategyCDB[cdb_id].buf_id]; ! if (buf->refcount == 0) ! { ! strategy_cdb_replace = cdb_id; ! Assert(StrategyCDB[cdb_id].list == STRAT_LIST_T1); ! return buf; ! } ! cdb_id = StrategyCDB[cdb_id].next; ! } ! ! /* ! * No unpinned T1 buffer found - pardon T2 cache. ! */ ! cdb_id = StrategyControl->listHead[STRAT_LIST_T2]; ! while (cdb_id >= 0) ! { ! buf = &BufferDescriptors[StrategyCDB[cdb_id].buf_id]; ! if (buf->refcount == 0) ! { ! strategy_cdb_replace = cdb_id; ! Assert(StrategyCDB[cdb_id].list == STRAT_LIST_T2); ! return buf; ! } ! cdb_id = StrategyCDB[cdb_id].next; ! } ! ! /* ! * No unpinned buffers at all!!! ! */ ! elog(ERROR, "StrategyGetBuffer(): Out of unpinned buffers"); ! } ! else ! { ! /* ! * We should take the first unpinned buffer from T2. ! */ ! cdb_id = StrategyControl->listHead[STRAT_LIST_T2]; ! while (cdb_id >= 0) ! { ! buf = &BufferDescriptors[StrategyCDB[cdb_id].buf_id]; ! if (buf->refcount == 0) ! { ! strategy_cdb_replace = cdb_id; ! Assert(StrategyCDB[cdb_id].list == STRAT_LIST_T2); ! return buf; ! } ! cdb_id = StrategyCDB[cdb_id].next; ! } ! ! /* ! * No unpinned T2 buffer found - pardon T1 cache. ! */ ! cdb_id = StrategyControl->listHead[STRAT_LIST_T1]; ! while (cdb_id >= 0) ! { ! buf = &BufferDescriptors[StrategyCDB[cdb_id].buf_id]; ! if (buf->refcount == 0) ! { ! strategy_cdb_replace = cdb_id; ! Assert(StrategyCDB[cdb_id].list == STRAT_LIST_T1); ! return buf; ! } ! cdb_id = StrategyCDB[cdb_id].next; ! } ! ! /* ! * No unpinned buffers at all!!! ! */ ! elog(ERROR, "StrategyGetBuffer(): Out of unpinned buffers"); ! } ! } ! else ! { ! /* There is a completely free buffer available - take it */ ! ! /* ! * Note: This code uses the side effect that a free buffer ! * can never be pinned or dirty and therefore the call to ! * StrategyReplaceBuffer() will happen without the bufmgr ! * releasing the bufmgr-lock in the meantime. That means, ! * that there will never be any reason to recheck. Otherwise ! * we would leak shared buffers here! ! */ ! strategy_cdb_replace = -1; ! buf = &BufferDescriptors[StrategyControl->listFreeBuffers]; ! ! StrategyControl->listFreeBuffers = buf->bufNext; ! buf->bufNext = -1; + /* Buffer of freelist cannot be pinned */ + Assert(buf->refcount == 0); + + return buf; + } + + /* not reached */ + return NULL; + } + + /* ! * StrategyReplaceBuffer ! * ! * Called by the buffer manager to inform us that he possibly flushed ! * a buffer and is now about to replace the content. Prior to this call, ! * the cache algorithm still reports the buffer as in the cache. After ! * this call we report the new block, even if IO might still need to ! * start. */ + void + StrategyReplaceBuffer(BufferDesc *buf, Relation rnode, BlockNumber blockNum) + { + BufferStrategyCDB *cdb_found; + BufferStrategyCDB *cdb_replace; ! if (strategy_cdb_found >= 0) ! { ! /* This was a ghost buffer cache hit (B1 or B2) */ ! cdb_found = &StrategyCDB[strategy_cdb_found]; ! ! /* Assert that the buffer remembered in cdb_found is the one */ ! /* the buffer manager is currently faulting in */ ! Assert(BUFFERTAG_EQUALS(&(cdb_found->buf_tag), rnode, blockNum)); ! ! if (strategy_cdb_replace >= 0) ! { ! /* We are satisfying it with an evicted T buffer */ ! cdb_replace = &StrategyCDB[strategy_cdb_replace]; ! /* Assert that the buffer remembered in cdb_replace is */ ! /* the one the buffer manager has just evicted */ ! Assert(cdb_replace->list == STRAT_LIST_T1 || ! cdb_replace->list == STRAT_LIST_T2); ! Assert(cdb_replace->buf_id == buf->buf_id); ! Assert(BUFFERTAGS_EQUAL(&(cdb_replace->buf_tag), &(buf->tag))); + /* Move the evicted T list entry to it's corresponding B list */ + if (cdb_replace->list == STRAT_LIST_T1) + { + STRAT_LIST_REMOVE(cdb_replace); + STRAT_MRU_INSERT(cdb_replace, STRAT_LIST_B1); + } + else + { + STRAT_LIST_REMOVE(cdb_replace); + STRAT_MRU_INSERT(cdb_replace, STRAT_LIST_B2); + } + /* And clear it's block reference */ + cdb_replace->buf_id = -1; + } + else + { + /* or we satisfy it with an unused buffer */ + } + /* Now the found B CDB get's the buffer and is moved to T2 */ + cdb_found->buf_id = buf->buf_id; + STRAT_LIST_REMOVE(cdb_found); + STRAT_MRU_INSERT(cdb_found, STRAT_LIST_T2); + } + else + { + /* This was a complete cache miss, so we need to create */ + /* a new CDB */ + if (StrategyControl->listUnusedCDB >= 0) + { + /* We have an unused one, let's take that */ + cdb_found = &StrategyCDB[StrategyControl->listUnusedCDB]; + StrategyControl->listUnusedCDB = cdb_found->next; + } + else + { + /* We want to evict a CDB from B2 for it. Only if B2 is */ + /* empty we take it from B1. */ + if (B2_LENGTH > 0) + cdb_found = &StrategyCDB[StrategyControl->listHead[STRAT_LIST_B2]]; + else + cdb_found = &StrategyCDB[StrategyControl->listHead[STRAT_LIST_B1]]; + + /* Remove it's hashtable entry and take it from the list */ + BufTableDelete(&(cdb_found->buf_tag)); + STRAT_LIST_REMOVE(cdb_found); + } + + /* Set the CDB's buf_tag and insert the hash key */ + INIT_BUFFERTAG(&(cdb_found->buf_tag), rnode, blockNum); + BufTableInsert(&(cdb_found->buf_tag), (cdb_found - StrategyCDB)); + + if (strategy_cdb_replace >= 0) + { + /* The buffer was formerly in a T list, move it's CDB + * to the corresponding B list */ + cdb_replace = &StrategyCDB[strategy_cdb_replace]; + + Assert(cdb_replace->list == STRAT_LIST_T1 || + cdb_replace->list == STRAT_LIST_T2); + Assert(cdb_replace->buf_id == buf->buf_id); + Assert(BUFFERTAGS_EQUAL(&(cdb_replace->buf_tag), &(buf->tag))); + + if (cdb_replace->list == STRAT_LIST_T1) + { + STRAT_LIST_REMOVE(cdb_replace); + STRAT_MRU_INSERT(cdb_replace, STRAT_LIST_B1); + } + else + { + STRAT_LIST_REMOVE(cdb_replace); + STRAT_MRU_INSERT(cdb_replace, STRAT_LIST_B2); + } + /* And clear it's block reference */ + cdb_replace->buf_id = -1; + } + else + { + /* or we satisfy it with an unused buffer */ + } + + /* Assign the buffer id to the new CDB and add it to T1 */ + cdb_found->buf_id = buf->buf_id; + STRAT_MRU_INSERT(cdb_found, STRAT_LIST_T1); + } + } + + /* ! * StrategyInvalidateBuffer * ! * Called by the buffer manager to inform us that a buffer content ! * is no longer valid. We simply throw away any eventual existing ! * buffer hash entry and move the CDB and buffer to the free lists. */ ! void ! StrategyInvalidateBuffer(BufferDesc *buf) { ! int cdb_id; ! BufferStrategyCDB *cdb; ! ! cdb_id = BufTableLookup(&(buf->tag)); ! /* If we have the buffer somewhere in the directory, remove it ! * and add the CDB to the list of unused CDB's. */ ! if (cdb_id >= 0) ! { ! cdb = &StrategyCDB[cdb_id]; ! BufTableDelete(&(cdb->buf_tag)); ! STRAT_LIST_REMOVE(cdb); ! cdb->buf_id = -1; ! cdb->next = StrategyControl->listUnusedCDB; ! StrategyControl->listUnusedCDB = cdb_id; ! } ! /* Buffer is unreferenced now and should not contain any valid data ! * so add it to the list of free buffers */ ! buf->bufNext = StrategyControl->listFreeBuffers; ! StrategyControl->listFreeBuffers = buf->buf_id; } + + /* + * StrategyInitialize -- initialize the buffer cache replacement + * strategy. + * + * Assume: All of the buffers are already building a linked list. + * Only called by postmaster and only during initialization. + */ + void + StrategyInitialize(bool init) + { + bool found; + int i; + + /* + * Initialize the shared CDB lookup hashtable + */ + InitBufTable(Data_Descriptors * 2); + + /* + * Get or create the shared strategy control block and the CDB's + */ + StrategyControl = (BufferStrategyControl *) + ShmemInitStruct("Buffer Strategy Status", + sizeof(BufferStrategyControl) + + sizeof(BufferStrategyCDB) * (Data_Descriptors * 2 - 1), + &found); + StrategyCDB = &(StrategyControl->cdb[0]); + + if (!found) + { + /* + * Only done once, usually in postmaster + */ + Assert(init); + + /* + * Grab the whole linked list of free buffers for our + * strategy + */ + StrategyControl->listFreeBuffers = 0; + + /* + * We start off with a target T1 list size of + * half the available cache blocks. + */ + StrategyControl->target_T1_size = Data_Descriptors / 2; + + /* + * Initialize B1, T1, T2 and B2 lists to be empty + */ + for (i = 0; i < STRAT_NUM_LISTS; i++) + { + StrategyControl->listHead[i] = -1; + StrategyControl->listTail[i] = -1; + StrategyControl->listSize[i] = 0; + } + + /* + * All CDB's are linked as the listUnusedCDB + */ + for (i = 0; i < Data_Descriptors * 2; i++) + { + StrategyCDB[i].next = i + 1; + StrategyCDB[i].list = STRAT_LIST_UNUSED; + CLEAR_BUFFERTAG(&(StrategyCDB[i].buf_tag)); + StrategyCDB[i].buf_id = -1; + } + StrategyCDB[Data_Descriptors * 2 - 1].next = -1; + StrategyControl->listUnusedCDB = 0; + } + else + { + Assert(!init); + } + } + + #undef PinBuffer /* *************** *** 95,112 **** if (buf->refcount == 0) { - IsInQueue(buf); - - /* remove from freelist queue */ - BufferDescriptors[buf->freeNext].freePrev = buf->freePrev; - BufferDescriptors[buf->freePrev].freeNext = buf->freeNext; - buf->freeNext = buf->freePrev = INVALID_DESCRIPTOR; - /* mark buffer as no longer free */ buf->flags &= ~BM_FREE; } - else - IsNotInQueue(buf); if (PrivateRefCount[b] == 0) buf->refcount++; --- 622,630 ---- *************** *** 144,150 **** { int b = BufferDescriptorGetBuffer(buf) - 1; - IsNotInQueue(buf); Assert(buf->refcount > 0); Assert(PrivateRefCount[b] > 0); PrivateRefCount[b]--; --- 662,667 ---- *************** *** 154,160 **** if (buf->refcount == 0) { /* buffer is now unpinned */ - AddBufferToFreelist(buf); buf->flags |= BM_FREE; } else if ((buf->flags & BM_PIN_COUNT_WAITER) != 0 && --- 671,676 ---- *************** *** 186,249 **** } } #endif - - /* - * GetFreeBuffer() -- get the 'next' buffer from the freelist. - */ - BufferDesc * - GetFreeBuffer(void) - { - BufferDesc *buf; - - if (Free_List_Descriptor == SharedFreeList->freeNext) - { - /* queue is empty. All buffers in the buffer pool are pinned. */ - ereport(ERROR, - (errcode(ERRCODE_INSUFFICIENT_RESOURCES), - errmsg("out of free buffers"))); - return NULL; - } - buf = &(BufferDescriptors[SharedFreeList->freeNext]); - - /* remove from freelist queue */ - BufferDescriptors[buf->freeNext].freePrev = buf->freePrev; - BufferDescriptors[buf->freePrev].freeNext = buf->freeNext; - buf->freeNext = buf->freePrev = INVALID_DESCRIPTOR; - - buf->flags &= ~(BM_FREE); - - return buf; - } - - /* - * InitFreeList -- initialize the dummy buffer descriptor used - * as a freelist head. - * - * Assume: All of the buffers are already linked in a circular - * queue. Only called by postmaster and only during - * initialization. - */ - void - InitFreeList(bool init) - { - SharedFreeList = &(BufferDescriptors[Free_List_Descriptor]); - - if (init) - { - /* we only do this once, normally in the postmaster */ - SharedFreeList->data = INVALID_OFFSET; - SharedFreeList->flags = 0; - SharedFreeList->flags &= ~(BM_VALID | BM_DELETED | BM_FREE); - SharedFreeList->buf_id = Free_List_Descriptor; - - /* insert it into a random spot in the circular queue */ - SharedFreeList->freeNext = BufferDescriptors[0].freeNext; - SharedFreeList->freePrev = 0; - BufferDescriptors[SharedFreeList->freeNext].freePrev = - BufferDescriptors[SharedFreeList->freePrev].freeNext = - Free_List_Descriptor; - } - } /* --- 702,707 ---- Index: src/include/storage/buf_internals.h =================================================================== RCS file: /home/pgsql/CvsRoot/pgsql-server/src/include/storage/buf_internals.h,v retrieving revision 1.61 diff -c -b -r1.61 buf_internals.h *** src/include/storage/buf_internals.h 2003/08/04 02:40:14 1.61 --- src/include/storage/buf_internals.h 2003/11/02 21:58:52 *************** *** 72,88 **** (a)->rnode = (xx_reln)->rd_node \ ) /* * BufferDesc -- shared buffer cache metadata for a single * shared buffer descriptor. */ typedef struct sbufdesc { ! Buffer freeNext; /* links for freelist chain */ ! Buffer freePrev; SHMEM_OFFSET data; /* pointer to data in buf pool */ ! /* tag and id must be together for table lookup (still true?) */ BufferTag tag; /* file/block identifier */ int buf_id; /* buffer's index number (from 0) */ --- 72,100 ---- (a)->rnode = (xx_reln)->rd_node \ ) + #define BUFFERTAG_EQUALS(a,xx_reln,xx_blockNum) \ + ( \ + (a)->rnode.tblNode == (xx_reln)->rd_node.tblNode && \ + (a)->rnode.relNode == (xx_reln)->rd_node.relNode && \ + (a)->blockNum == (xx_blockNum) \ + ) + #define BUFFERTAGS_EQUAL(a,b) \ + ( \ + (a)->rnode.tblNode == (b)->rnode.tblNode && \ + (a)->rnode.relNode == (b)->rnode.relNode && \ + (a)->blockNum == (b)->blockNum \ + ) + /* * BufferDesc -- shared buffer cache metadata for a single * shared buffer descriptor. */ typedef struct sbufdesc { ! Buffer bufNext; /* link in freelist chain */ SHMEM_OFFSET data; /* pointer to data in buf pool */ ! /* tag and id must be together for table lookup */ BufferTag tag; /* file/block identifier */ int buf_id; /* buffer's index number (from 0) */ *************** *** 107,112 **** --- 119,125 ---- #define BufferDescriptorGetBuffer(bdesc) ((bdesc)->buf_id + 1) + /* * Each backend has its own BufferLocks[] array holding flag bits * showing what locks it has set on each buffer. *************** *** 167,180 **** /*freelist.c*/ extern void PinBuffer(BufferDesc *buf); extern void UnpinBuffer(BufferDesc *buf); ! extern BufferDesc *GetFreeBuffer(void); ! extern void InitFreeList(bool init); /* buf_table.c */ ! extern void InitBufTable(void); ! extern BufferDesc *BufTableLookup(BufferTag *tagPtr); ! extern bool BufTableDelete(BufferDesc *buf); ! extern bool BufTableInsert(BufferDesc *buf); /* bufmgr.c */ extern BufferDesc *BufferDescriptors; --- 180,196 ---- /*freelist.c*/ extern void PinBuffer(BufferDesc *buf); extern void UnpinBuffer(BufferDesc *buf); ! extern BufferDesc *StrategyBufferLookup(BufferTag *tagPtr, bool recheck); ! extern BufferDesc *StrategyGetBuffer(void); ! extern void StrategyReplaceBuffer(BufferDesc *buf, Relation rnode, BlockNumber blockNum); ! extern void StrategyInvalidateBuffer(BufferDesc *buf); ! extern void StrategyInitialize(bool init); /* buf_table.c */ ! extern void InitBufTable(int size); ! extern int BufTableLookup(BufferTag *tagPtr); ! extern bool BufTableInsert(BufferTag *tagPtr, Buffer buf_id); ! extern bool BufTableDelete(BufferTag *tagPtr); /* bufmgr.c */ extern BufferDesc *BufferDescriptors;