Index: doc/src/sgml/runtime.sgml
===================================================================
RCS file: /var/lib/cvs/pgsql/doc/src/sgml/runtime.sgml,v
retrieving revision 1.303
diff -c -r1.303 runtime.sgml
*** doc/src/sgml/runtime.sgml 23 Jan 2005 00:30:18 -0000 1.303
--- doc/src/sgml/runtime.sgml 24 Jan 2005 00:26:11 -0000
***************
*** 1393,1414 ****
-
- bgwriter_percent (integer)
-
- bgwriter_percent> configuration parameter
-
-
-
- In each round, no more than this percentage of the currently
- dirty buffers will be written (rounding up any fraction to
- the next whole number of buffers). The default value is
- 1. This option can only be set at server start or in the
- postgresql.conf file.
-
-
-
-
bgwriter_maxpages (integer)
--- 1393,1398 ----
***************
*** 1421,1439 ****
set at server start or in the
postgresql.conf file.
-
-
- Smaller values of bgwriter_percent and
- bgwriter_maxpages reduce the extra I/O load
- caused by the background writer, but leave more work to be done
- at checkpoint time. To reduce load spikes at checkpoints,
- increase the values. To disable background writing entirely,
- set bgwriter_percent and/or
- bgwriter_maxpages to zero.
-
--- 1405,1421 ----
set at server start or in the
postgresql.conf file.
+
+
+ Decreasing bgwriter_maxpages or increasing
+ bgwriter_delay will reduce the extra I/O
+ load caused by the background writer, but will leave more work to
+ be done at checkpoint time. To disable background writing
+ entirely, set bgwriter_maxpages to zero.
+
***************
*** 3866,3885 ****
-
- debug_shared_buffers (integer)
-
- debug_shared_buffers> configuration parameter
-
-
-
- Number of seconds between ARC reports.
- If set greater than zero, emit ARC statistics to the log every so many
- seconds. Zero (the default) disables reporting.
-
-
-
-
pre_auth_delay (integer)
--- 3848,3853 ----
Index: src/backend/catalog/index.c
===================================================================
RCS file: /var/lib/cvs/pgsql/src/backend/catalog/index.c,v
retrieving revision 1.244
diff -c -r1.244 index.c
*** src/backend/catalog/index.c 10 Jan 2005 20:02:19 -0000 1.244
--- src/backend/catalog/index.c 24 Jan 2005 00:18:49 -0000
***************
*** 1060,1066 ****
/* Send out shared cache inval if necessary */
if (!IsBootstrapProcessingMode())
CacheInvalidateHeapTuple(pg_class, tuple);
! BufferSync(-1, -1);
}
else if (dirty)
{
--- 1060,1066 ----
/* Send out shared cache inval if necessary */
if (!IsBootstrapProcessingMode())
CacheInvalidateHeapTuple(pg_class, tuple);
! BufferSync(-1);
}
else if (dirty)
{
Index: src/backend/commands/dbcommands.c
===================================================================
RCS file: /var/lib/cvs/pgsql/src/backend/commands/dbcommands.c,v
retrieving revision 1.148
diff -c -r1.148 dbcommands.c
*** src/backend/commands/dbcommands.c 31 Dec 2004 21:59:41 -0000 1.148
--- src/backend/commands/dbcommands.c 24 Jan 2005 00:18:49 -0000
***************
*** 332,338 ****
* up-to-date for the copy. (We really only need to flush buffers for
* the source database, but bufmgr.c provides no API for that.)
*/
! BufferSync(-1, -1);
/*
* Close virtual file descriptors so the kernel has more available for
--- 332,338 ----
* up-to-date for the copy. (We really only need to flush buffers for
* the source database, but bufmgr.c provides no API for that.)
*/
! BufferSync(-1);
/*
* Close virtual file descriptors so the kernel has more available for
***************
*** 1206,1212 ****
* up-to-date for the copy. (We really only need to flush buffers for
* the source database, but bufmgr.c provides no API for that.)
*/
! BufferSync(-1, -1);
#ifndef WIN32
--- 1206,1212 ----
* up-to-date for the copy. (We really only need to flush buffers for
* the source database, but bufmgr.c provides no API for that.)
*/
! BufferSync(-1);
#ifndef WIN32
Index: src/backend/commands/vacuum.c
===================================================================
RCS file: /var/lib/cvs/pgsql/src/backend/commands/vacuum.c,v
retrieving revision 1.299
diff -c -r1.299 vacuum.c
*** src/backend/commands/vacuum.c 31 Dec 2004 21:59:42 -0000 1.299
--- src/backend/commands/vacuum.c 24 Jan 2005 00:18:49 -0000
***************
*** 430,443 ****
old_context = MemoryContextSwitchTo(anl_context);
/*
! * Tell the buffer replacement strategy that vacuum is
! * causing the IO
*/
! StrategyHintVacuum(true);
!
analyze_rel(relid, vacstmt);
!
! StrategyHintVacuum(false);
if (use_own_xacts)
CommitTransactionCommand();
--- 430,441 ----
old_context = MemoryContextSwitchTo(anl_context);
/*
! * Tell the buffer manager that vacuum is causing the
! * IO
*/
! SetVacuumHint(true);
analyze_rel(relid, vacstmt);
! SetVacuumHint(false);
if (use_own_xacts)
CommitTransactionCommand();
***************
*** 453,458 ****
--- 451,458 ----
{
/* Make sure cost accounting is turned off after error */
VacuumCostActive = false;
+ /* Also turn off vacuum activity hint */
+ SetVacuumHint(false);
PG_RE_THROW();
}
PG_END_TRY();
***************
*** 890,899 ****
ActiveSnapshot = CopySnapshot(GetTransactionSnapshot());
/*
! * Tell the cache replacement strategy that vacuum is causing all
! * following IO
*/
! StrategyHintVacuum(true);
/*
* Check for user-requested abort. Note we want this to be inside a
--- 890,898 ----
ActiveSnapshot = CopySnapshot(GetTransactionSnapshot());
/*
! * Tell the buffer manager that vacuum is causing all following IO
*/
! SetVacuumHint(true);
/*
* Check for user-requested abort. Note we want this to be inside a
***************
*** 909,915 ****
ObjectIdGetDatum(relid),
0, 0, 0))
{
! StrategyHintVacuum(false);
CommitTransactionCommand();
return true; /* okay 'cause no data there */
}
--- 908,914 ----
ObjectIdGetDatum(relid),
0, 0, 0))
{
! SetVacuumHint(false);
CommitTransactionCommand();
return true; /* okay 'cause no data there */
}
***************
*** 943,949 ****
(errmsg("skipping \"%s\" --- only table or database owner can vacuum it",
RelationGetRelationName(onerel))));
relation_close(onerel, lmode);
! StrategyHintVacuum(false);
CommitTransactionCommand();
return false;
}
--- 942,948 ----
(errmsg("skipping \"%s\" --- only table or database owner can vacuum it",
RelationGetRelationName(onerel))));
relation_close(onerel, lmode);
! SetVacuumHint(false);
CommitTransactionCommand();
return false;
}
***************
*** 958,964 ****
(errmsg("skipping \"%s\" --- cannot vacuum indexes, views, or special system tables",
RelationGetRelationName(onerel))));
relation_close(onerel, lmode);
! StrategyHintVacuum(false);
CommitTransactionCommand();
return false;
}
--- 957,963 ----
(errmsg("skipping \"%s\" --- cannot vacuum indexes, views, or special system tables",
RelationGetRelationName(onerel))));
relation_close(onerel, lmode);
! SetVacuumHint(false);
CommitTransactionCommand();
return false;
}
***************
*** 973,979 ****
if (isOtherTempNamespace(RelationGetNamespace(onerel)))
{
relation_close(onerel, lmode);
! StrategyHintVacuum(false);
CommitTransactionCommand();
return true; /* assume no long-lived data in temp
* tables */
--- 972,978 ----
if (isOtherTempNamespace(RelationGetNamespace(onerel)))
{
relation_close(onerel, lmode);
! SetVacuumHint(false);
CommitTransactionCommand();
return true; /* assume no long-lived data in temp
* tables */
***************
*** 1013,1019 ****
/*
* Complete the transaction and free all temporary memory used.
*/
! StrategyHintVacuum(false);
CommitTransactionCommand();
/*
--- 1012,1018 ----
/*
* Complete the transaction and free all temporary memory used.
*/
! SetVacuumHint(false);
CommitTransactionCommand();
/*
Index: src/backend/postmaster/bgwriter.c
===================================================================
RCS file: /var/lib/cvs/pgsql/src/backend/postmaster/bgwriter.c,v
retrieving revision 1.13
diff -c -r1.13 bgwriter.c
*** src/backend/postmaster/bgwriter.c 10 Jan 2005 20:02:20 -0000 1.13
--- src/backend/postmaster/bgwriter.c 24 Jan 2005 00:20:21 -0000
***************
*** 116,122 ****
* GUC parameters
*/
int BgWriterDelay = 200;
- int BgWriterPercent = 1;
int BgWriterMaxPages = 100;
int CheckPointTimeout = 300;
--- 116,121 ----
***************
*** 370,376 ****
n = 1;
}
else
! n = BufferSync(BgWriterPercent, BgWriterMaxPages);
/*
* Nap for the configured time or sleep for 10 seconds if there
--- 369,375 ----
n = 1;
}
else
! n = BufferSync(BgWriterMaxPages);
/*
* Nap for the configured time or sleep for 10 seconds if there
Index: src/backend/storage/buffer/buf_init.c
===================================================================
RCS file: /var/lib/cvs/pgsql/src/backend/storage/buffer/buf_init.c,v
retrieving revision 1.70
diff -c -r1.70 buf_init.c
*** src/backend/storage/buffer/buf_init.c 31 Dec 2004 22:00:49 -0000 1.70
--- src/backend/storage/buffer/buf_init.c 24 Jan 2005 05:01:02 -0000
***************
*** 33,39 ****
/*
* Data Structures:
! * buffers live in a freelist and a lookup data structure.
*
*
* Buffer Lookup:
--- 33,39 ----
/*
* Data Structures:
! * buffers live in a freelist and a lookup hash table.
*
*
* Buffer Lookup:
***************
*** 51,59 ****
* Synchronization/Locking:
*
* BufMgrLock lock -- must be acquired before manipulating the
! * buffer search datastructures (lookup/freelist, as well as the
! * flag bits of any buffer). Must be released
! * before exit and before doing any IO.
*
* IO_IN_PROGRESS -- this is a flag in the buffer descriptor.
* It must be set when an IO is initiated and cleared at
--- 51,59 ----
* Synchronization/Locking:
*
* BufMgrLock lock -- must be acquired before manipulating the
! * buffer search data structures (lookup/freelist, as well as the
! * flag bits of any buffer). Must be released before exit and
! * before doing any IO.
*
* IO_IN_PROGRESS -- this is a flag in the buffer descriptor.
* It must be set when an IO is initiated and cleared at
***************
*** 80,87 ****
/*
* Initialize shared buffer pool
*
! * This is called once during shared-memory initialization (either in the
! * postmaster, or in a standalone backend).
*/
void
InitBufferPool(void)
--- 80,88 ----
/*
* Initialize shared buffer pool
*
! * This is called once during shared-memory initialization (either in
! * the postmaster, a standalone backend, or a forked backend in the
! * case of EXEC_BACKEND).
*/
void
InitBufferPool(void)
***************
*** 101,107 ****
if (foundDescs || foundBufs)
{
! /* both should be present or neither */
Assert(foundDescs && foundBufs);
}
else
--- 102,113 ----
if (foundDescs || foundBufs)
{
! /*
! * In the case of EXEC_BACKEND, we expect the shared data
! * structures to already exist; we just need to attach to
! * them. However, we expect both of them to exist if either
! * does.
! */
Assert(foundDescs && foundBufs);
}
else
***************
*** 120,140 ****
block = BufferBlocks;
/*
! * link the buffers into a single linked list. This will become
! * the LIFO list of unused buffers returned by
! * StrategyGetBuffer().
*/
for (i = 0; i < NBuffers; block += BLCKSZ, buf++, i++)
{
Assert(ShmemIsValid((unsigned long) block));
! buf->bufNext = i + 1;
CLEAR_BUFFERTAG(buf->tag);
buf->buf_id = i;
buf->data = MAKE_OFFSET(block);
! buf->flags = 0;
buf->refcount = 0;
buf->io_in_progress_lock = LWLockAssign();
buf->cntx_lock = LWLockAssign();
--- 126,150 ----
block = BufferBlocks;
/*
! * Link the buffers into a doubly-linked list
*/
for (i = 0; i < NBuffers; block += BLCKSZ, buf++, i++)
{
Assert(ShmemIsValid((unsigned long) block));
! /*
! * Note that Buffers are indexed beginning at 1, whereas
! * array indices (such as "i") begin at 0. Yes, this is
! * confusing, perhaps needlessly so.
! */
! buf->freeNext = i + 1 + 1;
! buf->freePrev = i + 1 - 1;
CLEAR_BUFFERTAG(buf->tag);
buf->buf_id = i;
buf->data = MAKE_OFFSET(block);
! buf->flags = BM_DELETED;
buf->refcount = 0;
buf->io_in_progress_lock = LWLockAssign();
buf->cntx_lock = LWLockAssign();
***************
*** 142,155 ****
buf->wait_backend_id = 0;
}
! /* Correct last entry */
! BufferDescriptors[NBuffers - 1].bufNext = -1;
LWLockRelease(BufMgrLock);
}
! /* Init other shared buffer-management stuff */
! StrategyInitialize(!foundDescs);
}
/*
--- 152,169 ----
buf->wait_backend_id = 0;
}
! /* Correct first and last entries */
! BufferDescriptors[0].freePrev = InvalidBuffer;
! BufferDescriptors[NBuffers - 1].freeNext = InvalidBuffer;
LWLockRelease(BufMgrLock);
}
! /* Set up the lookup hash table */
! InitBufTable(NBuffers);
!
! /* Remaining free list initialization */
! InitFreeList();
}
/*
***************
*** 202,214 ****
size += NBuffers * MAXALIGN(BLCKSZ);
/* size of buffer hash table */
! size += hash_estimate_size(NBuffers * 2, sizeof(BufferLookupEnt));
!
! /* size of the shared replacement strategy control block */
! size += MAXALIGN(sizeof(BufferStrategyControl));
! /* size of the ARC directory blocks */
! size += MAXALIGN(NBuffers * 2 * sizeof(BufferStrategyCDB));
return size;
}
--- 216,225 ----
size += NBuffers * MAXALIGN(BLCKSZ);
/* size of buffer hash table */
! size += hash_estimate_size(NBuffers, sizeof(BufferLookupEnt));
! /* size of the shared control data for free list */
! size += MAXALIGN(sizeof(FreeListControl));
return size;
}
Index: src/backend/storage/buffer/buf_table.c
===================================================================
RCS file: /var/lib/cvs/pgsql/src/backend/storage/buffer/buf_table.c,v
retrieving revision 1.38
diff -c -r1.38 buf_table.c
*** src/backend/storage/buffer/buf_table.c 31 Dec 2004 22:00:49 -0000 1.38
--- src/backend/storage/buffer/buf_table.c 24 Jan 2005 03:32:26 -0000
***************
*** 31,37 ****
/*
* Initialize shmem hash table for mapping buffers
! * size is the desired hash table size (2*NBuffers for ARC algorithm)
*/
void
InitBufTable(int size)
--- 31,37 ----
/*
* Initialize shmem hash table for mapping buffers
! * size is the desired hash table size
*/
void
InitBufTable(int size)
***************
*** 56,91 ****
/*
* BufTableLookup
! * Lookup the given BufferTag; return CDB index, or -1 if not found
*/
! int
BufTableLookup(BufferTag *tagPtr)
{
BufferLookupEnt *result;
if (tagPtr->blockNum == P_NEW)
! return -1;
result = (BufferLookupEnt *)
hash_search(SharedBufHash, (void *) tagPtr, HASH_FIND, NULL);
if (!result)
! return -1;
! return result->id;
}
/*
* BufTableInsert
! * Insert a hashtable entry for given tag and CDB index
*/
void
! BufTableInsert(BufferTag *tagPtr, int cdb_id)
{
BufferLookupEnt *result;
bool found;
result = (BufferLookupEnt *)
! hash_search(SharedBufHash, (void *) tagPtr, HASH_ENTER, &found);
if (!result)
ereport(ERROR,
--- 56,112 ----
/*
* BufTableLookup
! * Lookup the given BufferTag; return the buffer's descriptor, or
! * NULL if not found
*/
! BufferDesc *
BufTableLookup(BufferTag *tagPtr)
{
BufferLookupEnt *result;
+ BufferDesc *desc;
+ /*
+ * XXX: this is just a performance hack to avoid the need to do a
+ * hash table lookup for a buffer that obviously isn't in the
+ * table, right?
+ */
if (tagPtr->blockNum == P_NEW)
! return NULL;
result = (BufferLookupEnt *)
hash_search(SharedBufHash, (void *) tagPtr, HASH_FIND, NULL);
if (!result)
! return NULL;
! desc = &(BufferDescriptors[result->id]);
!
! /*
! * Sanity checks: make sure that we found the right buffer and
! * that the buffer is not marked as deleted (or else it shouldn't
! * be in the hash table)
! */
! Assert(desc->buf_id == result->id);
! Assert(!(desc->flags & BM_DELETED));
!
! return desc;
}
/*
* BufTableInsert
! * Insert a hashtable entry for given buffer
*/
void
! BufTableInsert(BufferDesc *buf)
{
BufferLookupEnt *result;
bool found;
+ /* cannot insert it twice */
+ Assert(buf->flags & BM_DELETED);
+ buf->flags &= ~(BM_DELETED);
+
result = (BufferLookupEnt *)
! hash_search(SharedBufHash, (void *) &(buf->tag), HASH_ENTER, &found);
if (!result)
ereport(ERROR,
***************
*** 95,115 ****
if (found) /* found something else in the table? */
elog(ERROR, "shared buffer hash table corrupted");
! result->id = cdb_id;
}
/*
* BufTableDelete
! * Delete the hashtable entry for given tag
*/
void
! BufTableDelete(BufferTag *tagPtr)
{
BufferLookupEnt *result;
result = (BufferLookupEnt *)
! hash_search(SharedBufHash, (void *) tagPtr, HASH_REMOVE, NULL);
if (!result) /* shouldn't happen */
elog(ERROR, "shared buffer hash table corrupted");
}
--- 116,156 ----
if (found) /* found something else in the table? */
elog(ERROR, "shared buffer hash table corrupted");
! result->id = buf->buf_id;
}
/*
* BufTableDelete
! * Delete the hashtable entry for the given buffer
*/
void
! BufTableDelete(BufferDesc *buf)
{
BufferLookupEnt *result;
+ /*
+ * If the buffer has not yet been initialized or has been removed
+ * from the hash table already, don't try to remove it again.
+ */
+ if (buf->flags & BM_DELETED)
+ return;
+
+ buf->flags |= BM_DELETED;
+
result = (BufferLookupEnt *)
! hash_search(SharedBufHash, (void *) &(buf->tag), HASH_REMOVE, NULL);
if (!result) /* shouldn't happen */
elog(ERROR, "shared buffer hash table corrupted");
+
+ /* sanity check that we deleted the right buffer */
+ Assert(result->id == buf->buf_id);
+
+ /*
+ * Clear the buffer's tag. This doesn't matter for the hash table,
+ * since the buffer is already removed from it, but it ensures that
+ * sequential searches through the buffer table won't think the buffer
+ * is still valid for its old page.
+ */
+ CLEAR_BUFFERTAG(buf->tag);
}
Index: src/backend/storage/buffer/bufmgr.c
===================================================================
RCS file: /var/lib/cvs/pgsql/src/backend/storage/buffer/bufmgr.c,v
retrieving revision 1.185
diff -c -r1.185 bufmgr.c
*** src/backend/storage/buffer/bufmgr.c 10 Jan 2005 20:02:21 -0000 1.185
--- src/backend/storage/buffer/bufmgr.c 24 Jan 2005 04:38:22 -0000
***************
*** 73,80 ****
static BufferDesc *PinCountWaitBuf = NULL;
- static void PinBuffer(BufferDesc *buf, bool fixOwner);
- static void UnpinBuffer(BufferDesc *buf, bool fixOwner);
static void WaitIO(BufferDesc *buf);
static void StartBufferIO(BufferDesc *buf, bool forInput);
static void TerminateBufferIO(BufferDesc *buf, int err_flag);
--- 73,78 ----
***************
*** 274,288 ****
BufferTag newTag; /* identity of requested block */
BufferDesc *buf,
*buf2;
- int cdb_found_index,
- cdb_replace_index;
bool inProgress; /* did we already do StartBufferIO? */
/* create a tag so we can lookup the buffer */
INIT_BUFFERTAG(newTag, reln, blockNum);
/* see if the block is in the buffer pool already */
! buf = StrategyBufferLookup(&newTag, false, &cdb_found_index);
if (buf != NULL)
{
/*
--- 272,284 ----
BufferTag newTag; /* identity of requested block */
BufferDesc *buf,
*buf2;
bool inProgress; /* did we already do StartBufferIO? */
/* create a tag so we can lookup the buffer */
INIT_BUFFERTAG(newTag, reln, blockNum);
/* see if the block is in the buffer pool already */
! buf = BufTableLookup(&newTag);
if (buf != NULL)
{
/*
***************
*** 293,299 ****
* is writing rather than reading.)
*/
*foundPtr = TRUE;
-
PinBuffer(buf, true);
if (!(buf->flags & BM_VALID))
--- 289,294 ----
***************
*** 329,355 ****
inProgress = FALSE;
do
{
! buf = StrategyGetBuffer(&cdb_replace_index);
!
! /* StrategyGetBuffer will elog if it can't find a free buffer */
! Assert(buf);
/*
! * There should be exactly one pin on the buffer after it is
! * allocated -- ours. If it had a pin it wouldn't have been on
! * the free list. No one else could have pinned it between
! * StrategyGetBuffer and here because we have the BufMgrLock.
! *
! * (We must pin the buffer before releasing BufMgrLock ourselves,
! * to ensure StrategyGetBuffer won't give the same buffer to someone
! * else.)
*/
! Assert(buf->refcount == 0);
! buf->refcount = 1;
! PrivateRefCount[BufferDescriptorGetBuffer(buf) - 1] = 1;
!
! ResourceOwnerRememberBuffer(CurrentResourceOwner,
! BufferDescriptorGetBuffer(buf));
if ((buf->flags & BM_VALID) &&
(buf->flags & BM_DIRTY || buf->cntxDirty))
--- 324,339 ----
inProgress = FALSE;
do
{
! buf = GetFreeBuffer();
/*
! * GetFreeBuffer() will return a pinned buffer; since the
! * buffer has just been removed from the free list, there
! * should be no other pins on it (no one else could have
! * pinned it since we still hold the BufMgrLock).
*/
! Assert(buf->refcount == 1);
! Assert(PrivateRefCount[buf->buf_id] == 1);
if ((buf->flags & BM_VALID) &&
(buf->flags & BM_DIRTY || buf->cntxDirty))
***************
*** 397,403 ****
* cdb_found_index, since the CDB could have disappeared from
* B1/B2 list while we were writing.
*/
! buf2 = StrategyBufferLookup(&newTag, true, &cdb_found_index);
if (buf2 != NULL)
{
/*
--- 381,387 ----
* cdb_found_index, since the CDB could have disappeared from
* B1/B2 list while we were writing.
*/
! buf2 = BufTableLookup(&newTag);
if (buf2 != NULL)
{
/*
***************
*** 412,420 ****
buf = buf2;
/* remaining code should match code at top of routine */
-
*foundPtr = TRUE;
-
PinBuffer(buf, true);
if (!(buf->flags & BM_VALID))
--- 396,402 ----
***************
*** 464,478 ****
*/
/*
! * Tell the buffer replacement strategy that we are replacing the
! * buffer content. Then rename the buffer. Clearing BM_VALID here is
! * necessary, clearing the dirtybits is just paranoia.
*/
! StrategyReplaceBuffer(buf, &newTag, cdb_found_index, cdb_replace_index);
buf->tag = newTag;
buf->flags &= ~(BM_VALID | BM_DIRTY | BM_JUST_DIRTIED | BM_IO_ERROR);
buf->cntxDirty = false;
/*
* Buffer contents are currently invalid. Have to mark IO IN PROGRESS
* so no one fiddles with them until the read completes. We may have
--- 446,462 ----
*/
/*
! * Rename the buffer. Clearing BM_VALID here is necessary,
! * clearing the dirtybits is just paranoia.
*/
! BufTableDelete(buf);
!
buf->tag = newTag;
buf->flags &= ~(BM_VALID | BM_DIRTY | BM_JUST_DIRTIED | BM_IO_ERROR);
buf->cntxDirty = false;
+ BufTableInsert(buf);
+
/*
* Buffer contents are currently invalid. Have to mark IO IN PROGRESS
* so no one fiddles with them until the read completes. We may have
***************
*** 617,731 ****
}
/*
- * PinBuffer -- make buffer unavailable for replacement.
- *
- * This should be applied only to shared buffers, never local ones.
- * Bufmgr lock must be held by caller.
- *
- * Most but not all callers want CurrentResourceOwner to be adjusted.
- * Note that ResourceOwnerEnlargeBuffers must have been done already.
- */
- static void
- PinBuffer(BufferDesc *buf, bool fixOwner)
- {
- int b = BufferDescriptorGetBuffer(buf) - 1;
-
- if (PrivateRefCount[b] == 0)
- buf->refcount++;
- PrivateRefCount[b]++;
- Assert(PrivateRefCount[b] > 0);
- if (fixOwner)
- ResourceOwnerRememberBuffer(CurrentResourceOwner,
- BufferDescriptorGetBuffer(buf));
- }
-
- /*
- * UnpinBuffer -- make buffer available for replacement.
- *
- * This should be applied only to shared buffers, never local ones.
- * Bufmgr lock must be held by caller.
- *
- * Most but not all callers want CurrentResourceOwner to be adjusted.
- */
- static void
- UnpinBuffer(BufferDesc *buf, bool fixOwner)
- {
- int b = BufferDescriptorGetBuffer(buf) - 1;
-
- if (fixOwner)
- ResourceOwnerForgetBuffer(CurrentResourceOwner,
- BufferDescriptorGetBuffer(buf));
-
- Assert(buf->refcount > 0);
- Assert(PrivateRefCount[b] > 0);
- PrivateRefCount[b]--;
- if (PrivateRefCount[b] == 0)
- {
- buf->refcount--;
- /* I'd better not still hold any locks on the buffer */
- Assert(!LWLockHeldByMe(buf->cntx_lock));
- Assert(!LWLockHeldByMe(buf->io_in_progress_lock));
- }
-
- if ((buf->flags & BM_PIN_COUNT_WAITER) != 0 &&
- buf->refcount == 1)
- {
- /* we just released the last pin other than the waiter's */
- buf->flags &= ~BM_PIN_COUNT_WAITER;
- ProcSendSignal(buf->wait_backend_id);
- }
- else
- {
- /* do nothing */
- }
- }
-
- /*
* BufferSync -- Write out dirty buffers in the pool.
*
! * This is called at checkpoint time to write out all dirty shared buffers,
! * and by the background writer process to write out some of the dirty blocks.
! * percent/maxpages should be -1 in the former case, and limit values (>= 0)
! * in the latter.
*
* Returns the number of buffers written.
*/
int
! BufferSync(int percent, int maxpages)
{
BufferDesc **dirty_buffers;
BufferTag *buftags;
int num_buffer_dirty;
int i;
! /* If either limit is zero then we are disabled from doing anything... */
! if (percent == 0 || maxpages == 0)
return 0;
/*
! * Get a list of all currently dirty buffers and how many there are.
! * We do not flush buffers that get dirtied after we started. They
! * have to wait until the next checkpoint.
*/
! dirty_buffers = (BufferDesc **) palloc(NBuffers * sizeof(BufferDesc *));
! buftags = (BufferTag *) palloc(NBuffers * sizeof(BufferTag));
LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
! num_buffer_dirty = StrategyDirtyBufferList(dirty_buffers, buftags,
! NBuffers);
! /*
! * If called by the background writer, we are usually asked to only
! * write out some portion of dirty buffers now, to prevent the IO
! * storm at checkpoint time.
! */
! if (percent > 0)
! {
! Assert(percent <= 100);
! num_buffer_dirty = (num_buffer_dirty * percent + 99) / 100;
! }
! if (maxpages > 0 && num_buffer_dirty > maxpages)
! num_buffer_dirty = maxpages;
/* Make sure we can handle the pin inside the loop */
ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
--- 601,651 ----
}
/*
* BufferSync -- Write out dirty buffers in the pool.
*
! * This is called at checkpoint time to write out all dirty shared
! * buffers, and by the background writer process to write out some of
! * the dirty blocks. maxpages should be -1 in the former case, and a
! * limit value (>= 0) in the latter.
*
* Returns the number of buffers written.
*/
int
! BufferSync(int maxpages)
{
BufferDesc **dirty_buffers;
BufferTag *buftags;
int num_buffer_dirty;
int i;
+ bool is_checkpoint = false;
! /* If the limit is zero then we are disabled from doing anything... */
! if (maxpages == 0)
return 0;
/*
! * If called by the checkpoint process, we want to get a list of
! * all dirty buffers. If called by the background writer, we want
! * to get a list of up to N unpinned, dirty buffers.
*/
! if (maxpages == -1) /* checkpoint process */
! {
! maxpages = NBuffers;
! is_checkpoint = true;
! }
!
! dirty_buffers = (BufferDesc **) palloc(maxpages * sizeof(BufferDesc *));
! buftags = (BufferTag *) palloc(maxpages * sizeof(BufferTag));
LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
! if (is_checkpoint)
! num_buffer_dirty = GetAllDirtyBuffers(dirty_buffers, buftags);
! else
! num_buffer_dirty = GetUnpinnedDirtyBuffers(dirty_buffers,
! buftags,
! maxpages);
! Assert(num_buffer_dirty <= maxpages);
/* Make sure we can handle the pin inside the loop */
ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
***************
*** 757,762 ****
--- 677,687 ----
continue;
/*
+ * XXX: if this is the bgwriter, should we check that the page
+ * is still unreferenced?
+ */
+
+ /*
* IO synchronization. Note that we do it with unpinned buffer to
* avoid conflicts with FlushRelationBuffers.
*/
***************
*** 961,967 ****
void
FlushBufferPool(void)
{
! BufferSync(-1, -1);
smgrsync();
}
--- 886,892 ----
void
FlushBufferPool(void)
{
! BufferSync(-1);
smgrsync();
}
***************
*** 1030,1036 ****
* caller is also responsible for doing StartBufferIO/TerminateBufferIO.
*
* If the caller has an smgr reference for the buffer's relation, pass it
! * as the second parameter. If not, pass NULL. (Do not open relation
* while holding BufMgrLock!)
*
* When earlylock is TRUE, we grab the per-buffer sharelock before releasing
--- 955,961 ----
* caller is also responsible for doing StartBufferIO/TerminateBufferIO.
*
* If the caller has an smgr reference for the buffer's relation, pass it
! * as the second parameter. If not, pass NULL. (Do not open relations
* while holding BufMgrLock!)
*
* When earlylock is TRUE, we grab the per-buffer sharelock before releasing
***************
*** 1261,1267 ****
/*
* And mark the buffer as no longer occupied by this rel.
*/
! StrategyInvalidateBuffer(bufHdr);
}
}
--- 1186,1192 ----
/*
* And mark the buffer as no longer occupied by this rel.
*/
! BufTableDelete(bufHdr);
}
}
***************
*** 1321,1327 ****
/*
* And mark the buffer as no longer occupied by this page.
*/
! StrategyInvalidateBuffer(bufHdr);
}
}
--- 1246,1252 ----
/*
* And mark the buffer as no longer occupied by this page.
*/
! BufTableDelete(bufHdr);
}
}
***************
*** 1523,1529 ****
bufHdr->tag.blockNum,
PrivateRefCount[i], bufHdr->refcount);
if (bufHdr->tag.blockNum >= firstDelBlock)
! StrategyInvalidateBuffer(bufHdr);
}
}
--- 1448,1454 ----
bufHdr->tag.blockNum,
PrivateRefCount[i], bufHdr->refcount);
if (bufHdr->tag.blockNum >= firstDelBlock)
! BufTableDelete(bufHdr);
}
}
Index: src/backend/storage/buffer/freelist.c
===================================================================
RCS file: /var/lib/cvs/pgsql/src/backend/storage/buffer/freelist.c,v
retrieving revision 1.49
diff -c -r1.49 freelist.c
*** src/backend/storage/buffer/freelist.c 31 Dec 2004 22:00:49 -0000 1.49
--- src/backend/storage/buffer/freelist.c 24 Jan 2005 05:19:43 -0000
***************
*** 23,890 ****
#include "access/xact.h"
#include "storage/buf_internals.h"
#include "storage/bufmgr.h"
!
!
! /* GUC variable: time in seconds between statistics reports */
! int DebugSharedBuffers = 0;
!
! /* Pointers to shared state */
! static BufferStrategyControl *StrategyControl = NULL;
! static BufferStrategyCDB *StrategyCDB = NULL;
/* Backend-local state about whether currently vacuuming */
! static bool strategy_hint_vacuum = false;
! static TransactionId strategy_vacuum_xid;
!
!
! #define T1_TARGET (StrategyControl->target_T1_size)
! #define B1_LENGTH (StrategyControl->listSize[STRAT_LIST_B1])
! #define T1_LENGTH (StrategyControl->listSize[STRAT_LIST_T1])
! #define T2_LENGTH (StrategyControl->listSize[STRAT_LIST_T2])
! #define B2_LENGTH (StrategyControl->listSize[STRAT_LIST_B2])
!
!
! /*
! * Macro to remove a CDB from whichever list it currently is on
! */
! #define STRAT_LIST_REMOVE(cdb) \
! do { \
! Assert((cdb)->list >= 0 && (cdb)->list < STRAT_NUM_LISTS); \
! if ((cdb)->prev < 0) \
! StrategyControl->listHead[(cdb)->list] = (cdb)->next; \
! else \
! StrategyCDB[(cdb)->prev].next = (cdb)->next; \
! if ((cdb)->next < 0) \
! StrategyControl->listTail[(cdb)->list] = (cdb)->prev; \
! else \
! StrategyCDB[(cdb)->next].prev = (cdb)->prev; \
! StrategyControl->listSize[(cdb)->list]--; \
! (cdb)->list = STRAT_LIST_UNUSED; \
! } while(0)
! /*
! * Macro to add a CDB to the tail of a list (MRU position)
! */
! #define STRAT_MRU_INSERT(cdb,l) \
! do { \
! Assert((cdb)->list == STRAT_LIST_UNUSED); \
! if (StrategyControl->listTail[(l)] < 0) \
! { \
! (cdb)->prev = (cdb)->next = -1; \
! StrategyControl->listHead[(l)] = \
! StrategyControl->listTail[(l)] = \
! ((cdb) - StrategyCDB); \
! } \
! else \
! { \
! (cdb)->next = -1; \
! (cdb)->prev = StrategyControl->listTail[(l)]; \
! StrategyCDB[StrategyControl->listTail[(l)]].next = \
! ((cdb) - StrategyCDB); \
! StrategyControl->listTail[(l)] = \
! ((cdb) - StrategyCDB); \
! } \
! StrategyControl->listSize[(l)]++; \
! (cdb)->list = (l); \
! } while(0)
/*
! * Macro to add a CDB to the head of a list (LRU position)
*/
! #define STRAT_LRU_INSERT(cdb,l) \
! do { \
! Assert((cdb)->list == STRAT_LIST_UNUSED); \
! if (StrategyControl->listHead[(l)] < 0) \
! { \
! (cdb)->prev = (cdb)->next = -1; \
! StrategyControl->listHead[(l)] = \
! StrategyControl->listTail[(l)] = \
! ((cdb) - StrategyCDB); \
! } \
! else \
! { \
! (cdb)->prev = -1; \
! (cdb)->next = StrategyControl->listHead[(l)]; \
! StrategyCDB[StrategyControl->listHead[(l)]].prev = \
! ((cdb) - StrategyCDB); \
! StrategyControl->listHead[(l)] = \
! ((cdb) - StrategyCDB); \
! } \
! StrategyControl->listSize[(l)]++; \
! (cdb)->list = (l); \
! } while(0)
!
!
! /*
! * Printout for use when DebugSharedBuffers is enabled
! */
! static void
! StrategyStatsDump(void)
{
! time_t now = time(NULL);
! if (StrategyControl->stat_report + DebugSharedBuffers < now)
! {
! long all_hit,
! b1_hit,
! t1_hit,
! t2_hit,
! b2_hit;
! int id,
! t1_clean,
! t2_clean;
! ErrorContextCallback *errcxtold;
!
! id = StrategyControl->listHead[STRAT_LIST_T1];
! t1_clean = 0;
! while (id >= 0)
! {
! if (BufferDescriptors[StrategyCDB[id].buf_id].flags & BM_DIRTY)
! break;
! t1_clean++;
! id = StrategyCDB[id].next;
! }
! id = StrategyControl->listHead[STRAT_LIST_T2];
! t2_clean = 0;
! while (id >= 0)
! {
! if (BufferDescriptors[StrategyCDB[id].buf_id].flags & BM_DIRTY)
! break;
! t2_clean++;
! id = StrategyCDB[id].next;
! }
!
! if (StrategyControl->num_lookup == 0)
! all_hit = b1_hit = t1_hit = t2_hit = b2_hit = 0;
! else
! {
! b1_hit = (StrategyControl->num_hit[STRAT_LIST_B1] * 100 /
! StrategyControl->num_lookup);
! t1_hit = (StrategyControl->num_hit[STRAT_LIST_T1] * 100 /
! StrategyControl->num_lookup);
! t2_hit = (StrategyControl->num_hit[STRAT_LIST_T2] * 100 /
! StrategyControl->num_lookup);
! b2_hit = (StrategyControl->num_hit[STRAT_LIST_B2] * 100 /
! StrategyControl->num_lookup);
! all_hit = b1_hit + t1_hit + t2_hit + b2_hit;
! }
! errcxtold = error_context_stack;
! error_context_stack = NULL;
! elog(DEBUG1, "ARC T1target=%5d B1len=%5d T1len=%5d T2len=%5d B2len=%5d",
! T1_TARGET, B1_LENGTH, T1_LENGTH, T2_LENGTH, B2_LENGTH);
! elog(DEBUG1, "ARC total =%4ld%% B1hit=%4ld%% T1hit=%4ld%% T2hit=%4ld%% B2hit=%4ld%%",
! all_hit, b1_hit, t1_hit, t2_hit, b2_hit);
! elog(DEBUG1, "ARC clean buffers at LRU T1= %5d T2= %5d",
! t1_clean, t2_clean);
! error_context_stack = errcxtold;
!
! StrategyControl->num_lookup = 0;
! StrategyControl->num_hit[STRAT_LIST_B1] = 0;
! StrategyControl->num_hit[STRAT_LIST_T1] = 0;
! StrategyControl->num_hit[STRAT_LIST_T2] = 0;
! StrategyControl->num_hit[STRAT_LIST_B2] = 0;
! StrategyControl->stat_report = now;
}
}
! /*
! * StrategyBufferLookup
! *
! * Lookup a page request in the cache directory. A buffer is only
! * returned for a T1 or T2 cache hit. B1 and B2 hits are just
! * remembered here, to possibly affect the behaviour later.
! *
! * recheck indicates we are rechecking after I/O wait; do not change
! * internal status in this case.
! *
! * *cdb_found_index is set to the index of the found CDB, or -1 if none.
! * This is not intended to be used by the caller, except to pass to
! * StrategyReplaceBuffer().
! */
! BufferDesc *
! StrategyBufferLookup(BufferTag *tagPtr, bool recheck,
! int *cdb_found_index)
{
! BufferStrategyCDB *cdb;
! /* Optional stats printout */
! if (DebugSharedBuffers > 0)
! StrategyStatsDump();
! /*
! * Count lookups
! */
! StrategyControl->num_lookup++;
! /*
! * Lookup the block in the shared hash table
! */
! *cdb_found_index = BufTableLookup(tagPtr);
! /*
! * Done if complete CDB lookup miss
! */
! if (*cdb_found_index < 0)
! return NULL;
! /*
! * We found a CDB
! */
! cdb = &StrategyCDB[*cdb_found_index];
! /*
! * Count hits
! */
! StrategyControl->num_hit[cdb->list]++;
! /*
! * If this is a T2 hit, we simply move the CDB to the T2 MRU position
! * and return the found buffer.
! *
! * A CDB in T2 cannot have t1_vacuum set, so we needn't check. However,
! * if the current process is VACUUM then it doesn't promote to MRU.
! */
! if (cdb->list == STRAT_LIST_T2)
{
! if (!strategy_hint_vacuum)
! {
! STRAT_LIST_REMOVE(cdb);
! STRAT_MRU_INSERT(cdb, STRAT_LIST_T2);
! }
!
! return &BufferDescriptors[cdb->buf_id];
}
!
! /*
! * If this is a T1 hit, we move the buffer to the T2 MRU only if
! * another transaction had read it into T1, *and* neither transaction
! * is a VACUUM. This is required because any UPDATE or DELETE in
! * PostgreSQL does multiple ReadBuffer(), first during the scan, later
! * during the heap_update() or heap_delete(). Otherwise move to T1
! * MRU. VACUUM doesn't even get to make that happen.
! */
! if (cdb->list == STRAT_LIST_T1)
{
! if (!strategy_hint_vacuum)
! {
! if (!cdb->t1_vacuum &&
! !TransactionIdEquals(cdb->t1_xid, GetTopTransactionId()))
! {
! STRAT_LIST_REMOVE(cdb);
! STRAT_MRU_INSERT(cdb, STRAT_LIST_T2);
! }
! else
! {
! STRAT_LIST_REMOVE(cdb);
! STRAT_MRU_INSERT(cdb, STRAT_LIST_T1);
!
! /*
! * If a non-VACUUM process references a page recently
! * loaded by VACUUM, clear the stigma; the state will now
! * be the same as if this process loaded it originally.
! */
! if (cdb->t1_vacuum)
! {
! cdb->t1_xid = GetTopTransactionId();
! cdb->t1_vacuum = false;
! }
! }
! }
!
! return &BufferDescriptors[cdb->buf_id];
}
! /*
! * In the case of a recheck we don't care about B1 or B2 hits here.
! * The bufmgr does this call only to make sure no-one faulted in the
! * block while we where busy flushing another; we don't want to doubly
! * adjust the T1target.
! *
! * Now for this really to end up as a B1 or B2 cache hit, we must have
! * been flushing for quite some time as the block not only must have
! * been read, but also traveled through the queue and evicted from the
! * T cache again already.
! *
! * VACUUM re-reads shouldn't adjust the target either.
! */
! if (recheck || strategy_hint_vacuum)
! return NULL;
!
! /*
! * Adjust the target size of the T1 cache depending on if this is a B1
! * or B2 hit.
! */
! switch (cdb->list)
{
! case STRAT_LIST_B1:
!
! /*
! * B1 hit means that the T1 cache is probably too small.
! * Adjust the T1 target size and continue below.
! */
! T1_TARGET = Min(T1_TARGET + Max(B2_LENGTH / B1_LENGTH, 1),
! NBuffers);
! break;
!
! case STRAT_LIST_B2:
!
! /*
! * B2 hit means that the T2 cache is probably too small.
! * Adjust the T1 target size and continue below.
! */
! T1_TARGET = Max(T1_TARGET - Max(B1_LENGTH / B2_LENGTH, 1), 0);
! break;
!
! default:
! elog(ERROR, "buffer hash table corrupted: CDB->list = %d",
! cdb->list);
}
! /*
! * Even though we had seen the block in the past, its data is not
! * currently in memory ... cache miss to the bufmgr.
! */
! return NULL;
}
-
/*
! * StrategyGetBuffer
! *
! * Called by the bufmgr to get the next candidate buffer to use in
! * BufferAlloc(). The only hard requirement BufferAlloc() has is that
! * this buffer must not currently be pinned.
! *
! * *cdb_replace_index is set to the index of the candidate CDB, or -1 if
! * none (meaning we are using a previously free buffer). This is not
! * intended to be used by the caller, except to pass to
! * StrategyReplaceBuffer().
*/
! BufferDesc *
! StrategyGetBuffer(int *cdb_replace_index)
{
! int cdb_id;
! BufferDesc *buf;
! if (StrategyControl->listFreeBuffers < 0)
! {
! /*
! * We don't have a free buffer, must take one from T1 or T2.
! * Choose based on trying to converge T1len to T1target.
! */
! if (T1_LENGTH >= Max(1, T1_TARGET))
! {
! /*
! * We should take the first unpinned buffer from T1.
! */
! cdb_id = StrategyControl->listHead[STRAT_LIST_T1];
! while (cdb_id >= 0)
! {
! buf = &BufferDescriptors[StrategyCDB[cdb_id].buf_id];
! if (buf->refcount == 0)
! {
! *cdb_replace_index = cdb_id;
! Assert(StrategyCDB[cdb_id].list == STRAT_LIST_T1);
! return buf;
! }
! cdb_id = StrategyCDB[cdb_id].next;
! }
! /*
! * No unpinned T1 buffer found - try T2 cache.
! */
! cdb_id = StrategyControl->listHead[STRAT_LIST_T2];
! while (cdb_id >= 0)
! {
! buf = &BufferDescriptors[StrategyCDB[cdb_id].buf_id];
! if (buf->refcount == 0)
! {
! *cdb_replace_index = cdb_id;
! Assert(StrategyCDB[cdb_id].list == STRAT_LIST_T2);
! return buf;
! }
! cdb_id = StrategyCDB[cdb_id].next;
! }
!
! /*
! * No unpinned buffers at all!!!
! */
! elog(ERROR, "no unpinned buffers available");
! }
! else
! {
! /*
! * We should take the first unpinned buffer from T2.
! */
! cdb_id = StrategyControl->listHead[STRAT_LIST_T2];
! while (cdb_id >= 0)
! {
! buf = &BufferDescriptors[StrategyCDB[cdb_id].buf_id];
! if (buf->refcount == 0)
! {
! *cdb_replace_index = cdb_id;
! Assert(StrategyCDB[cdb_id].list == STRAT_LIST_T2);
! return buf;
! }
! cdb_id = StrategyCDB[cdb_id].next;
! }
!
! /*
! * No unpinned T2 buffer found - try T1 cache.
! */
! cdb_id = StrategyControl->listHead[STRAT_LIST_T1];
! while (cdb_id >= 0)
! {
! buf = &BufferDescriptors[StrategyCDB[cdb_id].buf_id];
! if (buf->refcount == 0)
! {
! *cdb_replace_index = cdb_id;
! Assert(StrategyCDB[cdb_id].list == STRAT_LIST_T1);
! return buf;
! }
! cdb_id = StrategyCDB[cdb_id].next;
! }
! /*
! * No unpinned buffers at all!!!
! */
! elog(ERROR, "no unpinned buffers available");
! }
}
else
{
! /* There is a completely free buffer available - take it */
!
! /*
! * Note: This code uses the side effect that a free buffer can
! * never be pinned or dirty and therefore the call to
! * StrategyReplaceBuffer() will happen without the bufmgr
! * releasing the bufmgr-lock in the meantime. That means, that
! * there will never be any reason to recheck. Otherwise we would
! * leak shared buffers here!
! */
! *cdb_replace_index = -1;
! buf = &BufferDescriptors[StrategyControl->listFreeBuffers];
! StrategyControl->listFreeBuffers = buf->bufNext;
! buf->bufNext = -1;
!
! /* Buffer in freelist cannot be pinned */
! Assert(buf->refcount == 0);
! Assert(!(buf->flags & BM_DIRTY));
!
! return buf;
}
! /* not reached */
! return NULL;
}
-
/*
! * StrategyReplaceBuffer
! *
! * Called by the buffer manager to inform us that he flushed a buffer
! * and is now about to replace the content. Prior to this call,
! * the cache algorithm still reports the buffer as in the cache. After
! * this call we report the new block, even if IO might still need to
! * be done to bring in the new content.
! *
! * cdb_found_index and cdb_replace_index must be the auxiliary values
! * returned by previous calls to StrategyBufferLookup and StrategyGetBuffer.
*/
! void
! StrategyReplaceBuffer(BufferDesc *buf, BufferTag *newTag,
! int cdb_found_index, int cdb_replace_index)
{
! BufferStrategyCDB *cdb_found;
! BufferStrategyCDB *cdb_replace;
! if (cdb_found_index >= 0)
! {
! /* This must have been a ghost buffer cache hit (B1 or B2) */
! cdb_found = &StrategyCDB[cdb_found_index];
!
! /* Assert that the buffer remembered in cdb_found is the one */
! /* the buffer manager is currently faulting in */
! Assert(BUFFERTAGS_EQUAL(cdb_found->buf_tag, *newTag));
! if (cdb_replace_index >= 0)
! {
! /* We are satisfying it with an evicted T buffer */
! cdb_replace = &StrategyCDB[cdb_replace_index];
! /* Assert that the buffer remembered in cdb_replace is */
! /* the one the buffer manager has just evicted */
! Assert(cdb_replace->list == STRAT_LIST_T1 ||
! cdb_replace->list == STRAT_LIST_T2);
! Assert(cdb_replace->buf_id == buf->buf_id);
! Assert(BUFFERTAGS_EQUAL(cdb_replace->buf_tag, buf->tag));
! /*
! * Under normal circumstances we move the evicted T list entry
! * to the corresponding B list. However, T1 entries that
! * exist only because of VACUUM are just thrown into the
! * unused list instead. We don't expect them to be touched
! * again by the VACUUM, and if we put them into B1 then VACUUM
! * would skew T1_target adjusting.
! */
! if (cdb_replace->t1_vacuum)
! {
! BufTableDelete(&(cdb_replace->buf_tag));
! STRAT_LIST_REMOVE(cdb_replace);
! cdb_replace->next = StrategyControl->listUnusedCDB;
! StrategyControl->listUnusedCDB = cdb_replace_index;
! }
! else
! {
! if (cdb_replace->list == STRAT_LIST_T1)
! {
! STRAT_LIST_REMOVE(cdb_replace);
! STRAT_MRU_INSERT(cdb_replace, STRAT_LIST_B1);
! }
! else
! {
! STRAT_LIST_REMOVE(cdb_replace);
! STRAT_MRU_INSERT(cdb_replace, STRAT_LIST_B2);
! }
! }
! /* And clear its block reference */
! cdb_replace->buf_id = -1;
! }
! else
! {
! /* We are satisfying it with an unused buffer */
! }
!
! /* Now the found B CDB gets the buffer and is moved to T2 */
! cdb_found->buf_id = buf->buf_id;
! STRAT_LIST_REMOVE(cdb_found);
! STRAT_MRU_INSERT(cdb_found, STRAT_LIST_T2);
! }
! else
{
/*
! * This was a complete cache miss, so we need to create a new CDB.
! * The goal is to keep T1len+B1len <= c.
*/
! if (B1_LENGTH > 0 && (T1_LENGTH + B1_LENGTH) >= NBuffers)
! {
! /* So if B1 isn't empty and T1len+B1len >= c we take B1-LRU */
! cdb_found = &StrategyCDB[StrategyControl->listHead[STRAT_LIST_B1]];
!
! BufTableDelete(&(cdb_found->buf_tag));
! STRAT_LIST_REMOVE(cdb_found);
! }
! else
! {
! /* Otherwise, we try to use a free one */
! if (StrategyControl->listUnusedCDB >= 0)
! {
! cdb_found = &StrategyCDB[StrategyControl->listUnusedCDB];
! StrategyControl->listUnusedCDB = cdb_found->next;
! }
! else
! {
! /* If there isn't, we take B2-LRU ... except if */
! /* T1len+B1len+T2len = c ... oh my */
! if (B2_LENGTH > 0)
! cdb_found = &StrategyCDB[StrategyControl->listHead[STRAT_LIST_B2]];
! else
! cdb_found = &StrategyCDB[StrategyControl->listHead[STRAT_LIST_B1]];
!
! BufTableDelete(&(cdb_found->buf_tag));
! STRAT_LIST_REMOVE(cdb_found);
! }
! }
!
! /* Set the CDB's buf_tag and insert it into the hash table */
! cdb_found->buf_tag = *newTag;
! BufTableInsert(&(cdb_found->buf_tag), (cdb_found - StrategyCDB));
! if (cdb_replace_index >= 0)
{
! /*
! * The buffer was formerly in a T list, move its CDB to the
! * corresponding B list
! */
! cdb_replace = &StrategyCDB[cdb_replace_index];
! Assert(cdb_replace->list == STRAT_LIST_T1 ||
! cdb_replace->list == STRAT_LIST_T2);
! Assert(cdb_replace->buf_id == buf->buf_id);
! Assert(BUFFERTAGS_EQUAL(cdb_replace->buf_tag, buf->tag));
!
! if (cdb_replace->list == STRAT_LIST_T1)
! {
! STRAT_LIST_REMOVE(cdb_replace);
! STRAT_MRU_INSERT(cdb_replace, STRAT_LIST_B1);
! }
else
! {
! STRAT_LIST_REMOVE(cdb_replace);
! STRAT_MRU_INSERT(cdb_replace, STRAT_LIST_B2);
! }
! /* And clear its block reference */
! cdb_replace->buf_id = -1;
! }
! else
! {
! /* We are satisfying it with an unused buffer */
! }
! /* Assign the buffer id to the new CDB */
! cdb_found->buf_id = buf->buf_id;
!
! /*
! * Specialized VACUUM optimization. If this complete cache miss
! * happened because vacuum needed the page, we place it at the LRU
! * position of T1; normally it goes at the MRU position.
! */
! if (strategy_hint_vacuum)
! {
! if (TransactionIdEquals(strategy_vacuum_xid,
! GetTopTransactionId()))
! STRAT_LRU_INSERT(cdb_found, STRAT_LIST_T1);
else
! {
! /* VACUUM must have been aborted by error, reset flag */
! strategy_hint_vacuum = false;
! STRAT_MRU_INSERT(cdb_found, STRAT_LIST_T1);
! }
! }
! else
! STRAT_MRU_INSERT(cdb_found, STRAT_LIST_T1);
! /*
! * Remember the Xid when this buffer went onto T1 to avoid a
! * single UPDATE promoting a newcomer straight into T2. Also
! * remember if it was loaded for VACUUM.
! */
! cdb_found->t1_xid = GetTopTransactionId();
! cdb_found->t1_vacuum = strategy_hint_vacuum;
}
- }
/*
! * StrategyInvalidateBuffer
*
! * Called by the buffer manager to inform us that a buffer content
! * is no longer valid. We simply throw away any eventual existing
! * buffer hash entry and move the CDB and buffer to the free lists.
*/
void
! StrategyInvalidateBuffer(BufferDesc *buf)
{
! int cdb_id;
! BufferStrategyCDB *cdb;
! /* The buffer cannot be dirty or pinned */
! Assert(!(buf->flags & BM_DIRTY) || !(buf->flags & BM_VALID));
! Assert(buf->refcount == 0);
! /*
! * Lookup the cache directory block for this buffer
! */
! cdb_id = BufTableLookup(&(buf->tag));
! if (cdb_id < 0)
! elog(ERROR, "buffer %d not in buffer hash table", buf->buf_id);
! cdb = &StrategyCDB[cdb_id];
! /*
! * Remove the CDB from the hashtable and the ARC queue it is currently
! * on.
! */
! BufTableDelete(&(cdb->buf_tag));
! STRAT_LIST_REMOVE(cdb);
! /*
! * Clear out the CDB's buffer tag and association with the buffer and
! * add it to the list of unused CDB's
! */
! CLEAR_BUFFERTAG(cdb->buf_tag);
! cdb->buf_id = -1;
! cdb->next = StrategyControl->listUnusedCDB;
! StrategyControl->listUnusedCDB = cdb_id;
! /*
! * Clear out the buffer's tag and add it to the list of currently
! * unused buffers. We must do this to ensure that linear scans of the
! * buffer array don't think the buffer is valid.
! */
! CLEAR_BUFFERTAG(buf->tag);
! buf->flags &= ~(BM_VALID | BM_DIRTY);
! buf->cntxDirty = false;
! buf->bufNext = StrategyControl->listFreeBuffers;
! StrategyControl->listFreeBuffers = buf->buf_id;
}
/*
! * StrategyHintVacuum -- tell us whether VACUUM is active
*/
void
! StrategyHintVacuum(bool vacuum_active)
{
! strategy_hint_vacuum = vacuum_active;
! strategy_vacuum_xid = GetTopTransactionId();
}
/*
! * StrategyDirtyBufferList
*
! * Returns a list of dirty buffers, in priority order for writing.
! * Note that the caller may choose not to write them all.
! *
! * The caller must beware of the possibility that a buffer is no longer dirty,
! * or even contains a different page, by the time he reaches it. If it no
! * longer contains the same page it need not be written, even if it is (again)
! * dirty.
! *
! * Buffer pointers are stored into buffers[], and corresponding tags into
! * buftags[], both of size max_buffers. The function returns the number of
! * buffer IDs stored.
*/
int
! StrategyDirtyBufferList(BufferDesc **buffers, BufferTag *buftags,
int max_buffers)
{
int num_buffer_dirty = 0;
! int cdb_id_t1;
! int cdb_id_t2;
! int buf_id;
! BufferDesc *buf;
/*
! * Traverse the T1 and T2 list LRU to MRU in "parallel" and add all
! * dirty buffers found in that order to the list. The ARC strategy
! * keeps all used buffers including pinned ones in the T1 or T2 list.
! * So we cannot miss any dirty buffers.
*/
! cdb_id_t1 = StrategyControl->listHead[STRAT_LIST_T1];
! cdb_id_t2 = StrategyControl->listHead[STRAT_LIST_T2];
!
! while (cdb_id_t1 >= 0 || cdb_id_t2 >= 0)
{
! if (cdb_id_t1 >= 0)
! {
! buf_id = StrategyCDB[cdb_id_t1].buf_id;
! buf = &BufferDescriptors[buf_id];
!
! if (buf->flags & BM_VALID)
! {
! if ((buf->flags & BM_DIRTY) || (buf->cntxDirty))
! {
! buffers[num_buffer_dirty] = buf;
! buftags[num_buffer_dirty] = buf->tag;
! num_buffer_dirty++;
! if (num_buffer_dirty >= max_buffers)
! break;
! }
! }
! cdb_id_t1 = StrategyCDB[cdb_id_t1].next;
! }
! if (cdb_id_t2 >= 0)
{
! buf_id = StrategyCDB[cdb_id_t2].buf_id;
! buf = &BufferDescriptors[buf_id];
!
! if (buf->flags & BM_VALID)
! {
! if ((buf->flags & BM_DIRTY) || (buf->cntxDirty))
! {
! buffers[num_buffer_dirty] = buf;
! buftags[num_buffer_dirty] = buf->tag;
! num_buffer_dirty++;
! if (num_buffer_dirty >= max_buffers)
! break;
! }
! }
!
! cdb_id_t2 = StrategyCDB[cdb_id_t2].next;
}
}
return num_buffer_dirty;
}
-
/*
! * StrategyInitialize -- initialize the buffer cache replacement
! * strategy.
*
! * Assume: All of the buffers are already building a linked list.
! * Only called by postmaster and only during initialization.
*/
! void
! StrategyInitialize(bool init)
{
! bool found;
! int i;
/*
! * Initialize the shared CDB lookup hashtable
*/
! InitBufTable(NBuffers * 2);
!
! /*
! * Get or create the shared strategy control block and the CDB's
! */
! StrategyControl = (BufferStrategyControl *)
! ShmemInitStruct("Buffer Strategy Status",
! sizeof(BufferStrategyControl) +
! sizeof(BufferStrategyCDB) * (NBuffers * 2 - 1),
! &found);
! StrategyCDB = &(StrategyControl->cdb[0]);
!
! if (!found)
{
! /*
! * Only done once, usually in postmaster
! */
! Assert(init);
!
! /*
! * Grab the whole linked list of free buffers for our strategy. We
! * assume it was previously set up by InitBufferPool().
! */
! StrategyControl->listFreeBuffers = 0;
!
! /*
! * We start off with a target T1 list size of half the available
! * cache blocks.
! */
! StrategyControl->target_T1_size = NBuffers / 2;
! /*
! * Initialize B1, T1, T2 and B2 lists to be empty
! */
! for (i = 0; i < STRAT_NUM_LISTS; i++)
{
! StrategyControl->listHead[i] = -1;
! StrategyControl->listTail[i] = -1;
! StrategyControl->listSize[i] = 0;
! StrategyControl->num_hit[i] = 0;
! }
! StrategyControl->num_lookup = 0;
! StrategyControl->stat_report = 0;
!
! /*
! * All CDB's are linked as the listUnusedCDB
! */
! for (i = 0; i < NBuffers * 2; i++)
! {
! StrategyCDB[i].next = i + 1;
! StrategyCDB[i].list = STRAT_LIST_UNUSED;
! CLEAR_BUFFERTAG(StrategyCDB[i].buf_tag);
! StrategyCDB[i].buf_id = -1;
}
- StrategyCDB[NBuffers * 2 - 1].next = -1;
- StrategyControl->listUnusedCDB = 0;
}
! else
! Assert(!init);
}
--- 23,421 ----
#include "access/xact.h"
#include "storage/buf_internals.h"
#include "storage/bufmgr.h"
! #include "storage/proc.h"
! #include "utils/resowner.h"
/* Backend-local state about whether currently vacuuming */
! static bool vacuum_active_hint = false;
! /* Pointer to shared data structure */
! static FreeListControl *listControl;
/*
! * Initialize the free list. Called during postmaster startup or
! * standalone backend startup, as well as by each backend in
! * EXEC_BACKEND. The buffers have already been linked into a
! * doubly-linked list, so we don't have much more to do here. Since
! * all the buffers are initially free and unpinned, the freelist is
! * initially the entire collection of buffer pages. We arbitrarily
! * pick the first buffer to be the head of the freelist and the last
! * buffer to be the tail ("first" and "last" being positions in the
! * shmem array of buffer descs).
*/
! void
! InitFreeList(void)
{
! bool foundControl;
! listControl = (FreeListControl *)
! ShmemInitStruct("Free list control structure",
! sizeof(*listControl),
! &foundControl);
! /*
! * If we created the shared control structure (which will be the
! * case when not EXEC_BACKEND), initialize it as well.
! */
! if (!foundControl)
! {
! listControl->freeListHead =
! BufferDescriptorGetBuffer(&BufferDescriptors[0]);
! listControl->freeListTail =
! BufferDescriptorGetBuffer(&BufferDescriptors[NBuffers - 1]);
}
+
+ Assert(BufferIsValid(listControl->freeListHead));
+ Assert(BufferIsValid(listControl->freeListTail));
}
! /*
! * Assert-only sanity check that the given buffer is currently linked
! * into the free list and is in a state consistent with being free.
! *
! * A buffer with no predecessor must be the list head, and one with no
! * successor must be the list tail. A buffer that is the only element
! * of the list has neither link and is both head and tail, so we must
! * not insist that at least one link be valid.
! */
! static void
! CheckBufferOnFreeList(Buffer buf)
{
! BufferDesc *buf_desc = BufferGetBufferDescriptor(buf);
!
! /* A missing link is legal only at the corresponding end of the list */
! Assert(BufferIsValid(buf_desc->freePrev) ||
! listControl->freeListHead == buf);
! Assert(BufferIsValid(buf_desc->freeNext) ||
! listControl->freeListTail == buf);
!
! /* A buffer on the free list has no pins */
! Assert(buf_desc->refcount == 0);
! Assert(PrivateRefCount[buf_desc->buf_id] == 0);
!
! /* ... and we must not be holding either of its LWLocks */
! Assert(!LWLockHeldByMe(buf_desc->cntx_lock));
! Assert(!LWLockHeldByMe(buf_desc->io_in_progress_lock));
! }
! static void
! RemoveFromFreeList(Buffer buf)
! {
! BufferDesc *buf_desc = BufferGetBufferDescriptor(buf);
! CheckBufferOnFreeList(buf);
! if (BufferIsValid(buf_desc->freeNext))
{
! BufferDesc *next_desc = BufferGetBufferDescriptor(buf_desc->freeNext);
! next_desc->freePrev = buf_desc->freePrev;
}
! else
{
! /* removing the tail of the list */
! Assert(listControl->freeListTail == buf);
! listControl->freeListTail = buf_desc->freePrev;
}
! if (BufferIsValid(buf_desc->freePrev))
{
! BufferDesc *prev_desc = BufferGetBufferDescriptor(buf_desc->freePrev);
! prev_desc->freeNext = buf_desc->freeNext;
! }
! else
! {
! /* removing the head of the list */
! Assert(listControl->freeListHead == buf);
! listControl->freeListHead = buf_desc->freeNext;
}
! /* No longer on free list, so clear free list links */
! buf_desc->freeNext = InvalidBuffer;
! buf_desc->freePrev = InvalidBuffer;
}
/*
! * Add the specified buffer to the free list. If add_to_tail is true,
! * make the buffer the tail (LRU) of the free list -- otherwise make
! * it the head (MRU).
! *
! * The free list may be empty on entry (RemoveFromFreeList can take
! * away its last element). In that case there is no neighbor to link
! * to, and the buffer becomes both the head and the tail of the list.
*/
! static void
! AddToFreeList(Buffer buf, bool add_to_tail)
{
! BufferDesc *buf_desc = BufferGetBufferDescriptor(buf);
!
! /* Can't already be on the free list */
! Assert(BufferIsInvalid(buf_desc->freePrev));
! Assert(BufferIsInvalid(buf_desc->freeNext));
!
! if (add_to_tail)
! {
! Buffer old_tail = listControl->freeListTail;
!
! buf_desc->freeNext = InvalidBuffer;
! buf_desc->freePrev = old_tail;
! listControl->freeListTail = buf;
! /* Link from the old tail; if there was none, we are also the head */
! if (BufferIsValid(old_tail))
! BufferGetBufferDescriptor(old_tail)->freeNext = buf;
! else
! listControl->freeListHead = buf;
}
else
{
! Buffer old_head = listControl->freeListHead;
!
! buf_desc->freeNext = old_head;
! buf_desc->freePrev = InvalidBuffer;
! listControl->freeListHead = buf;
! /* Link from the old head; if there was none, we are also the tail */
! if (BufferIsValid(old_head))
! BufferGetBufferDescriptor(old_head)->freePrev = buf;
! else
! listControl->freeListTail = buf;
}
! CheckBufferOnFreeList(buf);
}
/*
! * Get a free buffer from the freelist; it is pinned before being
! * returned. We return the least-recently used buffer (i.e. the tail
! * of the freelist). In the unlikely event that there are no free
! * buffers, we elog(ERROR).
! *
! * XXX: is it worth skipping over a few buffers close to the tail
! * (say, up to 10) if the buffers are dirty? Trying to avoid returning
! * dirty buffers to the caller means it is more likely the bgwriter
! * will get a chance to flush them to disk, therefore avoiding wasting
! * time in a "real" backend.
*/
! BufferDesc *
! GetFreeBuffer(void)
{
! Buffer victim_buf;
! BufferDesc *victim_desc;
! victim_buf = listControl->freeListTail;
! if (BufferIsInvalid(victim_buf))
! elog(ERROR, "no free buffers");
! victim_desc = BufferGetBufferDescriptor(victim_buf);
! PinBuffer(victim_desc, true);
! /* we should have chosen another tail for the free list */
! Assert(listControl->freeListTail != victim_buf);
! /* sanity check the free list */
{
+ Buffer b = listControl->freeListHead;
+
/*
! * Assume we're not going to run out of free buffers
! * completely
*/
! Assert(BufferIsValid(listControl->freeListHead));
! Assert(BufferIsValid(listControl->freeListTail));
! while (BufferIsValid(b))
{
! BufferDesc *buf_desc = BufferGetBufferDescriptor(b);
! if (BufferIsValid(buf_desc->freeNext))
! Assert(BufferGetBufferDescriptor(buf_desc->freeNext)->freePrev == b);
else
! Assert(b == listControl->freeListTail);
! if (BufferIsValid(buf_desc->freePrev))
! Assert(BufferGetBufferDescriptor(buf_desc->freePrev)->freeNext == b);
else
! Assert(b == listControl->freeListHead);
! CheckBufferOnFreeList(b);
! b = buf_desc->freeNext;
! }
}
+ return victim_desc;
+ }
/*
! * PinBuffer -- make buffer unavailable for replacement.
! *
! * This should be applied only to shared buffers, never local ones.
! * Bufmgr lock must be held by caller.
*
! * Most but not all callers want CurrentResourceOwner to be adjusted.
! * Note that ResourceOwnerEnlargeBuffers must have been done already.
*/
void
! PinBuffer(BufferDesc *buf, bool fixOwner)
{
! int buf_idx = buf->buf_id;
! if (buf->refcount == 0)
! Assert(PrivateRefCount[buf_idx] == 0);
! if (PrivateRefCount[buf_idx] > 0)
! Assert(buf->refcount > 0);
!
! /* If first pin on buffer, remove from free list */
! if (buf->refcount == 0)
! RemoveFromFreeList(BufferDescriptorGetBuffer(buf));
!
! if (PrivateRefCount[buf_idx] == 0)
! buf->refcount++;
!
! PrivateRefCount[buf_idx]++;
! Assert(PrivateRefCount[buf_idx] > 0);
!
! if (fixOwner)
! ResourceOwnerRememberBuffer(CurrentResourceOwner,
! BufferDescriptorGetBuffer(buf));
! }
! /*
! * UnpinBuffer -- decrement reference count on buffer. If it is now
! * unreferenced, make it available for replacement (i.e. put it on the
! * free list). Note that we define the "least recently used" buffer as
! * the buffer that was unpinned the longest time ago -- so we need
! * only rearrange a buffer's free list links on pin and unpin.
! *
! * This should be applied only to shared buffers, never local ones.
! * Bufmgr lock must be held by caller.
! *
! * Most but not all callers want CurrentResourceOwner to be adjusted.
! */
! void
! UnpinBuffer(BufferDesc *buf, bool fixOwner)
! {
! int buf_idx = buf->buf_id;
! if (fixOwner)
! ResourceOwnerForgetBuffer(CurrentResourceOwner,
! BufferDescriptorGetBuffer(buf));
!
! Assert(buf->refcount > 0);
! Assert(PrivateRefCount[buf_idx] > 0);
! PrivateRefCount[buf_idx]--;
! if (PrivateRefCount[buf_idx] == 0)
! {
! buf->refcount--;
! /* I'd better not still hold any locks on the buffer */
! Assert(!LWLockHeldByMe(buf->cntx_lock));
! Assert(!LWLockHeldByMe(buf->io_in_progress_lock));
! /*
! * If the buffer is now unreferenced, we need to move it to
! * the free list. Normally, we add the buffer to the MRU
! * position (on the theory that it is likely to be accessed
! * again), but if a VACUUM is currently active, we make the
! * buffer the LRU (since VACUUM I/O shouldn't trash the buffer
! * cache). Note that we only change this behavior when (a)
! * VACUUM is active and (b) the refcount of the buffer is now
! * zero -- which makes sense, because we don't want VACUUM to
! * penalize a buffer that would be hot anyway.
! */
! if (buf->refcount == 0)
! AddToFreeList(BufferDescriptorGetBuffer(buf),
! vacuum_active_hint);
! }
! if ((buf->flags & BM_PIN_COUNT_WAITER) != 0 && buf->refcount == 1)
! {
! /* we just released the last pin other than the waiter's */
! buf->flags &= ~BM_PIN_COUNT_WAITER;
! ProcSendSignal(buf->wait_backend_id);
! }
}
/*
! * SetVacuumHint -- tell us whether VACUUM is active
*/
void
! SetVacuumHint(bool vacuum_active)
{
! vacuum_active_hint = vacuum_active;
}
/*
! * GetUnpinnedDirtyBuffers
*
! * Returns a list of dirty, unpinned buffers, in priority order for
! * writing. Note that the caller may choose not to write them
! * all. This function *only* looks for dirty buffers on the free list
! * -- dirty, pinned buffers will not be examined or returned.
! *
! * The caller must beware of the possibility that a buffer is no
! * longer dirty, or even contains a different page, by the time he
! * reaches it. If it no longer contains the same page it need not be
! * written, even if it is (again) dirty.
! *
! * Buffer pointers are stored into buffers[], and corresponding tags
! * into buftags[], both of size max_buffers. The function returns the
! * number of buffer IDs stored.
*/
int
! GetUnpinnedDirtyBuffers(BufferDesc **buffers, BufferTag *buftags,
int max_buffers)
{
int num_buffer_dirty = 0;
! Buffer buf;
/*
! * Start at the least-recently used end of the free list and work
! * backward. We prefer to flush pages that haven't been used in a
! * while, because it reduces the chance that they will be dirtied
! * once again in the near future.
! * The result limit is tested before each store, so a max_buffers
! * of zero (or less) never writes into buffers[] or buftags[].
*/
! for (buf = listControl->freeListTail;
! BufferIsValid(buf) && num_buffer_dirty < max_buffers;
! buf = BufferGetBufferDescriptor(buf)->freePrev)
{
! BufferDesc *buf_desc = BufferGetBufferDescriptor(buf);
! Assert(!BufferIsPinned(buf));
! if ((buf_desc->flags & BM_VALID) &&
! ((buf_desc->flags & BM_DIRTY) || buf_desc->cntxDirty))
{
! /*
! * Buffer is valid, dirty, and unreferenced, so add it to
! * the list of results.
! */
! buffers[num_buffer_dirty] = buf_desc;
! buftags[num_buffer_dirty] = buf_desc->tag;
! num_buffer_dirty++;
}
}
return num_buffer_dirty;
}
/*
! * Returns a list of all the dirty buffers in the pool. The buffers
! * are not returned in any meaningful order. Buffer pointers are
! * stored into buffers[], and corresponding tags into buftags[] (both
! * arrays should be of size NBuffers, as that is the maximum number of
! * buffers that will be returned). The function returns the number of
! * buffer IDs stored.
*
! * The caller must beware of the possibility that a buffer is no
! * longer dirty, or even contains a different page, by the time he
! * reaches it. If it no longer contains the same page it need not be
! * written, even if it is (again) dirty.
*/
! int
! GetAllDirtyBuffers(BufferDesc **buffers, BufferTag *buftags)
{
! int num_results = 0;
! int i;
/*
! * We need to examine all the buffers in the pool anyway, so we
! * just do a sequential scan. This is friendlier on the CPU cache,
! * although it means we won't end up returning the buffers in a
! * meaningful order.
*/
! for (i = 0; i < NBuffers; i++)
{
! BufferDesc *buf_desc = &BufferDescriptors[i];
! if (buf_desc->flags & BM_VALID &&
! ((buf_desc->flags & BM_DIRTY) || buf_desc->cntxDirty))
{
! /*
! * Buffer is valid and dirty (whether or not it is pinned), so
! * add it to the list of results.
! */
! buffers[num_results] = buf_desc;
! buftags[num_results] = buf_desc->tag;
! num_results++;
}
}
!
! Assert(num_results <= NBuffers);
! return num_results;
}
Index: src/backend/utils/misc/guc.c
===================================================================
RCS file: /var/lib/cvs/pgsql/src/backend/utils/misc/guc.c,v
retrieving revision 1.252
diff -c -r1.252 guc.c
*** src/backend/utils/misc/guc.c 1 Jan 2005 05:43:08 -0000 1.252
--- src/backend/utils/misc/guc.c 24 Jan 2005 00:20:14 -0000
***************
*** 77,83 ****
extern DLLIMPORT bool check_function_bodies;
extern int CommitDelay;
extern int CommitSiblings;
- extern int DebugSharedBuffers;
extern char *default_tablespace;
static const char *assign_log_destination(const char *value,
--- 77,82 ----
***************
*** 1231,1245 ****
},
{
- {"debug_shared_buffers", PGC_POSTMASTER, STATS_MONITORING,
- gettext_noop("Interval to report shared buffer status in seconds"),
- NULL
- },
- &DebugSharedBuffers,
- 0, 0, 600, NULL, NULL
- },
-
- {
{"bgwriter_delay", PGC_SIGHUP, RESOURCES,
gettext_noop("Background writer sleep time between rounds in milliseconds"),
NULL
--- 1230,1235 ----
***************
*** 1249,1263 ****
},
{
- {"bgwriter_percent", PGC_SIGHUP, RESOURCES,
- gettext_noop("Background writer percentage of dirty buffers to flush per round"),
- NULL
- },
- &BgWriterPercent,
- 1, 0, 100, NULL, NULL
- },
-
- {
{"bgwriter_maxpages", PGC_SIGHUP, RESOURCES,
gettext_noop("Background writer maximum number of pages to flush per round"),
NULL
--- 1239,1244 ----
Index: src/backend/utils/misc/postgresql.conf.sample
===================================================================
RCS file: /var/lib/cvs/pgsql/src/backend/utils/misc/postgresql.conf.sample,v
retrieving revision 1.134
diff -c -r1.134 postgresql.conf.sample
*** src/backend/utils/misc/postgresql.conf.sample 5 Nov 2004 19:16:16 -0000 1.134
--- src/backend/utils/misc/postgresql.conf.sample 24 Jan 2005 00:20:38 -0000
***************
*** 99,105 ****
# - Background writer -
#bgwriter_delay = 200 # 10-10000 milliseconds between rounds
- #bgwriter_percent = 1 # 0-100% of dirty buffers in each round
#bgwriter_maxpages = 100 # 0-1000 buffers max per round
--- 99,104 ----
Index: src/include/postmaster/bgwriter.h
===================================================================
RCS file: /var/lib/cvs/pgsql/src/include/postmaster/bgwriter.h,v
retrieving revision 1.4
diff -c -r1.4 bgwriter.h
*** src/include/postmaster/bgwriter.h 31 Dec 2004 22:03:39 -0000 1.4
--- src/include/postmaster/bgwriter.h 24 Jan 2005 00:21:24 -0000
***************
*** 18,24 ****
/* GUC options */
extern int BgWriterDelay;
- extern int BgWriterPercent;
extern int BgWriterMaxPages;
extern int CheckPointTimeout;
extern int CheckPointWarning;
--- 18,23 ----
Index: src/include/storage/buf_internals.h
===================================================================
RCS file: /var/lib/cvs/pgsql/src/include/storage/buf_internals.h,v
retrieving revision 1.75
diff -c -r1.75 buf_internals.h
*** src/include/storage/buf_internals.h 31 Dec 2004 22:03:42 -0000 1.75
--- src/include/storage/buf_internals.h 24 Jan 2005 01:24:54 -0000
***************
*** 26,37 ****
*/
#define BM_DIRTY (1 << 0) /* data needs writing */
#define BM_VALID (1 << 1) /* data is valid */
! #define BM_IO_IN_PROGRESS (1 << 2) /* read or write in
* progress */
! #define BM_IO_ERROR (1 << 3) /* previous I/O failed */
! #define BM_JUST_DIRTIED (1 << 4) /* dirtied since write
* started */
! #define BM_PIN_COUNT_WAITER (1 << 5) /* have waiter for sole
* pin */
typedef bits16 BufFlags;
--- 26,38 ----
*/
#define BM_DIRTY (1 << 0) /* data needs writing */
#define BM_VALID (1 << 1) /* data is valid */
! #define BM_DELETED (1 << 2) /* buffer not in hash table */
! #define BM_IO_IN_PROGRESS (1 << 3) /* read or write in
* progress */
! #define BM_IO_ERROR (1 << 4) /* previous I/O failed */
! #define BM_JUST_DIRTIED (1 << 5) /* dirtied since write
* started */
! #define BM_PIN_COUNT_WAITER (1 << 6) /* have waiter for sole
* pin */
typedef bits16 BufFlags;
***************
*** 79,85 ****
*/
typedef struct sbufdesc
{
! Buffer bufNext; /* link in freelist chain */
SHMEM_OFFSET data; /* pointer to data in buf pool */
/* tag and id must be together for table lookup (still true?) */
--- 80,87 ----
*/
typedef struct sbufdesc
{
! Buffer freeNext; /* links in freelist chain */
! Buffer freePrev;
SHMEM_OFFSET data; /* pointer to data in buf pool */
/* tag and id must be together for table lookup (still true?) */
***************
*** 106,163 ****
} BufferDesc;
#define BufferDescriptorGetBuffer(bdesc) ((bdesc)->buf_id + 1)
/* entry for buffer lookup hashtable */
typedef struct
{
BufferTag key; /* Tag of a disk page */
! int id; /* CDB id of associated CDB */
} BufferLookupEnt;
/*
! * Definitions for the buffer replacement strategy
! */
! #define STRAT_LIST_UNUSED (-1)
! #define STRAT_LIST_B1 0
! #define STRAT_LIST_T1 1
! #define STRAT_LIST_T2 2
! #define STRAT_LIST_B2 3
! #define STRAT_NUM_LISTS 4
!
! /*
! * The Cache Directory Block (CDB) of the Adaptive Replacement Cache (ARC)
*/
typedef struct
{
! int prev; /* list links */
! int next;
! short list; /* ID of list it is currently in */
! bool t1_vacuum; /* t => present only because of VACUUM */
! TransactionId t1_xid; /* the xid this entry went onto T1 */
! BufferTag buf_tag; /* page identifier */
! int buf_id; /* currently assigned data buffer, or -1 */
! } BufferStrategyCDB;
!
! /*
! * The shared ARC control information.
! */
! typedef struct
! {
! int target_T1_size; /* What T1 size are we aiming for */
! int listUnusedCDB; /* All unused StrategyCDB */
! int listHead[STRAT_NUM_LISTS]; /* ARC lists B1, T1, T2
! * and B2 */
! int listTail[STRAT_NUM_LISTS];
! int listSize[STRAT_NUM_LISTS];
! Buffer listFreeBuffers; /* List of unused buffers */
!
! long num_lookup; /* Some hit statistics */
! long num_hit[STRAT_NUM_LISTS];
! time_t stat_report;
!
! /* Array of CDB's starts here */
! BufferStrategyCDB cdb[1]; /* VARIABLE SIZE ARRAY */
! } BufferStrategyControl;
/* counters in buf_init.c */
--- 108,130 ----
} BufferDesc;
#define BufferDescriptorGetBuffer(bdesc) ((bdesc)->buf_id + 1)
+ #define BufferGetBufferDescriptor(buf) (&BufferDescriptors[(buf) - 1]) /* buf is 1-based */
/* entry for buffer lookup hashtable */
typedef struct
{
BufferTag key; /* Tag of a disk page */
! Buffer id; /* Buffer ID for disk page */
} BufferLookupEnt;
/*
! * Shared control information for free list
*/
typedef struct
{
! Buffer freeListHead; /* Head of LRU -- most recently used */
! Buffer freeListTail; /* Tail of LRU -- least recently used */
! } FreeListControl;
/* counters in buf_init.c */
***************
*** 176,197 ****
/* Internal routines: only called by bufmgr */
/* freelist.c */
! extern BufferDesc *StrategyBufferLookup(BufferTag *tagPtr, bool recheck,
! int *cdb_found_index);
! extern BufferDesc *StrategyGetBuffer(int *cdb_replace_index);
! extern void StrategyReplaceBuffer(BufferDesc *buf, BufferTag *newTag,
! int cdb_found_index, int cdb_replace_index);
! extern void StrategyInvalidateBuffer(BufferDesc *buf);
! extern void StrategyHintVacuum(bool vacuum_active);
! extern int StrategyDirtyBufferList(BufferDesc **buffers, BufferTag *buftags,
! int max_buffers);
! extern void StrategyInitialize(bool init);
/* buf_table.c */
extern void InitBufTable(int size);
! extern int BufTableLookup(BufferTag *tagPtr);
! extern void BufTableInsert(BufferTag *tagPtr, int cdb_id);
! extern void BufTableDelete(BufferTag *tagPtr);
/* bufmgr.c */
extern BufferDesc *BufferDescriptors;
--- 143,164 ----
/* Internal routines: only called by bufmgr */
/* freelist.c */
! extern void SetVacuumHint(bool vacuum_active);
! extern int GetUnpinnedDirtyBuffers(BufferDesc **buffers,
! BufferTag *buftags, int max_buffers);
! extern int GetAllDirtyBuffers(BufferDesc **buffers, BufferTag *buftags);
! extern void InitFreeList(void);
! extern void PinBuffer(BufferDesc *buf, bool fixOwner);
! extern void UnpinBuffer(BufferDesc *buf, bool fixOwner);
! extern BufferDesc *GetFreeBuffer(void);
!
/* buf_table.c */
extern void InitBufTable(int size);
! extern BufferDesc *BufTableLookup(BufferTag *tagPtr);
! extern void BufTableInsert(BufferDesc *buf);
! extern void BufTableInsert(BufferDesc *buf);
! extern void BufTableDelete(BufferDesc *buf);
/* bufmgr.c */
extern BufferDesc *BufferDescriptors;
Index: src/include/storage/bufmgr.h
===================================================================
RCS file: /var/lib/cvs/pgsql/src/include/storage/bufmgr.h,v
retrieving revision 1.89
diff -c -r1.89 bufmgr.h
*** src/include/storage/bufmgr.h 31 Dec 2004 22:03:42 -0000 1.89
--- src/include/storage/bufmgr.h 24 Jan 2005 00:18:49 -0000
***************
*** 150,156 ****
extern void AbortBufferIO(void);
extern void BufmgrCommit(void);
! extern int BufferSync(int percent, int maxpages);
extern void InitLocalBuffer(void);
--- 150,156 ----
extern void AbortBufferIO(void);
extern void BufmgrCommit(void);
! extern int BufferSync(int maxpages);
extern void InitLocalBuffer(void);