Re: patch for new feature: Buffer Cache Hibernation

From: Mitsuru IWASAKI <iwasaki(at)jp(dot)FreeBSD(dot)org>
To: greg(at)2ndQuadrant(dot)com
Cc: pgsql-hackers(at)postgresql(dot)org
Subject: Re: patch for new feature: Buffer Cache Hibernation
Date: 2011-06-05 12:50:14
Message-ID: 20110605.215014.65174283.iwasaki@jp.FreeBSD.org
Views: Raw Message | Whole Thread | Download mbox | Resend email
Thread:
Lists: pgsql-hackers

Hi,

> On 05/07/2011 03:32 AM, Mitsuru IWASAKI wrote:
> > For 1, I've just finish my work. The latest patch is available at:
> > http://people.freebsd.org/~iwasaki/postgres/buffer-cache-hibernation-postgresql-20110507.patch
> >
>
> Reminder here--we can't accept code based on it being published to a web
> page. You'll need to e-mail it to the pgsql-hackers mailing list to be
> considered for the next PostgreSQL CommitFest, which is starting in a
> few weeks. Code submitted to the mailing list is considered a release
> of it to the project under the PostgreSQL license, which we can't just
> assume for things when given only a URL to them.

Sorry about that, but I had enough time to revise my patches this weekend.
I have attached the patches to this mail, and will update the CommitFest page soon.

> Also, you suggested you were out of time to work on this. If that's the
> case, we'd like to know that so we don't keep cc'ing you about things in
> expectation of an answer. Someone else may pick this up as a project to
> continue working on. But it's going to need a fair amount of revision
> before it matches what people want here, and I'm not sure how much of
> what you've written is going to end up in any commit that may happen
> from this idea.

It seems that I don't have enough time to complete this work.
You don't need to keep cc'ing me, and I would be very happy if postgres became
the first DBMS to support a buffer cache hibernation feature.

Thanks!

diff --git src/backend/access/transam/xlog.c src/backend/access/transam/xlog.c
index b0e4c41..7a3a207 100644
--- src/backend/access/transam/xlog.c
+++ src/backend/access/transam/xlog.c
@@ -4834,6 +4834,19 @@ ReadControlFile(void)
#endif
}

+/*
+ * GetControlFile
+ *		Copy the data that ControlFile points at into *controlFile.
+ *
+ * Returns true after the copy.  Returns false, leaving *controlFile
+ * untouched, while the ControlFile pointer is still NULL.
+ */
+bool
+GetControlFile(ControlFileData *controlFile)
+{
+	if (ControlFile != NULL)
+	{
+		memcpy(controlFile, ControlFile, sizeof(ControlFileData));
+		return true;
+	}
+
+	return false;
+}
+
void
UpdateControlFile(void)
{
diff --git src/backend/bootstrap/bootstrap.c src/backend/bootstrap/bootstrap.c
index fc093cc..7ecf6bb 100644
--- src/backend/bootstrap/bootstrap.c
+++ src/backend/bootstrap/bootstrap.c
@@ -360,6 +360,15 @@ AuxiliaryProcessMain(int argc, char *argv[])
BaseInit();

/*
+ * Only StartupProcess can call ResumeBufferCacheHibernation() after
+ * InitFileAccess() and smgrinit().
+ */
+ if (auxType == StartupProcess && BufferCacheHibernationLevel > 0)
+ {
+ ResumeBufferCacheHibernation();
+ }
+
+ /*
* When we are an auxiliary process, we aren't going to do the full
* InitPostgres pushups, but there are a couple of things that need to get
* lit up even in an auxiliary process.
diff --git src/backend/storage/buffer/buf_init.c src/backend/storage/buffer/buf_init.c
index dadb49d..52eb51a 100644
--- src/backend/storage/buffer/buf_init.c
+++ src/backend/storage/buffer/buf_init.c
@@ -127,6 +127,14 @@ InitBufferPool(void)

/* Init other shared buffer-management stuff */
StrategyInitialize(!foundDescs);
+
+ if (BufferCacheHibernationLevel > 0)
+ {
+ ResisterBufferCacheHibernation(BUFFER_CACHE_HIBERNATION_TYPE_DESCRIPTORS,
+ (char *)BufferDescriptors, sizeof(BufferDesc), NBuffers);
+ ResisterBufferCacheHibernation(BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS,
+ (char *)BufferBlocks, BLCKSZ, NBuffers);
+ }
}

/*
diff --git src/backend/storage/buffer/bufmgr.c src/backend/storage/buffer/bufmgr.c
index f96685d..dba8ebf 100644
--- src/backend/storage/buffer/bufmgr.c
+++ src/backend/storage/buffer/bufmgr.c
@@ -31,6 +31,7 @@
#include "postgres.h"

#include <sys/file.h>
+#include <sys/stat.h>
#include <unistd.h>

#include "catalog/catalog.h"
@@ -61,6 +62,13 @@
#define BUF_WRITTEN 0x01
#define BUF_REUSABLE 0x02

+/*
+ * Buffer Cache Hibernation stuff.
+ */
+/* enable this to debug buffer cache hibernation. */
+#if 0
+#define DEBUG_BUFFER_CACHE_HIBERNATION
+#endif

/* GUC variables */
bool zero_damaged_pages = false;
@@ -765,6 +773,16 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
}
}

+#ifdef DEBUG_BUFFER_CACHE_HIBERNATION
+ elog(DEBUG5,
+ "alloc [%d]\t%03x,%d,%d,%d,%d\t%08x,%d,%d,%d,%d,%d",
+ buf->buf_id, buf->flags, buf->usage_count, buf->refcount,
+ buf->wait_backend_pid, buf->freeNext,
+ newHash, newTag.rnode.spcNode,
+ newTag.rnode.dbNode, newTag.rnode.relNode,
+ newTag.forkNum, newTag.blockNum);
+#endif
+
return buf;
}

@@ -800,6 +818,16 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
* the old content is no longer relevant. (The usage_count starts out at
* 1 so that the buffer can survive one clock-sweep pass.)
*/
+#ifdef DEBUG_BUFFER_CACHE_HIBERNATION
+ elog(DEBUG5,
+ "rename [%d]\t%03x,%d,%d,%d,%d\t%08x,%d,%d,%d,%d,%d",
+ buf->buf_id, buf->flags, buf->usage_count, buf->refcount,
+ buf->wait_backend_pid, buf->freeNext,
+ oldHash, oldTag.rnode.spcNode,
+ oldTag.rnode.dbNode, oldTag.rnode.relNode,
+ oldTag.forkNum, oldTag.blockNum);
+#endif
+
buf->tag = newTag;
buf->flags &= ~(BM_VALID | BM_DIRTY | BM_JUST_DIRTIED | BM_CHECKPOINT_NEEDED | BM_IO_ERROR | BM_PERMANENT);
if (relpersistence == RELPERSISTENCE_PERMANENT)
@@ -2772,3 +2800,716 @@ local_buffer_write_error_callback(void *arg)
pfree(path);
}
}
+
+/* ----------------------------------------------------------------
+ * Buffer Cache Hibernation support stuff
+ *
+ * Suspend/resume buffer cache data structure using hibernation files
+ * at shutdown/startup.
+ * ----------------------------------------------------------------
+ */
+
+/*
+ * GUC buffer_cache_hibernation_level: 0 disables the feature, 1 saves the
+ * strategy control and buffer descriptors, 2 additionally saves the buffer
+ * blocks.
+ */
+int BufferCacheHibernationLevel = 0;
+
+/* Paths of the hibernation files and of the file holding their CRC values. */
+#define BUFFER_CACHE_HIBERNATION_FILE_STRATEGY "global/pg_buffer_cache_hibernation_strategy"
+#define BUFFER_CACHE_HIBERNATION_FILE_DESCRIPTORS "global/pg_buffer_cache_hibernation_descriptors"
+#define BUFFER_CACHE_HIBERNATION_FILE_BLOCKS "global/pg_buffer_cache_hibernation_blocks"
+#define BUFFER_CACHE_HIBERNATION_FILE_CRC32 "global/pg_buffer_cache_hibernation_crc32"
+
+/*
+ * One entry per hibernation file, indexed by BufferHibernationFileType, so
+ * the entry order must match the order of that enum.  The loops over this
+ * table stop at the entry whose hibernation_file is NULL.
+ */
+static struct
+{
+ /* path of the hibernation file; NULL marks the end of the table */
+ char *hibernation_file;
+ /* start of the in-memory data, set by ResisterBufferCacheHibernation() */
+ char *data_ptr;
+ /* size in bytes of one record, set by ResisterBufferCacheHibernation() */
+ Size record_length;
+ /* number of records, set by ResisterBufferCacheHibernation() */
+ Size num_records;
+ /* CRC of the file contents: computed at suspend, read back at resume */
+ pg_crc32 crc;
+} BufferCacheHibernationData[] =
+{
+ /* BufferStrategyControl */
+ {
+ BUFFER_CACHE_HIBERNATION_FILE_STRATEGY,
+ NULL, 0, 0, 0
+ },
+
+ /* BufferDescriptors */
+ {
+ BUFFER_CACHE_HIBERNATION_FILE_DESCRIPTORS,
+ NULL, 0, 0, 0
+ },
+
+ /* BufferBlocks */
+ {
+ BUFFER_CACHE_HIBERNATION_FILE_BLOCKS,
+ NULL, 0, 0, 0
+ },
+
+ /* End-of-list marker */
+ {
+ NULL,
+ NULL, 0, 0, 0
+ },
+};
+
+/*
+ * Private copy of the control file obtained through GetControlFile();
+ * controlFileInitialized is set once such a copy has succeeded during
+ * registration.
+ */
+static ControlFileData controlFile;
+static bool controlFileInitialized = false;
+
+/*
+ * AtProcExit_BufferCacheHibernation:
+ *	store the buffer cache into hibernation files at shutdown.
+ *
+ * Installed with on_shmem_exit() by ResisterBufferCacheHibernation();
+ * neither 'code' nor 'arg' is used.
+ *
+ * Nothing is written unless buffer_cache_hibernation_level is non-zero and
+ * the control file reports DB_SHUTDOWNED.  Each registered data structure is
+ * written record by record to its hibernation file while its CRC is
+ * computed; the CRC values are then written, in table order, to the CRC32
+ * file.  The buffer block file is skipped when the level is below 2.
+ *
+ * On any failure the open descriptor is closed and every hibernation file,
+ * including the CRC32 file, is removed, so that a later resume cannot pick
+ * up a partially written set of files.
+ */
+static void
+AtProcExit_BufferCacheHibernation(int code, Datum arg)
+{
+	BufferHibernationFileType id;
+	int			fd = -1;		/* -1 means "nothing to close" */
+
+	if (BufferCacheHibernationLevel == 0)
+	{
+		return;
+	}
+
+	/*
+	 * get the control file to check the system state validation.
+	 */
+	if (GetControlFile(&controlFile) == false)
+	{
+		elog(WARNING,
+			 "could not get control file, "
+			 "aborting buffer cache hibernation");
+		return;
+	}
+
+	if (controlFile.state != DB_SHUTDOWNED)
+	{
+		elog(WARNING,
+			 "database system was not shut down normally, "
+			 "aborting buffer cache hibernation");
+		return;
+	}
+
+	/*
+	 * suspend buffer cache data structure into hibernation files.
+	 */
+	for (id = 0; BufferCacheHibernationData[id].hibernation_file != NULL; id++)
+	{
+		Size		record_length;
+		Size		num_records;
+		Size		i;
+		char	   *ptr;
+		pg_crc32	crc;
+
+		if (BufferCacheHibernationLevel < 2 &&
+			id == BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS)
+		{
+			continue;
+		}
+
+		if (BufferCacheHibernationData[id].data_ptr == NULL ||
+			BufferCacheHibernationData[id].record_length == 0 ||
+			BufferCacheHibernationData[id].num_records == 0)
+		{
+			elog(WARNING,
+				 "ResisterBufferCacheHibernation() was not called for %s",
+				 BufferCacheHibernationData[id].hibernation_file);
+			goto cleanup;
+		}
+
+		fd = BasicOpenFile(BufferCacheHibernationData[id].hibernation_file,
+						   O_CREAT | O_WRONLY | O_TRUNC | PG_BINARY, S_IRUSR | S_IWUSR);
+		if (fd < 0)
+		{
+			elog(WARNING,
+				 "could not open %s",
+				 BufferCacheHibernationData[id].hibernation_file);
+			goto cleanup;
+		}
+
+		record_length = BufferCacheHibernationData[id].record_length;
+		num_records = BufferCacheHibernationData[id].num_records;
+
+		elog(NOTICE,
+			 "buffer cache hibernate into %s",
+			 BufferCacheHibernationData[id].hibernation_file);
+
+		INIT_CRC32(crc);
+		for (i = 0; i < num_records; i++)
+		{
+			ptr = BufferCacheHibernationData[id].data_ptr + (i * record_length);
+
+			/* a short write is treated like a failed one */
+			if (write(fd, (void *) ptr, record_length) != (ssize_t) record_length)
+			{
+				elog(WARNING,
+					 "could not write %s",
+					 BufferCacheHibernationData[id].hibernation_file);
+				goto cleanup;	/* cleanup closes fd */
+			}
+
+			COMP_CRC32(crc, ptr, record_length);
+		}
+
+		FIN_CRC32(crc);
+
+		/* a failing close() can be the only report of a lost write */
+		if (close(fd) != 0)
+		{
+			fd = -1;
+			elog(WARNING,
+				 "could not close %s",
+				 BufferCacheHibernationData[id].hibernation_file);
+			goto cleanup;
+		}
+		fd = -1;
+
+		BufferCacheHibernationData[id].crc = crc;
+	}
+
+	/*
+	 * save the computed crc values for the validations at resuming.
+	 */
+	fd = BasicOpenFile(BUFFER_CACHE_HIBERNATION_FILE_CRC32,
+					   O_CREAT | O_WRONLY | O_TRUNC | PG_BINARY, S_IRUSR | S_IWUSR);
+	if (fd < 0)
+	{
+		elog(WARNING,
+			 "could not open %s",
+			 BUFFER_CACHE_HIBERNATION_FILE_CRC32);
+		goto cleanup;
+	}
+
+	for (id = 0; BufferCacheHibernationData[id].hibernation_file != NULL; id++)
+	{
+		pg_crc32	crc;
+
+		if (BufferCacheHibernationLevel < 2 &&
+			id == BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS)
+		{
+			continue;
+		}
+
+		crc = BufferCacheHibernationData[id].crc;
+		if (write(fd, (void *) &crc, sizeof(pg_crc32)) != (ssize_t) sizeof(pg_crc32))
+		{
+			elog(WARNING,
+				 "could not write %s for %s",
+				 BUFFER_CACHE_HIBERNATION_FILE_CRC32,
+				 BufferCacheHibernationData[id].hibernation_file);
+			goto cleanup;		/* cleanup closes fd */
+		}
+	}
+
+	if (close(fd) != 0)
+	{
+		fd = -1;
+		elog(WARNING,
+			 "could not close %s",
+			 BUFFER_CACHE_HIBERNATION_FILE_CRC32);
+		goto cleanup;
+	}
+	fd = -1;
+
+	elog(NOTICE,
+		 "buffer cache suspended successfully");
+
+	return;
+
+cleanup:
+	if (fd >= 0)
+	{
+		close(fd);
+	}
+
+	for (id = 0; BufferCacheHibernationData[id].hibernation_file != NULL; id++)
+	{
+		unlink(BufferCacheHibernationData[id].hibernation_file);
+	}
+
+	/* the CRC values are meaningless without the files they describe */
+	unlink(BUFFER_CACHE_HIBERNATION_FILE_CRC32);
+
+	return;
+}
+
+/*
+ * ResisterBufferCacheHibernation:
+ *	register the buffer cache data structure info.
+ *
+ * Records where the data structure identified by 'id' lives ('ptr'), how
+ * large one record is ('record_length') and how many records there are
+ * ('num_records').  The call is ignored when hibernation is disabled or
+ * when 'id' is not one of the known file types.
+ *
+ * The first accepted call also installs AtProcExit_BufferCacheHibernation()
+ * as an on_shmem_exit callback, and every accepted call tries to take a
+ * copy of the control file until one attempt succeeds.
+ */
+void
+ResisterBufferCacheHibernation(BufferHibernationFileType id, char *ptr, Size record_length, Size num_records)
+{
+	static bool exit_hook_pending = true;
+
+	if (BufferCacheHibernationLevel == 0)
+	{
+		return;
+	}
+
+	switch (id)
+	{
+		case BUFFER_CACHE_HIBERNATION_TYPE_STRATEGY:
+		case BUFFER_CACHE_HIBERNATION_TYPE_DESCRIPTORS:
+		case BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS:
+			break;
+
+		default:
+			/* unknown file type, nothing to register */
+			return;
+	}
+
+	/* AtProcExit_BufferCacheHibernation to be called at shutdown. */
+	if (exit_hook_pending)
+	{
+		on_shmem_exit(AtProcExit_BufferCacheHibernation, 0);
+		exit_hook_pending = false;
+	}
+
+	/*
+	 * take the control file copy used for the system state and
+	 * hibernation file validations, unless we already have one.
+	 */
+	if (!controlFileInitialized && GetControlFile(&controlFile))
+	{
+		controlFileInitialized = true;
+	}
+
+	BufferCacheHibernationData[id].num_records = num_records;
+	BufferCacheHibernationData[id].record_length = record_length;
+	BufferCacheHibernationData[id].data_ptr = ptr;
+}
+
+/*
+ * ResumeBufferCacheHibernation:
+ *	resume the buffer cache from hibernation file at startup.
+ *
+ * Does nothing when buffer_cache_hibernation_level is 0.  Otherwise, with
+ * the header spinlock of every buffer descriptor held:
+ *	1. require a control file copy whose state is DB_SHUTDOWNED,
+ *	2. load the CRC values saved at shutdown,
+ *	3. validate every hibernation file (size, modification time, CRC and,
+ *	   for buffer descriptors, flags, usage count and buffer id),
+ *	4. copy the file contents over the registered data structures,
+ *	5. rebuild the buffer lookup table from the restored descriptors,
+ *	   re-reading each block through smgr when the buffer block file was
+ *	   not used,
+ *	6. adjust StrategyControl when the number of buffers has changed.
+ *
+ * A failure in steps 1-3 only emits a WARNING and leaves the buffer pool as
+ * it was.  A failed read in step 4 is reported with PANIC, because the
+ * shared structures are partially overwritten by then and must not be used.
+ */
+void
+ResumeBufferCacheHibernation(void)
+{
+	BufferHibernationFileType id;
+	int			i;
+	int			fd;
+	Size		num_records;
+	Size		record_length;
+	char	   *buf_common;
+	int			oldNBuffers;
+	bool		buffer_block_processed;
+
+	if (BufferCacheHibernationLevel == 0)
+	{
+		return;
+	}
+
+	buf_common = NULL;
+	buffer_block_processed = false;
+
+	/*
+	 * lock all buffer descriptors to prevent other processes from
+	 * updating buffers.
+	 */
+	for (i = 0; i < NBuffers; i++)
+	{
+		BufferDesc *buf;
+
+		buf = &BufferDescriptors[i];
+		LockBufHdr(buf);
+	}
+
+	/*
+	 * the control file copy taken at registration time is needed to check
+	 * the system state and to validate the hibernation files.
+	 */
+	if (controlFileInitialized == false)
+	{
+		elog(WARNING,
+			 "could not get control file, "
+			 "aborting buffer cache hibernation");
+		goto cleanup;
+	}
+
+	if (controlFile.state != DB_SHUTDOWNED)
+	{
+		elog(WARNING,
+			 "database system was not shut down normally, "
+			 "aborting buffer cache hibernation");
+		goto cleanup;
+	}
+
+	/*
+	 * read the crc values which were computed when the hibernation
+	 * files were created.
+	 */
+	fd = BasicOpenFile(BUFFER_CACHE_HIBERNATION_FILE_CRC32,
+					   O_RDONLY | PG_BINARY, S_IRUSR | S_IWUSR);
+	if (fd < 0)
+	{
+		elog(WARNING,
+			 "could not open %s",
+			 BUFFER_CACHE_HIBERNATION_FILE_CRC32);
+		goto cleanup;
+	}
+
+	for (id = 0; BufferCacheHibernationData[id].hibernation_file != NULL; id++)
+	{
+		pg_crc32	crc;
+
+		if (BufferCacheHibernationLevel < 2 &&
+			id == BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS)
+		{
+			continue;
+		}
+
+		if (read(fd, (void *) &crc, sizeof(pg_crc32)) != (ssize_t) sizeof(pg_crc32))
+		{
+			if (BufferCacheHibernationLevel == 2 &&
+				id == BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS)
+			{
+				/*
+				 * if buffer_cache_hibernation_level changes 1 to 2,
+				 * the crc value of buffer block hibernation file may not exist.
+				 * just ignore it here.
+				 */
+				continue;
+			}
+
+			elog(WARNING,
+				 "could not read %s for %s",
+				 BUFFER_CACHE_HIBERNATION_FILE_CRC32,
+				 BufferCacheHibernationData[id].hibernation_file);
+			close(fd);
+			goto cleanup;
+		}
+		BufferCacheHibernationData[id].crc = crc;
+	}
+
+	close(fd);
+
+	/*
+	 * allocate a buffer, as large as the largest registered record, to
+	 * read the contents of the hibernation files for validations.
+	 */
+	record_length = 0;
+	for (id = 0; BufferCacheHibernationData[id].hibernation_file != NULL; id++)
+	{
+		if (record_length < BufferCacheHibernationData[id].record_length)
+		{
+			record_length = BufferCacheHibernationData[id].record_length;
+		}
+	}
+
+	buf_common = malloc(record_length);
+
+	/*
+	 * Assert() is compiled out of production builds, so check explicitly.
+	 * A NULL result for a zero-length request is not an error: in that case
+	 * nothing was registered and the validation loop below reports it
+	 * before the buffer is ever used.
+	 */
+	if (buf_common == NULL && record_length > 0)
+	{
+		elog(WARNING,
+			 "out of memory, aborting buffer cache hibernation");
+		goto cleanup;
+	}
+
+	/* assume that the number of buffers have not changed. */
+	oldNBuffers = NBuffers;
+
+	/*
+	 * check if all hibernation files are valid.
+	 */
+	for (id = 0; BufferCacheHibernationData[id].hibernation_file != NULL; id++)
+	{
+		struct stat sb;
+		pg_crc32	crc;
+
+		if (BufferCacheHibernationLevel < 2 &&
+			id == BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS)
+		{
+			continue;
+		}
+
+		if (BufferCacheHibernationData[id].data_ptr == NULL ||
+			BufferCacheHibernationData[id].record_length == 0 ||
+			BufferCacheHibernationData[id].num_records == 0)
+		{
+			elog(WARNING,
+				 "ResisterBufferCacheHibernation() was not called for %s",
+				 BufferCacheHibernationData[id].hibernation_file);
+			goto cleanup;
+		}
+
+		fd = BasicOpenFile(BufferCacheHibernationData[id].hibernation_file,
+						   O_RDONLY | PG_BINARY, S_IRUSR | S_IWUSR);
+		if (fd < 0)
+		{
+			if (BufferCacheHibernationLevel == 2 &&
+				id == BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS)
+			{
+				/*
+				 * if buffer_cache_hibernation_level changes 1 to 2,
+				 * the buffer block hibernation file may not exist.
+				 * just ignore it here.
+				 */
+				continue;
+			}
+
+			elog(WARNING,
+				 "could not open %s",
+				 BufferCacheHibernationData[id].hibernation_file);
+			goto cleanup;
+		}
+
+		if (fstat(fd, &sb) < 0)
+		{
+			elog(WARNING,
+				 "could not get stats of the buffer cache hibernation file: %s",
+				 BufferCacheHibernationData[id].hibernation_file);
+			close(fd);
+			goto cleanup;
+		}
+
+		record_length = BufferCacheHibernationData[id].record_length;
+		num_records = BufferCacheHibernationData[id].num_records;
+
+		if (sb.st_size != (record_length * num_records))
+		{
+			/* The size of StrategyControl should be the same always. */
+			if (id == BUFFER_CACHE_HIBERNATION_TYPE_STRATEGY ||
+				(sb.st_size % record_length) > 0)
+			{
+				elog(WARNING,
+					 "size mismatch on the buffer cache hibernation file: %s",
+					 BufferCacheHibernationData[id].hibernation_file);
+				close(fd);
+				goto cleanup;
+			}
+
+			/*
+			 * The number of records of buffer descriptors and blocks
+			 * should be the same.
+			 */
+			if (oldNBuffers != NBuffers &&
+				oldNBuffers != (sb.st_size / record_length))
+			{
+				elog(WARNING,
+					 "size mismatch on the buffer cache hibernation file: %s",
+					 BufferCacheHibernationData[id].hibernation_file);
+				close(fd);
+				goto cleanup;
+			}
+
+			oldNBuffers = sb.st_size / record_length;
+
+			elog(NOTICE,
+				 "shared_buffers have changed from %d to %d: %s",
+				 oldNBuffers, NBuffers,
+				 BufferCacheHibernationData[id].hibernation_file);
+
+			/* use the original size to compute CRC of the hibernation file. */
+			num_records = oldNBuffers;
+		}
+
+		if ((pg_time_t) sb.st_mtime < controlFile.time)
+		{
+			elog(WARNING,
+				 "the hibernation file is older than control file: %s",
+				 BufferCacheHibernationData[id].hibernation_file);
+			close(fd);
+			goto cleanup;
+		}
+
+		INIT_CRC32(crc);
+		for (i = 0; (Size) i < num_records; i++)
+		{
+			if (read(fd, (void *) buf_common, record_length) != (ssize_t) record_length)
+			{
+				elog(WARNING,
+					 "could not read the buffer cache hibernation file: %s",
+					 BufferCacheHibernationData[id].hibernation_file);
+				close(fd);
+				goto cleanup;
+			}
+
+			COMP_CRC32(crc, buf_common, record_length);
+
+			/*
+			 * buffer descriptors validations.
+			 */
+			if (id == BUFFER_CACHE_HIBERNATION_TYPE_DESCRIPTORS)
+			{
+				BufferDesc *buf;
+				BufFlags	abnormal_flags;
+
+				/*
+				 * records beyond the current pool size still count for the
+				 * CRC but will never be restored, so skip their checks.
+				 */
+				if (i >= NBuffers)
+				{
+					continue;
+				}
+
+				abnormal_flags = (BM_DIRTY | BM_IO_IN_PROGRESS | BM_IO_ERROR |
+								  BM_JUST_DIRTIED | BM_PIN_COUNT_WAITER);
+
+				buf = (BufferDesc *) buf_common;
+
+				if (buf->flags & abnormal_flags)
+				{
+					elog(WARNING,
+						 "abnormal flags in buffer descriptors: %d",
+						 buf->flags);
+					close(fd);
+					goto cleanup;
+				}
+
+				if (buf->usage_count > BM_MAX_USAGE_COUNT)
+				{
+					elog(WARNING,
+						 "invalid usage count in buffer descriptors: %d",
+						 buf->usage_count);
+					close(fd);
+					goto cleanup;
+				}
+
+				if (buf->buf_id < 0 || (Size) buf->buf_id >= num_records)
+				{
+					elog(WARNING,
+						 "invalid buffer id in buffer descriptors: %d",
+						 buf->buf_id);
+					close(fd);
+					goto cleanup;
+				}
+			}
+		}
+
+		FIN_CRC32(crc);
+		close(fd);
+
+		/* fd is already closed here; do not close it a second time. */
+		if (!EQ_CRC32(BufferCacheHibernationData[id].crc, crc))
+		{
+			elog(WARNING,
+				 "crc mismatch on the buffer cache hibernation file: %s",
+				 BufferCacheHibernationData[id].hibernation_file);
+			goto cleanup;
+		}
+	}
+
+	/*
+	 * resume the buffer cache data structure from the hibernation files.
+	 */
+	for (id = 0; BufferCacheHibernationData[id].hibernation_file != NULL; id++)
+	{
+		char	   *ptr;
+
+		if (BufferCacheHibernationLevel < 2 &&
+			id == BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS)
+		{
+			continue;
+		}
+
+		record_length = BufferCacheHibernationData[id].record_length;
+		num_records = BufferCacheHibernationData[id].num_records;
+
+		if (id != BUFFER_CACHE_HIBERNATION_TYPE_STRATEGY)
+		{
+			/* use the smaller number of buffers. */
+			num_records = (oldNBuffers < NBuffers) ? oldNBuffers : NBuffers;
+		}
+
+		fd = BasicOpenFile(BufferCacheHibernationData[id].hibernation_file,
+						   O_RDONLY | PG_BINARY, S_IRUSR | S_IWUSR);
+		if (fd < 0)
+		{
+			if (BufferCacheHibernationLevel == 2 &&
+				id == BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS)
+			{
+				/*
+				 * if buffer_cache_hibernation_level changes 1 to 2,
+				 * the buffer block hibernation file may not exist.
+				 * just ignore it here.
+				 */
+				continue;
+			}
+
+			elog(WARNING,
+				 "could not open %s",
+				 BufferCacheHibernationData[id].hibernation_file);
+			goto cleanup;
+		}
+
+		/* Size is wider than int on LP64 platforms, so cast for %d. */
+		elog(NOTICE,
+			 "buffer cache resume from %s(%d bytes * %d records)",
+			 BufferCacheHibernationData[id].hibernation_file,
+			 (int) record_length, (int) num_records);
+
+		for (i = 0; (Size) i < num_records; i++)
+		{
+			ptr = BufferCacheHibernationData[id].data_ptr + (i * record_length);
+
+			/*
+			 * The file passed validation a moment ago, so a failing or
+			 * short read is unexpected.  The shared structures are being
+			 * overwritten in place, so there is no consistent state to
+			 * fall back to; stop rather than run on half-restored data.
+			 */
+			if (read(fd, (void *) ptr, record_length) != (ssize_t) record_length)
+			{
+				elog(PANIC,
+					 "could not read the buffer cache hibernation file: %s",
+					 BufferCacheHibernationData[id].hibernation_file);
+			}
+
+			/*
+			 * The spinlock is part of the descriptor and has just been
+			 * overwritten with the saved value; re-lock it if necessary.
+			 */
+			if (id == BUFFER_CACHE_HIBERNATION_TYPE_DESCRIPTORS)
+			{
+				BufferDesc *buf;
+
+				buf = (BufferDesc *) ptr;
+				if (IsUnlockBufHdr(buf))
+				{
+					LockBufHdr(buf);
+				}
+			}
+		}
+
+		close(fd);
+
+		if (id == BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS)
+		{
+			buffer_block_processed = true;
+		}
+	}
+
+	if (buffer_block_processed == false)
+	{
+		/* we didn't use the buffer block hibernation file, so delete it now. */
+		id = BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS;
+		unlink(BufferCacheHibernationData[id].hibernation_file);
+	}
+
+	/*
+	 * set the rest data structures (eg. lookup hashtable) up
+	 * based on the buffer descriptors.
+	 */
+	num_records = (oldNBuffers < NBuffers) ? oldNBuffers : NBuffers;
+	for (i = 0; (Size) i < num_records; i++)
+	{
+		BufferDesc *buf;
+		BufferTag	newTag;
+		uint32		newHash;
+		int			buf_id;
+
+		buf = &BufferDescriptors[i];
+
+		/* a descriptor whose relation tag is all InvalidOid holds no page. */
+		if (buf->tag.rnode.spcNode == InvalidOid &&
+			buf->tag.rnode.dbNode == InvalidOid &&
+			buf->tag.rnode.relNode == InvalidOid)
+		{
+			continue;
+		}
+
+		INIT_BUFFERTAG(newTag, buf->tag.rnode, buf->tag.forkNum, buf->tag.blockNum);
+		newHash = BufTableHashCode(&newTag);
+
+		if (buffer_block_processed == false)
+		{
+			Block		bufBlock;
+			SMgrRelation smgr;
+
+			/*
+			 * re-read buffer block.
+			 */
+			bufBlock = BufHdrGetBlock(buf);
+			smgr = smgropen(buf->tag.rnode, InvalidBackendId);
+			smgrread(smgr, newTag.forkNum, newTag.blockNum, (char *) bufBlock);
+		}
+
+		buf_id = BufTableInsert(&newTag, newHash, buf->buf_id);
+		if (buf_id != -1)
+		{
+			/*
+			 * the entry exists already, return it to the freelist.
+			 *
+			 * NOTE(review): InvalidateBuffer() is defined outside this
+			 * function; confirm that it is safe to call it for a descriptor
+			 * whose own tag was never inserted into the lookup table.
+			 */
+			buf->refcount = 0;
+			buf->flags = 0;
+			InvalidateBuffer(buf);
+			continue;
+		}
+
+		/* clear wait_backend_pid because the process was terminated already. */
+		buf->wait_backend_pid = 0;
+
+#ifdef DEBUG_BUFFER_CACHE_HIBERNATION
+		elog(DEBUG5,
+			 "resume [%d]\t%03x,%d,%d,%d,%d\t%08x,%d,%d,%d,%d,%d",
+			 buf->buf_id, buf->flags, buf->usage_count, buf->refcount,
+			 buf->wait_backend_pid, buf->freeNext,
+			 newHash, newTag.rnode.spcNode,
+			 newTag.rnode.dbNode, newTag.rnode.relNode,
+			 newTag.forkNum, newTag.blockNum);
+#endif
+	}
+
+	/*
+	 * adjust StrategyControl based on the change of shared_buffers.
+	 */
+	if (oldNBuffers != NBuffers)
+	{
+		AdjustStrategyControl(oldNBuffers);
+	}
+
+	elog(NOTICE,
+		 "buffer cache resumed successfully");
+
+cleanup:
+	for (i = 0; i < NBuffers; i++)
+	{
+		BufferDesc *buf;
+
+		buf = &BufferDescriptors[i];
+		UnlockBufHdr(buf);
+	}
+
+	/* free(NULL) is a no-op, so no guard is needed. */
+	free(buf_common);
+
+	return;
+}
diff --git src/backend/storage/buffer/freelist.c src/backend/storage/buffer/freelist.c
index bf9903b..ffc101d 100644
--- src/backend/storage/buffer/freelist.c
+++ src/backend/storage/buffer/freelist.c
@@ -347,6 +347,12 @@ StrategyInitialize(bool init)
}
else
Assert(!init);
+
+ if (BufferCacheHibernationLevel > 0)
+ {
+ ResisterBufferCacheHibernation(BUFFER_CACHE_HIBERNATION_TYPE_STRATEGY,
+ (char *)StrategyControl, sizeof(BufferStrategyControl), 1);
+ }
}


@@ -521,3 +527,47 @@ StrategyRejectBuffer(BufferAccessStrategy strategy, volatile BufferDesc *buf)

return true;
}
+
+/*
+ * AdjustStrategyControl -- fix up a restored StrategyControl after the
+ * number of shared buffers has changed
+ *
+ * 'oldNBuffers' is the number of buffers the restored StrategyControl was
+ * saved with.  Nothing is done when it equals the current NBuffers.
+ * Otherwise lastFreeBuffer is pointed at the last current buffer, and:
+ *	- after enlarging, an empty free list (firstFreeBuffer < 0) is made to
+ *	  start at the first buffer beyond the old pool;
+ *	- after shrinking, nextVictimBuffer and firstFreeBuffer are pulled back
+ *	  whenever they refer to a buffer that no longer exists.
+ * This is called only from bufmgr.c:ResumeBufferCacheHibernation().
+ */
+void
+AdjustStrategyControl(int oldNBuffers)
+{
+	if (oldNBuffers == NBuffers)
+		return;
+
+	/* enlarge or shrink the free buffer based on current NBuffers. */
+	StrategyControl->lastFreeBuffer = NBuffers - 1;
+
+	if (oldNBuffers < NBuffers)
+	{
+		/* shared_buffers enlarged: start at the next entry of the old tail. */
+		if (StrategyControl->firstFreeBuffer < 0)
+			StrategyControl->firstFreeBuffer = oldNBuffers;
+
+		return;
+	}
+
+	/* shared_buffers shrunk: clamp the victim pointer to the new tail. */
+	if (StrategyControl->nextVictimBuffer >= NBuffers)
+		StrategyControl->nextVictimBuffer = NBuffers - 1;
+
+	/* a free list head beyond the new tail becomes FREENEXT_END_OF_LIST(-1). */
+	if (StrategyControl->firstFreeBuffer >= NBuffers)
+		StrategyControl->firstFreeBuffer = FREENEXT_END_OF_LIST;
+}
diff --git src/backend/utils/misc/guc.c src/backend/utils/misc/guc.c
index 738e215..5affc6e 100644
--- src/backend/utils/misc/guc.c
+++ src/backend/utils/misc/guc.c
@@ -2361,6 +2361,18 @@ static struct config_int ConfigureNamesInt[] =
NULL, NULL, NULL
},

+ {
+ {"buffer_cache_hibernation_level", PGC_POSTMASTER, UNGROUPED,
+ gettext_noop("Sets buffer cache hibernation level."),
+ gettext_noop("0 to disable(default), "
+ "1 for saving buffer descriptors only(recommended), "
+ "2 for saving buffer descriptors and buffer blocks(slower at shutdown).")
+ },
+ &BufferCacheHibernationLevel,
+ 0, 0, 2,
+ NULL, NULL, NULL
+ },
+
/* End-of-list marker */
{
{NULL, 0, 0, NULL, NULL}, NULL, 0, 0, 0, NULL, NULL, NULL
diff --git src/backend/utils/misc/postgresql.conf.sample src/backend/utils/misc/postgresql.conf.sample
index b8a1582..44b6ff3 100644
--- src/backend/utils/misc/postgresql.conf.sample
+++ src/backend/utils/misc/postgresql.conf.sample
@@ -119,6 +119,17 @@
#maintenance_work_mem = 16MB # min 1MB
#max_stack_depth = 2MB # min 100kB

+
+# Buffer Cache Hibernation:
+# Suspend/resume buffer cache data structure using hibernation files
+# at shutdown/startup.
+#buffer_cache_hibernation_level = 0 # Sets buffer cache hibernation level.
+ # 0 to disable(default),
+ # 1 for saving buffer descriptors only
+ # (recommended),
+ # 2 for saving buffer descriptors and
+ # buffer blocks(slower at shutdown).
+
# - Kernel Resource Usage -

#max_files_per_process = 1000 # min 25
diff --git src/include/access/xlog.h src/include/access/xlog.h
index 7056fd6..7a9fb99 100644
--- src/include/access/xlog.h
+++ src/include/access/xlog.h
@@ -13,6 +13,7 @@

#include "access/rmgr.h"
#include "access/xlogdefs.h"
+#include "catalog/pg_control.h"
#include "lib/stringinfo.h"
#include "storage/buf.h"
#include "utils/pg_crc.h"
@@ -294,6 +295,7 @@ extern bool XLogInsertAllowed(void);
extern void GetXLogReceiptTime(TimestampTz *rtime, bool *fromStream);
extern XLogRecPtr GetXLogReplayRecPtr(void);

+extern bool GetControlFile(ControlFileData *controlFile);
extern void UpdateControlFile(void);
extern uint64 GetSystemIdentifier(void);
extern Size XLOGShmemSize(void);
diff --git src/include/storage/buf_internals.h src/include/storage/buf_internals.h
index b7d4ea5..d537ef1 100644
--- src/include/storage/buf_internals.h
+++ src/include/storage/buf_internals.h
@@ -167,6 +167,7 @@ typedef struct sbufdesc
*/
#define LockBufHdr(bufHdr) SpinLockAcquire(&(bufHdr)->buf_hdr_lock)
#define UnlockBufHdr(bufHdr) SpinLockRelease(&(bufHdr)->buf_hdr_lock)
+#define IsUnlockBufHdr(bufHdr) SpinLockFree(&(bufHdr)->buf_hdr_lock)


/* in buf_init.c */
@@ -190,6 +191,7 @@ extern bool StrategyRejectBuffer(BufferAccessStrategy strategy,
extern int StrategySyncStart(uint32 *complete_passes, uint32 *num_buf_alloc);
extern Size StrategyShmemSize(void);
extern void StrategyInitialize(bool init);
+extern void AdjustStrategyControl(int oldNBuffers);

/* buf_table.c */
extern Size BufTableShmemSize(int size);
diff --git src/include/storage/bufmgr.h src/include/storage/bufmgr.h
index b8fc87e..ddfeb9d 100644
--- src/include/storage/bufmgr.h
+++ src/include/storage/bufmgr.h
@@ -211,6 +211,20 @@ extern void BgBufferSync(void);

extern void AtProcExit_LocalBuffers(void);

+/* buffer cache hibernation support stuff */
+extern int BufferCacheHibernationLevel;
+
+/*
+ * Identifies one of the buffer cache hibernation files.  The values are
+ * used directly as indexes into the BufferCacheHibernationData[] table in
+ * bufmgr.c, so their order must match the order of that table's entries.
+ */
+typedef enum BufferHibernationFileType
+{
+ /* the BufferStrategyControl structure */
+ BUFFER_CACHE_HIBERNATION_TYPE_STRATEGY,
+ /* the array of buffer descriptors */
+ BUFFER_CACHE_HIBERNATION_TYPE_DESCRIPTORS,
+ /* the buffer blocks; saved only at hibernation level 2 */
+ BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS
+} BufferHibernationFileType;
+
+extern void ResisterBufferCacheHibernation(BufferHibernationFileType id,
+ char *ptr, Size record_length, Size num_records);
+extern void ResumeBufferCacheHibernation(void);
+
/* in freelist.c */
extern BufferAccessStrategy GetAccessStrategy(BufferAccessStrategyType btype);
extern void FreeAccessStrategy(BufferAccessStrategy strategy);

In response to

Responses

Browse pgsql-hackers by date

  From Date Subject
Next Message Gurjeet Singh 2011-06-05 14:21:36 Re: Review: psql include file using relative path
Previous Message Robert Haas 2011-06-05 03:03:45 heap vacuum & cleanup locks