Re: patch for new feature: Buffer Cache Hibernation

From: Bruce Momjian <bruce(at)momjian(dot)us>
To: Mitsuru IWASAKI <iwasaki(at)jp(dot)FreeBSD(dot)org>
Cc: greg(at)2ndQuadrant(dot)com, pgsql-hackers(at)postgresql(dot)org
Subject: Re: patch for new feature: Buffer Cache Hibernation
Date: 2011-10-14 00:02:17
Message-ID: 201110140002.p9E02HB11710@momjian.us
Views: Raw Message | Whole Thread | Download mbox | Resend email
Thread:
Lists: pgsql-hackers


Should this be marked as TODO?

---------------------------------------------------------------------------

Mitsuru IWASAKI wrote:
> Hi,
>
> > On 05/07/2011 03:32 AM, Mitsuru IWASAKI wrote:
> > > For 1, I've just finished my work. The latest patch is available at:
> > > http://people.freebsd.org/~iwasaki/postgres/buffer-cache-hibernation-postgresql-20110507.patch
> > >
> >
> > Reminder here--we can't accept code based on it being published to a web
> > page. You'll need to e-mail it to the pgsql-hackers mailing list to be
> > considered for the next PostgreSQL CommitFest, which is starting in a
> > few weeks. Code submitted to the mailing list is considered a release
> > of it to the project under the PostgreSQL license, which we can't just
> > assume for things when given only a URL to them.
>
> Sorry about that, but I had enough time to revise my patches this week-end.
> I attached the patches in this mail, and will update CommitFest page soon.
>
> > Also, you suggested you were out of time to work on this. If that's the
> > case, we'd like to know that so we don't keep cc'ing you about things in
> > expectation of an answer. Someone else may pick this up as a project to
> > continue working on. But it's going to need a fair amount of revision
> > before it matches what people want here, and I'm not sure how much of
> > what you've written is going to end up in any commit that may happen
> > from this idea.
>
> It seems that I don't have enough time to complete this work.
> You don't need to keep cc'ing me, and I'd be very happy for postgres to be
> the first DBMS which supports the buffer cache hibernation feature.
>
> Thanks!
>
>
> diff --git src/backend/access/transam/xlog.c src/backend/access/transam/xlog.c
> index b0e4c41..7a3a207 100644
> --- src/backend/access/transam/xlog.c
> +++ src/backend/access/transam/xlog.c
> @@ -4834,6 +4834,19 @@ ReadControlFile(void)
> #endif
> }
>
> +bool
> +GetControlFile(ControlFileData *controlFile)
> +{
> + if (ControlFile == NULL)
> + {
> + return false;
> + }
> +
> + memcpy(controlFile, ControlFile, sizeof(ControlFileData));
> +
> + return true;
> +}
> +
> void
> UpdateControlFile(void)
> {
> diff --git src/backend/bootstrap/bootstrap.c src/backend/bootstrap/bootstrap.c
> index fc093cc..7ecf6bb 100644
> --- src/backend/bootstrap/bootstrap.c
> +++ src/backend/bootstrap/bootstrap.c
> @@ -360,6 +360,15 @@ AuxiliaryProcessMain(int argc, char *argv[])
> BaseInit();
>
> /*
> + * Only StartupProcess can call ResumeBufferCacheHibernation() after
> + * InitFileAccess() and smgrinit().
> + */
> + if (auxType == StartupProcess && BufferCacheHibernationLevel > 0)
> + {
> + ResumeBufferCacheHibernation();
> + }
> +
> + /*
> * When we are an auxiliary process, we aren't going to do the full
> * InitPostgres pushups, but there are a couple of things that need to get
> * lit up even in an auxiliary process.
> diff --git src/backend/storage/buffer/buf_init.c src/backend/storage/buffer/buf_init.c
> index dadb49d..52eb51a 100644
> --- src/backend/storage/buffer/buf_init.c
> +++ src/backend/storage/buffer/buf_init.c
> @@ -127,6 +127,14 @@ InitBufferPool(void)
>
> /* Init other shared buffer-management stuff */
> StrategyInitialize(!foundDescs);
> +
> + if (BufferCacheHibernationLevel > 0)
> + {
> + ResisterBufferCacheHibernation(BUFFER_CACHE_HIBERNATION_TYPE_DESCRIPTORS,
> + (char *)BufferDescriptors, sizeof(BufferDesc), NBuffers);
> + ResisterBufferCacheHibernation(BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS,
> + (char *)BufferBlocks, BLCKSZ, NBuffers);
> + }
> }
>
> /*
> diff --git src/backend/storage/buffer/bufmgr.c src/backend/storage/buffer/bufmgr.c
> index f96685d..dba8ebf 100644
> --- src/backend/storage/buffer/bufmgr.c
> +++ src/backend/storage/buffer/bufmgr.c
> @@ -31,6 +31,7 @@
> #include "postgres.h"
>
> #include <sys/file.h>
> +#include <sys/stat.h>
> #include <unistd.h>
>
> #include "catalog/catalog.h"
> @@ -61,6 +62,13 @@
> #define BUF_WRITTEN 0x01
> #define BUF_REUSABLE 0x02
>
> +/*
> + * Buffer Cache Hibernation stuff.
> + */
> +/* enable this to debug buffer cache hibernation. */
> +#if 0
> +#define DEBUG_BUFFER_CACHE_HIBERNATION
> +#endif
>
> /* GUC variables */
> bool zero_damaged_pages = false;
> @@ -765,6 +773,16 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
> }
> }
>
> +#ifdef DEBUG_BUFFER_CACHE_HIBERNATION
> + elog(DEBUG5,
> + "alloc [%d]\t%03x,%d,%d,%d,%d\t%08x,%d,%d,%d,%d,%d",
> + buf->buf_id, buf->flags, buf->usage_count, buf->refcount,
> + buf->wait_backend_pid, buf->freeNext,
> + newHash, newTag.rnode.spcNode,
> + newTag.rnode.dbNode, newTag.rnode.relNode,
> + newTag.forkNum, newTag.blockNum);
> +#endif
> +
> return buf;
> }
>
> @@ -800,6 +818,16 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
> * the old content is no longer relevant. (The usage_count starts out at
> * 1 so that the buffer can survive one clock-sweep pass.)
> */
> +#ifdef DEBUG_BUFFER_CACHE_HIBERNATION
> + elog(DEBUG5,
> + "rename [%d]\t%03x,%d,%d,%d,%d\t%08x,%d,%d,%d,%d,%d",
> + buf->buf_id, buf->flags, buf->usage_count, buf->refcount,
> + buf->wait_backend_pid, buf->freeNext,
> + oldHash, oldTag.rnode.spcNode,
> + oldTag.rnode.dbNode, oldTag.rnode.relNode,
> + oldTag.forkNum, oldTag.blockNum);
> +#endif
> +
> buf->tag = newTag;
> buf->flags &= ~(BM_VALID | BM_DIRTY | BM_JUST_DIRTIED | BM_CHECKPOINT_NEEDED | BM_IO_ERROR | BM_PERMANENT);
> if (relpersistence == RELPERSISTENCE_PERMANENT)
> @@ -2772,3 +2800,716 @@ local_buffer_write_error_callback(void *arg)
> pfree(path);
> }
> }
> +
> +/* ----------------------------------------------------------------
> + * Buffer Cache Hibernation support stuff
> + *
> + * Suspend/resume buffer cache data structure using hibernation files
> + * at shutdown/startup.
> + * ----------------------------------------------------------------
> + */
> +
> +int BufferCacheHibernationLevel = 0;
> +
> +#define BUFFER_CACHE_HIBERNATION_FILE_STRATEGY "global/pg_buffer_cache_hibernation_strategy"
> +#define BUFFER_CACHE_HIBERNATION_FILE_DESCRIPTORS "global/pg_buffer_cache_hibernation_descriptors"
> +#define BUFFER_CACHE_HIBERNATION_FILE_BLOCKS "global/pg_buffer_cache_hibernation_blocks"
> +#define BUFFER_CACHE_HIBERNATION_FILE_CRC32 "global/pg_buffer_cache_hibernation_crc32"
> +
> +static struct
> +{
> + char *hibernation_file;
> + char *data_ptr;
> + Size record_length;
> + Size num_records;
> + pg_crc32 crc;
> +} BufferCacheHibernationData[] =
> +{
> + /* BufferStrategyControl */
> + {
> + BUFFER_CACHE_HIBERNATION_FILE_STRATEGY,
> + NULL, 0, 0, 0
> + },
> +
> + /* BufferDescriptors */
> + {
> + BUFFER_CACHE_HIBERNATION_FILE_DESCRIPTORS,
> + NULL, 0, 0, 0
> + },
> +
> + /* BufferBlocks */
> + {
> + BUFFER_CACHE_HIBERNATION_FILE_BLOCKS,
> + NULL, 0, 0, 0
> + },
> +
> + /* End-of-list marker */
> + {
> + NULL,
> + NULL, 0, 0, 0
> + },
> +};
> +
> +static ControlFileData controlFile;
> +static bool controlFileInitialized = false;
> +
> +/*
> + * AtProcExit_BufferCacheHibernation:
> + * store the buffer cache into hibernation files at shutdown.
> + */
> +static void
> +AtProcExit_BufferCacheHibernation(int code, Datum arg)
> +{
> + BufferHibernationFileType id;
> + int i;
> + int fd;
> +
> + if (BufferCacheHibernationLevel == 0)
> + {
> + return;
> + }
> +
> + /*
> + * get the control file to check the system state validation.
> + */
> + if (GetControlFile(&controlFile) == false)
> + {
> + elog(WARNING,
> + "could not get control file, "
> + "aborting buffer cache hibernation");
> + return;
> + }
> +
> + if (controlFile.state != DB_SHUTDOWNED)
> + {
> + elog(WARNING,
> + "database system was not shut down normally, "
> + "aborting buffer cache hibernation");
> + return;
> + }
> +
> + /*
> + * suspend buffer cache data structure into hibernation files.
> + */
> + for (id = 0; BufferCacheHibernationData[id].hibernation_file != NULL; id++)
> + {
> + Size record_length;
> + Size num_records;
> + char *ptr;
> + pg_crc32 crc;
> +
> + if (BufferCacheHibernationLevel < 2 &&
> + id == BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS)
> + {
> + continue;
> + }
> +
> + if (BufferCacheHibernationData[id].data_ptr == NULL ||
> + BufferCacheHibernationData[id].record_length == 0 ||
> + BufferCacheHibernationData[id].num_records == 0)
> + {
> + elog(WARNING,
> + "ResisterBufferCacheHibernation() was not called for %s",
> + BufferCacheHibernationData[id].hibernation_file);
> + goto cleanup;
> + }
> +
> + fd = BasicOpenFile(BufferCacheHibernationData[id].hibernation_file,
> + O_CREAT | O_WRONLY | O_TRUNC | PG_BINARY, S_IRUSR | S_IWUSR);
> + if (fd < 0)
> + {
> + elog(WARNING,
> + "could not open %s",
> + BufferCacheHibernationData[id].hibernation_file);
> + goto cleanup;
> + }
> +
> + record_length = BufferCacheHibernationData[id].record_length;
> + num_records = BufferCacheHibernationData[id].num_records;
> +
> + elog(NOTICE,
> + "buffer cache hibernate into %s",
> + BufferCacheHibernationData[id].hibernation_file);
> +
> + INIT_CRC32(crc);
> + for (i = 0; i < num_records; i++)
> + {
> + ptr = BufferCacheHibernationData[id].data_ptr + (i * record_length);
> + if (write(fd, (void *)ptr, record_length) != record_length)
> + {
> + elog(WARNING,
> + "could not write %s",
> + BufferCacheHibernationData[id].hibernation_file);
> + goto cleanup;
> + }
> +
> + COMP_CRC32(crc, ptr, record_length);
> + }
> +
> + FIN_CRC32(crc);
> + close(fd);
> +
> + BufferCacheHibernationData[id].crc = crc;
> + }
> +
> + /*
> + * save the computed crc values for the validations at resuming.
> + */
> + fd = BasicOpenFile(BUFFER_CACHE_HIBERNATION_FILE_CRC32,
> + O_CREAT | O_WRONLY | O_TRUNC | PG_BINARY, S_IRUSR | S_IWUSR);
> + if (fd < 0)
> + {
> + elog(WARNING,
> + "could not open %s",
> + BUFFER_CACHE_HIBERNATION_FILE_CRC32);
> + goto cleanup;
> + }
> +
> + for (id = 0; BufferCacheHibernationData[id].hibernation_file != NULL; id++)
> + {
> + pg_crc32 crc;
> +
> + if (BufferCacheHibernationLevel < 2 &&
> + id == BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS)
> + {
> + continue;
> + }
> +
> + crc = BufferCacheHibernationData[id].crc;
> + if (write(fd, (void *)&crc, sizeof(pg_crc32)) != sizeof(pg_crc32))
> + {
> + elog(WARNING,
> + "could not write %s for %s",
> + BUFFER_CACHE_HIBERNATION_FILE_CRC32,
> + BufferCacheHibernationData[id].hibernation_file);
> + goto cleanup;
> + }
> + }
> + close(fd);
> +
> + elog(NOTICE,
> + "buffer cache suspended successfully");
> +
> + return;
> +
> +cleanup:
> + for (id = 0; BufferCacheHibernationData[id].hibernation_file != NULL; id++)
> + {
> + unlink(BufferCacheHibernationData[id].hibernation_file);
> + }
> +
> + return;
> +}
> +
> +/*
> + * ResisterBufferCacheHibernation:
> + * register the buffer cache data structure info.
> + */
> +void
> +ResisterBufferCacheHibernation(BufferHibernationFileType id, char *ptr, Size record_length, Size num_records)
> +{
> + static bool first_time = true;
> +
> + if (BufferCacheHibernationLevel == 0)
> + {
> + return;
> + }
> +
> + if (id != BUFFER_CACHE_HIBERNATION_TYPE_STRATEGY &&
> + id != BUFFER_CACHE_HIBERNATION_TYPE_DESCRIPTORS &&
> + id != BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS)
> + {
> + return;
> + }
> +
> + if (first_time)
> + {
> + /*
> + * AtProcExit_BufferCacheHibernation to be called at shutdown.
> + */
> + on_shmem_exit(AtProcExit_BufferCacheHibernation, 0);
> + first_time = false;
> + }
> +
> + /*
> + * get the control file to check the system state and
> + * hibernation file validations.
> + */
> + if (controlFileInitialized == false)
> + {
> + if (GetControlFile(&controlFile) == true)
> + {
> + controlFileInitialized = true;
> + }
> + }
> +
> + BufferCacheHibernationData[id].data_ptr = ptr;
> + BufferCacheHibernationData[id].record_length = record_length;
> + BufferCacheHibernationData[id].num_records = num_records;
> +}
> +
> +/*
> + * ResumeBufferCacheHibernation:
> + * resume the buffer cache from hibernation file at startup.
> + */
> +void
> +ResumeBufferCacheHibernation(void)
> +{
> + BufferHibernationFileType id;
> + int i;
> + int fd;
> + Size num_records;
> + Size record_length;
> + char *buf_common;
> + int oldNBuffers;
> + bool buffer_block_processed;
> +
> + if (BufferCacheHibernationLevel == 0)
> + {
> + return;
> + }
> +
> + buf_common = NULL;
> + buffer_block_processed = false;
> +
> + /*
> + * lock all buffer descriptors to prevent other processes from
> + * updating buffers.
> + */
> + for (i = 0; i < NBuffers; i++)
> + {
> + BufferDesc *buf;
> +
> + buf = &BufferDescriptors[i];
> + LockBufHdr(buf);
> + }
> +
> + /*
> + * get the control file to check the system state and
> + * hibernation file validations.
> + */
> + if (controlFileInitialized == false)
> + {
> + elog(WARNING,
> + "could not get control file, "
> + "aborting buffer cache hibernation");
> + goto cleanup;
> + }
> +
> + if (controlFile.state != DB_SHUTDOWNED)
> + {
> + elog(WARNING,
> + "database system was not shut down normally, "
> + "aborting buffer cache hibernation");
> + goto cleanup;
> + }
> +
> + /*
> + * read the crc values which was computed when the hibernation
> + * files were created.
> + */
> + fd = BasicOpenFile(BUFFER_CACHE_HIBERNATION_FILE_CRC32,
> + O_RDONLY | PG_BINARY, S_IRUSR | S_IWUSR);
> + if (fd < 0)
> + {
> + elog(WARNING,
> + "could not open %s",
> + BUFFER_CACHE_HIBERNATION_FILE_CRC32);
> + goto cleanup;
> + }
> +
> + for (id = 0; BufferCacheHibernationData[id].hibernation_file != NULL; id++)
> + {
> + pg_crc32 crc;
> +
> + if (BufferCacheHibernationLevel < 2 &&
> + id == BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS)
> + {
> + continue;
> + }
> +
> + if (read(fd, (void *)&crc, sizeof(pg_crc32)) != sizeof(pg_crc32))
> + {
> + if (BufferCacheHibernationLevel == 2 &&
> + id == BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS)
> + {
> + /*
> + * if buffer_cache_hibernation_level changes 1 to 2,
> + * the crc value of buffer block hibernation file may not exist.
> + * just ignore it here.
> + */
> + continue;
> + }
> +
> + elog(WARNING,
> + "could not read %s for %s",
> + BUFFER_CACHE_HIBERNATION_FILE_CRC32,
> + BufferCacheHibernationData[id].hibernation_file);
> + close(fd);
> + goto cleanup;
> + }
> + BufferCacheHibernationData[id].crc = crc;
> + }
> +
> + close(fd);
> +
> + /*
> + * allocate a buffer to read the contents of the hibernation files
> + * for validations.
> + */
> + record_length = 0;
> + for (id = 0; BufferCacheHibernationData[id].hibernation_file != NULL; id++)
> + {
> + if (record_length < BufferCacheHibernationData[id].record_length)
> + {
> + record_length = BufferCacheHibernationData[id].record_length;
> + }
> + }
> +
> + buf_common = malloc(record_length);
> + Assert(buf_common != NULL);
> +
> + /* assume that the number of buffers have not changed. */
> + oldNBuffers = NBuffers;
> +
> + /*
> + * check if all hibernation files are valid.
> + */
> + for (id = 0; BufferCacheHibernationData[id].hibernation_file != NULL; id++)
> + {
> + struct stat sb;
> + pg_crc32 crc;
> +
> + if (BufferCacheHibernationLevel < 2 &&
> + id == BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS)
> + {
> + continue;
> + }
> +
> + if (BufferCacheHibernationData[id].data_ptr == NULL ||
> + BufferCacheHibernationData[id].record_length == 0 ||
> + BufferCacheHibernationData[id].num_records == 0)
> + {
> + elog(WARNING,
> + "ResisterBufferCacheHibernation() was not called for %s",
> + BufferCacheHibernationData[id].hibernation_file);
> + goto cleanup;
> + }
> +
> + fd = BasicOpenFile(BufferCacheHibernationData[id].hibernation_file,
> + O_RDONLY | PG_BINARY, S_IRUSR | S_IWUSR);
> + if (fd < 0)
> + {
> + if (BufferCacheHibernationLevel == 2 &&
> + id == BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS)
> + {
> + /*
> + * if buffer_cache_hibernation_level changes 1 to 2,
> + * the buffer block hibernation file may not exist.
> + * just ignore it here.
> + */
> + continue;
> + }
> +
> + goto cleanup;
> + }
> +
> + if (fstat(fd, &sb) < 0)
> + {
> + elog(WARNING,
> + "could not get stats of the buffer cache hibernation file: %s",
> + BufferCacheHibernationData[id].hibernation_file);
> + close(fd);
> + goto cleanup;
> + }
> +
> + record_length = BufferCacheHibernationData[id].record_length;
> + num_records = BufferCacheHibernationData[id].num_records;
> +
> + if (sb.st_size != (record_length * num_records))
> + {
> + /* The size of StrategyControl should be the same always. */
> + if (id == BUFFER_CACHE_HIBERNATION_TYPE_STRATEGY ||
> + (sb.st_size % record_length) > 0)
> + {
> + elog(WARNING,
> + "size mismatch on the buffer cache hibernation file: %s",
> + BufferCacheHibernationData[id].hibernation_file);
> + close(fd);
> + goto cleanup;
> + }
> +
> + /*
> + * The number of records of buffer descriptors and blocks
> + * should be the same.
> + */
> + if (oldNBuffers != NBuffers &&
> + oldNBuffers != (sb.st_size / record_length))
> + {
> + elog(WARNING,
> + "size mismatch on the buffer cache hibernation file: %s",
> + BufferCacheHibernationData[id].hibernation_file);
> + close(fd);
> + goto cleanup;
> + }
> +
> + oldNBuffers = sb.st_size / record_length;
> +
> + elog(NOTICE,
> + "shared_buffers have changed from %d to %d: %s",
> + oldNBuffers, NBuffers,
> + BufferCacheHibernationData[id].hibernation_file);
> +
> + /* use the original size to compute CRC of the hibernation file. */
> + num_records = oldNBuffers;
> + }
> +
> + if ((pg_time_t)sb.st_mtime < controlFile.time)
> + {
> + elog(WARNING,
> + "the hibernation file is older than control file: %s",
> + BufferCacheHibernationData[id].hibernation_file);
> + close(fd);
> + goto cleanup;
> + }
> +
> + INIT_CRC32(crc);
> + for (i = 0; i < num_records; i++)
> + {
> + if (read(fd, (void *)buf_common, record_length) != record_length)
> + {
> + elog(WARNING,
> + "could not read the buffer cache hibernation file: %s",
> + BufferCacheHibernationData[id].hibernation_file);
> + close(fd);
> + goto cleanup;
> + }
> +
> + COMP_CRC32(crc, buf_common, record_length);
> +
> + /*
> + * buffer descriptors validations.
> + */
> + if (id == BUFFER_CACHE_HIBERNATION_TYPE_DESCRIPTORS)
> + {
> + BufferDesc *buf;
> + BufFlags abnormal_flags;
> +
> + if (i >= NBuffers)
> + {
> + continue;
> + }
> +
> + abnormal_flags = (BM_DIRTY | BM_IO_IN_PROGRESS | BM_IO_ERROR |
> + BM_JUST_DIRTIED | BM_PIN_COUNT_WAITER);
> +
> + buf = (BufferDesc *)buf_common;
> +
> + if (buf->flags & abnormal_flags)
> + {
> + elog(WARNING,
> + "abnormal flags in buffer descriptors: %d",
> + buf->flags);
> + close(fd);
> + goto cleanup;
> + }
> +
> + if (buf->usage_count > BM_MAX_USAGE_COUNT)
> + {
> + elog(WARNING,
> + "invalid usage count in buffer descriptors: %d",
> + buf->usage_count);
> + close(fd);
> + goto cleanup;
> + }
> +
> + if (buf->buf_id < 0 || buf->buf_id >= num_records)
> + {
> + elog(WARNING,
> + "invalid buffer id in buffer descriptors: %d",
> + buf->buf_id);
> + close(fd);
> + goto cleanup;
> + }
> + }
> + }
> +
> + FIN_CRC32(crc);
> + close(fd);
> +
> + if (!EQ_CRC32(BufferCacheHibernationData[id].crc, crc))
> + {
> + elog(WARNING,
> + "crc mismatch on the buffer cache hibernation file: %s",
> + BufferCacheHibernationData[id].hibernation_file);
> + close(fd);
> + goto cleanup;
> + }
> + }
> +
> + /*
> + * resume the buffer cache data structure from the hibernation files.
> + */
> + for (id = 0; BufferCacheHibernationData[id].hibernation_file != NULL; id++)
> + {
> + int fd;
> + char *ptr;
> +
> + if (BufferCacheHibernationLevel < 2 &&
> + id == BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS)
> + {
> + continue;
> + }
> +
> + record_length = BufferCacheHibernationData[id].record_length;
> + num_records = BufferCacheHibernationData[id].num_records;
> +
> + if (id != BUFFER_CACHE_HIBERNATION_TYPE_STRATEGY)
> + {
> + /* use the smaller number of buffers. */
> + num_records = (oldNBuffers < NBuffers)? oldNBuffers : NBuffers;
> + }
> +
> + fd = BasicOpenFile(BufferCacheHibernationData[id].hibernation_file,
> + O_RDONLY | PG_BINARY, S_IRUSR | S_IWUSR);
> + if (fd < 0)
> + {
> + if (BufferCacheHibernationLevel == 2 &&
> + id == BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS)
> + {
> + /*
> + * if buffer_cache_hibernation_level changes 1 to 2,
> + * the buffer block hibernation file may not exist.
> + * just ignore it here.
> + */
> + continue;
> + }
> +
> + goto cleanup;
> + }
> +
> + elog(NOTICE,
> + "buffer cache resume from %s(%d bytes * %d records)",
> + BufferCacheHibernationData[id].hibernation_file,
> + record_length, num_records);
> +
> + for (i = 0; i < num_records; i++)
> + {
> + ptr = BufferCacheHibernationData[id].data_ptr + (i * record_length);
> + read(fd, (void *)ptr, record_length);
> +
> + /* Re-lock the buffer descriptor if necessary. */
> + if (id == BUFFER_CACHE_HIBERNATION_TYPE_DESCRIPTORS)
> + {
> + BufferDesc *buf;
> +
> + buf = (BufferDesc *)ptr;
> + if (IsUnlockBufHdr(buf))
> + {
> + LockBufHdr(buf);
> + }
> + }
> + }
> +
> + close(fd);
> +
> + if (id == BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS)
> + {
> + buffer_block_processed = true;
> + }
> + }
> +
> + if (buffer_block_processed == false)
> + {
> + /* we didn't use the buffer block hibernation file, so delete it now. */
> + id = BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS;
> + unlink(BufferCacheHibernationData[id].hibernation_file);
> + }
> +
> + /*
> + * set the rest data structures (eg. lookup hashtable) up
> + * based on the buffer descriptors.
> + */
> + num_records = (oldNBuffers < NBuffers)? oldNBuffers : NBuffers;
> + for (i = 0; i < num_records; i++)
> + {
> + BufferDesc *buf;
> + BufferTag newTag;
> + uint32 newHash;
> + int buf_id;
> +
> + buf = &BufferDescriptors[i];
> + if (buf->tag.rnode.spcNode == InvalidOid &&
> + buf->tag.rnode.dbNode == InvalidOid &&
> + buf->tag.rnode.relNode == InvalidOid)
> + {
> + continue;
> + }
> +
> + INIT_BUFFERTAG(newTag, buf->tag.rnode, buf->tag.forkNum, buf->tag.blockNum);
> + newHash = BufTableHashCode(&newTag);
> +
> + if (buffer_block_processed == false)
> + {
> + Block bufBlock;
> + SMgrRelation smgr;
> +
> + /*
> + * re-read buffer block.
> + */
> + bufBlock = BufHdrGetBlock(buf);
> + smgr = smgropen(buf->tag.rnode, InvalidBackendId);
> + smgrread(smgr, newTag.forkNum, newTag.blockNum, (char *) bufBlock);
> + }
> +
> + buf_id = BufTableInsert(&newTag, newHash, buf->buf_id);
> + if (buf_id != -1)
> + {
> + /* the entry exists already, return it to the freelist. */
> + buf->refcount = 0;
> + buf->flags = 0;
> + InvalidateBuffer(buf);
> + continue;
> + }
> +
> + /* clear wait_backend_pid because the process was terminated already. */
> + buf->wait_backend_pid = 0;
> +
> +#ifdef DEBUG_BUFFER_CACHE_HIBERNATION
> + elog(DEBUG5,
> + "resume [%d]\t%03x,%d,%d,%d,%d\t%08x,%d,%d,%d,%d,%d",
> + buf->buf_id, buf->flags, buf->usage_count, buf->refcount,
> + buf->wait_backend_pid, buf->freeNext,
> + newHash, newTag.rnode.spcNode,
> + newTag.rnode.dbNode, newTag.rnode.relNode,
> + newTag.forkNum, newTag.blockNum);
> +#endif
> + }
> +
> + /*
> + * adjust StrategyControl based on the change of shared_buffers.
> + */
> + if (oldNBuffers != NBuffers)
> + {
> + AdjustStrategyControl(oldNBuffers);
> + }
> +
> + elog(NOTICE,
> + "buffer cache resumed successfully");
> +
> +cleanup:
> + for (i = 0; i < NBuffers; i++)
> + {
> + BufferDesc *buf;
> +
> + buf = &BufferDescriptors[i];
> + UnlockBufHdr(buf);
> + }
> +
> + if (buf_common != NULL)
> + {
> + free(buf_common);
> + }
> +
> + return;
> +}
> diff --git src/backend/storage/buffer/freelist.c src/backend/storage/buffer/freelist.c
> index bf9903b..ffc101d 100644
> --- src/backend/storage/buffer/freelist.c
> +++ src/backend/storage/buffer/freelist.c
> @@ -347,6 +347,12 @@ StrategyInitialize(bool init)
> }
> else
> Assert(!init);
> +
> + if (BufferCacheHibernationLevel > 0)
> + {
> + ResisterBufferCacheHibernation(BUFFER_CACHE_HIBERNATION_TYPE_STRATEGY,
> + (char *)StrategyControl, sizeof(BufferStrategyControl), 1);
> + }
> }
>
>
> @@ -521,3 +527,47 @@ StrategyRejectBuffer(BufferAccessStrategy strategy, volatile BufferDesc *buf)
>
> return true;
> }
> +
> +/*
> + * AdjustStrategyControl -- adjust the member variables of StrategyControl
> + *
> + * If the shared_buffers setting had changed, restored StrategyControl
> + * needs to be adjusted for in both cases of shrinking and enlarging.
> + * This is called only from bufmgr.c:ResumeBufferCacheHibernation().
> + */
> +void
> +AdjustStrategyControl(int oldNBuffers)
> +{
> + if (oldNBuffers == NBuffers)
> + {
> + return;
> + }
> +
> + /* enlarge or shrink the free buffer based on current NBuffers. */
> + StrategyControl->lastFreeBuffer = NBuffers - 1;
> +
> + /* shared_buffers shrunk. */
> + if (oldNBuffers > NBuffers)
> + {
> + if (StrategyControl->nextVictimBuffer >= NBuffers)
> + {
> + /* set the tail of buffers. */
> + StrategyControl->nextVictimBuffer = NBuffers - 1;
> + }
> +
> + if (StrategyControl->firstFreeBuffer >= NBuffers)
> + {
> + /* set FREENEXT_END_OF_LIST(-1). */
> + StrategyControl->firstFreeBuffer = FREENEXT_END_OF_LIST;
> + }
> + }
> + else
> + /* shared_buffers enlarged. */
> + {
> + if (StrategyControl->firstFreeBuffer < 0)
> + {
> + /* set the next entry of the tail of old buffers. */
> + StrategyControl->firstFreeBuffer = oldNBuffers;
> + }
> + }
> +}
> diff --git src/backend/utils/misc/guc.c src/backend/utils/misc/guc.c
> index 738e215..5affc6e 100644
> --- src/backend/utils/misc/guc.c
> +++ src/backend/utils/misc/guc.c
> @@ -2361,6 +2361,18 @@ static struct config_int ConfigureNamesInt[] =
> NULL, NULL, NULL
> },
>
> + {
> + {"buffer_cache_hibernation_level", PGC_POSTMASTER, UNGROUPED,
> + gettext_noop("Sets buffer cache hibernation level."),
> + gettext_noop("0 to disable(default), "
> + "1 for saving buffer descriptors only(recommended), "
> + "2 for saving buffer descriptors and buffer blocks(slower at shutdown).")
> + },
> + &BufferCacheHibernationLevel,
> + 0, 0, 2,
> + NULL, NULL, NULL
> + },
> +
> /* End-of-list marker */
> {
> {NULL, 0, 0, NULL, NULL}, NULL, 0, 0, 0, NULL, NULL, NULL
> diff --git src/backend/utils/misc/postgresql.conf.sample src/backend/utils/misc/postgresql.conf.sample
> index b8a1582..44b6ff3 100644
> --- src/backend/utils/misc/postgresql.conf.sample
> +++ src/backend/utils/misc/postgresql.conf.sample
> @@ -119,6 +119,17 @@
> #maintenance_work_mem = 16MB # min 1MB
> #max_stack_depth = 2MB # min 100kB
>
> +
> +# Buffer Cache Hibernation:
> +# Suspend/resume buffer cache data structure using hibernation files
> +# at shutdown/startup.
> +#buffer_cache_hibernation_level = 0 # Sets buffer cache hibernation level.
> + # 0 to disable(default),
> + # 1 for saving buffer descriptors only
> + # (recommended),
> + # 2 for saving buffer descriptors and
> + # buffer blocks(slower at shutdown).
> +
> # - Kernel Resource Usage -
>
> #max_files_per_process = 1000 # min 25
> diff --git src/include/access/xlog.h src/include/access/xlog.h
> index 7056fd6..7a9fb99 100644
> --- src/include/access/xlog.h
> +++ src/include/access/xlog.h
> @@ -13,6 +13,7 @@
>
> #include "access/rmgr.h"
> #include "access/xlogdefs.h"
> +#include "catalog/pg_control.h"
> #include "lib/stringinfo.h"
> #include "storage/buf.h"
> #include "utils/pg_crc.h"
> @@ -294,6 +295,7 @@ extern bool XLogInsertAllowed(void);
> extern void GetXLogReceiptTime(TimestampTz *rtime, bool *fromStream);
> extern XLogRecPtr GetXLogReplayRecPtr(void);
>
> +extern bool GetControlFile(ControlFileData *controlFile);
> extern void UpdateControlFile(void);
> extern uint64 GetSystemIdentifier(void);
> extern Size XLOGShmemSize(void);
> diff --git src/include/storage/buf_internals.h src/include/storage/buf_internals.h
> index b7d4ea5..d537ef1 100644
> --- src/include/storage/buf_internals.h
> +++ src/include/storage/buf_internals.h
> @@ -167,6 +167,7 @@ typedef struct sbufdesc
> */
> #define LockBufHdr(bufHdr) SpinLockAcquire(&(bufHdr)->buf_hdr_lock)
> #define UnlockBufHdr(bufHdr) SpinLockRelease(&(bufHdr)->buf_hdr_lock)
> +#define IsUnlockBufHdr(bufHdr) SpinLockFree(&(bufHdr)->buf_hdr_lock)
>
>
> /* in buf_init.c */
> @@ -190,6 +191,7 @@ extern bool StrategyRejectBuffer(BufferAccessStrategy strategy,
> extern int StrategySyncStart(uint32 *complete_passes, uint32 *num_buf_alloc);
> extern Size StrategyShmemSize(void);
> extern void StrategyInitialize(bool init);
> +extern void AdjustStrategyControl(int oldNBuffers);
>
> /* buf_table.c */
> extern Size BufTableShmemSize(int size);
> diff --git src/include/storage/bufmgr.h src/include/storage/bufmgr.h
> index b8fc87e..ddfeb9d 100644
> --- src/include/storage/bufmgr.h
> +++ src/include/storage/bufmgr.h
> @@ -211,6 +211,20 @@ extern void BgBufferSync(void);
>
> extern void AtProcExit_LocalBuffers(void);
>
> +/* buffer cache hibernation support stuff */
> +extern int BufferCacheHibernationLevel;
> +
> +typedef enum BufferHibernationFileType
> +{
> + BUFFER_CACHE_HIBERNATION_TYPE_STRATEGY,
> + BUFFER_CACHE_HIBERNATION_TYPE_DESCRIPTORS,
> + BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS
> +} BufferHibernationFileType;
> +
> +extern void ResisterBufferCacheHibernation(BufferHibernationFileType id,
> + char *ptr, Size record_length, Size num_records);
> +extern void ResumeBufferCacheHibernation(void);
> +
> /* in freelist.c */
> extern BufferAccessStrategy GetAccessStrategy(BufferAccessStrategyType btype);
> extern void FreeAccessStrategy(BufferAccessStrategy strategy);
>
> --
> Sent via pgsql-hackers mailing list (pgsql-hackers(at)postgresql(dot)org)
> To make changes to your subscription:
> http://www.postgresql.org/mailpref/pgsql-hackers

--
Bruce Momjian <bruce(at)momjian(dot)us> http://momjian.us
EnterpriseDB http://enterprisedb.com

+ It's impossible for everything to be true. +

In response to

Responses

Browse pgsql-hackers by date

  From Date Subject
Next Message Bruce Momjian 2011-10-14 00:50:05 Re: WIP: AuthenticationMD5 protocol documentation clarification
Previous Message Bruce Momjian 2011-10-14 00:00:13 Re: Remove support for 'userlocks'?