diff --git a/src/backend/utils/mmgr/aset.c b/src/backend/utils/mmgr/aset.c index d2dcf526d62..9768d2176da 100644 --- a/src/backend/utils/mmgr/aset.c +++ b/src/backend/utils/mmgr/aset.c @@ -441,7 +441,7 @@ AllocSetContextCreateInternal(MemoryContext parent, * Allocate the initial block. Unlike other aset.c blocks, it starts with * the context header and its block header follows that. */ - set = (AllocSet) malloc(firstBlockSize); + set = (AllocSet) MemoryPoolAlloc(firstBlockSize); if (set == NULL) { if (TopMemoryContext) @@ -579,13 +579,15 @@ AllocSetReset(MemoryContext context) } else { + Size size = block->endptr - ((char *) block); + /* Normal case, release the block */ context->mem_allocated -= block->endptr - ((char *) block); #ifdef CLOBBER_FREED_MEMORY wipe_mem(block, block->freeptr - ((char *) block)); #endif - free(block); + MemoryPoolFree(block, size); } block = next; } @@ -649,7 +651,7 @@ AllocSetDelete(MemoryContext context) freelist->num_free--; /* All that remains is to free the header/initial block */ - free(oldset); + MemoryPoolFree(oldset, keepersize); } Assert(freelist->num_free == 0); } @@ -666,6 +668,7 @@ AllocSetDelete(MemoryContext context) while (block != NULL) { AllocBlock next = block->next; + Size size = block->endptr - ((char *) block); if (!IsKeeperBlock(set, block)) context->mem_allocated -= block->endptr - ((char *) block); @@ -675,7 +678,7 @@ AllocSetDelete(MemoryContext context) #endif if (!IsKeeperBlock(set, block)) - free(block); + MemoryPoolFree(block, size); block = next; } @@ -683,7 +686,7 @@ AllocSetDelete(MemoryContext context) Assert(context->mem_allocated == keepersize); /* Finally, free the context header, including the keeper block */ - free(set); + MemoryPoolFree(set, keepersize); } /* @@ -712,7 +715,7 @@ AllocSetAllocLarge(MemoryContext context, Size size, int flags) #endif blksize = chunk_size + ALLOC_BLOCKHDRSZ + ALLOC_CHUNKHDRSZ; - block = (AllocBlock) malloc(blksize); + block = (AllocBlock) MemoryPoolAlloc(blksize); if 
(block == NULL) return MemoryContextAllocationFailure(context, size, flags); @@ -905,7 +908,7 @@ AllocSetAllocFromNewBlock(MemoryContext context, Size size, int flags, blksize <<= 1; /* Try to allocate it */ - block = (AllocBlock) malloc(blksize); + block = (AllocBlock) MemoryPoolAlloc(blksize); /* * We could be asking for pretty big blocks here, so cope if malloc fails. @@ -916,7 +919,7 @@ AllocSetAllocFromNewBlock(MemoryContext context, Size size, int flags, blksize >>= 1; if (blksize < required_size) break; - block = (AllocBlock) malloc(blksize); + block = (AllocBlock) MemoryPoolAlloc(blksize); } if (block == NULL) @@ -1071,6 +1074,7 @@ AllocSetFree(void *pointer) { /* Release single-chunk block. */ AllocBlock block = ExternalChunkGetBlock(chunk); + Size size = block->endptr - ((char *) block); /* * Try to verify that we have a sane block pointer: the block header @@ -1104,7 +1108,7 @@ AllocSetFree(void *pointer) #ifdef CLOBBER_FREED_MEMORY wipe_mem(block, block->freeptr - ((char *) block)); #endif - free(block); + MemoryPoolFree(block, size); } else { @@ -1223,7 +1227,7 @@ AllocSetRealloc(void *pointer, Size size, int flags) blksize = chksize + ALLOC_BLOCKHDRSZ + ALLOC_CHUNKHDRSZ; oldblksize = block->endptr - ((char *) block); - block = (AllocBlock) realloc(block, blksize); + block = (AllocBlock) MemoryPoolRealloc(block, oldblksize, blksize); if (block == NULL) { /* Disallow access to the chunk header. 
*/ diff --git a/src/backend/utils/mmgr/bump.c b/src/backend/utils/mmgr/bump.c index f98a203a0ce..0ee64fc13ef 100644 --- a/src/backend/utils/mmgr/bump.c +++ b/src/backend/utils/mmgr/bump.c @@ -71,6 +71,7 @@ typedef struct BumpContext uint32 maxBlockSize; /* maximum block size */ uint32 nextBlockSize; /* next block size to allocate */ uint32 allocChunkLimit; /* effective chunk size limit */ + uint32 allocSize; /* effective chunk size limit */ dlist_head blocks; /* list of blocks with the block currently * being filled at the head */ @@ -183,7 +184,7 @@ BumpContextCreate(MemoryContext parent, * Allocate the initial block. Unlike other bump.c blocks, it starts with * the context header and its block header follows that. */ - set = (BumpContext *) malloc(allocSize); + set = (BumpContext *) MemoryPoolAlloc(allocSize); if (set == NULL) { MemoryContextStats(TopMemoryContext); @@ -216,6 +217,7 @@ BumpContextCreate(MemoryContext parent, set->initBlockSize = (uint32) initBlockSize; set->maxBlockSize = (uint32) maxBlockSize; set->nextBlockSize = (uint32) initBlockSize; + set->allocSize = (uint32) allocSize; /* * Compute the allocation chunk size limit for this context. 
@@ -289,10 +291,12 @@ BumpReset(MemoryContext context) void BumpDelete(MemoryContext context) { + BumpContext *set = (BumpContext *) context; + /* Reset to release all releasable BumpBlocks */ BumpReset(context); /* And free the context header and keeper block */ - free(context); + MemoryPoolFree(context, set->allocSize); } /* @@ -326,7 +330,7 @@ BumpAllocLarge(MemoryContext context, Size size, int flags) required_size = chunk_size + Bump_CHUNKHDRSZ; blksize = required_size + Bump_BLOCKHDRSZ; - block = (BumpBlock *) malloc(blksize); + block = (BumpBlock *) MemoryPoolAlloc(blksize); if (block == NULL) return NULL; @@ -458,7 +462,7 @@ BumpAllocFromNewBlock(MemoryContext context, Size size, int flags, if (blksize < required_size) blksize = pg_nextpower2_size_t(required_size); - block = (BumpBlock *) malloc(blksize); + block = (BumpBlock *) MemoryPoolAlloc(blksize); if (block == NULL) return MemoryContextAllocationFailure(context, size, flags); @@ -603,6 +607,8 @@ BumpBlockFreeBytes(BumpBlock *block) static inline void BumpBlockFree(BumpContext *set, BumpBlock *block) { + Size blksize = ((char *) block->endptr - (char *) block); + /* Make sure nobody tries to free the keeper block */ Assert(!IsKeeperBlock(set, block)); @@ -615,7 +621,7 @@ BumpBlockFree(BumpContext *set, BumpBlock *block) wipe_mem(block, ((char *) block->endptr - (char *) block)); #endif - free(block); + MemoryPoolFree(block, blksize); } /* diff --git a/src/backend/utils/mmgr/generation.c b/src/backend/utils/mmgr/generation.c index 9124d9b9522..d2dfd1e8039 100644 --- a/src/backend/utils/mmgr/generation.c +++ b/src/backend/utils/mmgr/generation.c @@ -65,6 +65,7 @@ typedef struct GenerationContext uint32 maxBlockSize; /* maximum block size */ uint32 nextBlockSize; /* next block size to allocate */ uint32 allocChunkLimit; /* effective chunk size limit */ + uint32 allocSize; /* first block size */ GenerationBlock *block; /* current (most recently allocated) block */ GenerationBlock *freeblock; /* pointer to 
an empty block that's being @@ -206,7 +207,7 @@ GenerationContextCreate(MemoryContext parent, * Allocate the initial block. Unlike other generation.c blocks, it * starts with the context header and its block header follows that. */ - set = (GenerationContext *) malloc(allocSize); + set = (GenerationContext *) MemoryPoolAlloc(allocSize); if (set == NULL) { MemoryContextStats(TopMemoryContext); @@ -242,6 +243,7 @@ GenerationContextCreate(MemoryContext parent, set->initBlockSize = (uint32) initBlockSize; set->maxBlockSize = (uint32) maxBlockSize; set->nextBlockSize = (uint32) initBlockSize; + set->allocSize = allocSize; /* * Compute the allocation chunk size limit for this context. @@ -327,10 +329,16 @@ GenerationReset(MemoryContext context) void GenerationDelete(MemoryContext context) { + Size allocSize; + GenerationContext *set = (GenerationContext *) context; + + allocSize = set->allocSize; + /* Reset to release all releasable GenerationBlocks */ GenerationReset(context); + /* And free the context header and keeper block */ - free(context); + MemoryPoolFree(context, allocSize); } /* @@ -361,7 +369,7 @@ GenerationAllocLarge(MemoryContext context, Size size, int flags) required_size = chunk_size + Generation_CHUNKHDRSZ; blksize = required_size + Generation_BLOCKHDRSZ; - block = (GenerationBlock *) malloc(blksize); + block = (GenerationBlock *) MemoryPoolAlloc(blksize); if (block == NULL) return MemoryContextAllocationFailure(context, size, flags); @@ -482,7 +490,7 @@ GenerationAllocFromNewBlock(MemoryContext context, Size size, int flags, if (blksize < required_size) blksize = pg_nextpower2_size_t(required_size); - block = (GenerationBlock *) malloc(blksize); + block = (GenerationBlock *) MemoryPoolAlloc(blksize); if (block == NULL) return MemoryContextAllocationFailure(context, size, flags); @@ -663,6 +671,8 @@ GenerationBlockFreeBytes(GenerationBlock *block) static inline void GenerationBlockFree(GenerationContext *set, GenerationBlock *block) { + Size blksize = 
block->blksize;
+
 	/* Make sure nobody tries to free the keeper block */
 	Assert(!IsKeeperBlock(set, block));
 	/* We shouldn't be freeing the freeblock either */
@@ -677,7 +687,7 @@ GenerationBlockFree(GenerationContext *set, GenerationBlock *block)
 	wipe_mem(block, block->blksize);
 #endif
 
-	free(block);
+	MemoryPoolFree(block, blksize);
 }
 
 /*
diff --git a/src/backend/utils/mmgr/mcxt.c b/src/backend/utils/mmgr/mcxt.c
index d6bf204ce27..1f1731cb6da 100644
--- a/src/backend/utils/mmgr/mcxt.c
+++ b/src/backend/utils/mmgr/mcxt.c
@@ -1726,3 +1726,1074 @@ pchomp(const char *in)
 		n--;
 	return pnstrdup(in, n);
 }
+
+/*
+ * Memory Pools
+ *
+ * Contexts may get memory either directly from the OS (libc) through malloc
+ * calls, but that has non-trivial overhead, depending on the allocation size
+ * and so on. And we tend to allocate fairly large amounts of memory, because
+ * contexts allocate blocks (starting with 1kB, quickly growing by doubling).
+ * A lot of hot paths also allocate pieces of memory exceeding the size limit
+ * and being allocated as a separate block.
+ *
+ * The contexts may cache the memory by keeping chunks, but it's limited to a
+ * single memory context (as AllocSet freelist), and only for the lifetime of
+ * a particular context instance. When the memory is reset/deleted, all the
+ * blocks are freed and returned to the OS (libc).
+ *
+ * There's a rudimentary cache of memory contexts blocks, but this only keeps
+ * the keeper blocks, not any other blocks that may be needed.
+ *
+ * Memory pools are an attempt to improve this by establishing a cache of blocks
+ * shared by all the memory contexts. A memory pool allocates blocks larger
+ * than 1kB, with doubling (1kB, 2kB, 4kB, ...). All the allocations come
+ * from memory contexts, and are either regular blocks (also starting at 1kB)
+ * or oversized chunks (a couple kB or larger). This means the lower limit
+ * is reasonable - there should be no smaller allocations.
+ *
+ * There's no explicit upper size limit - whatever could be used by palloc()
+ * can be requested from the pool. However, only blocks up to 8MB may be
+ * cached by the pool - larger allocations are not kept after pfree().
+ *
+ * To make the reuse possible, the blocks are grouped into size classes the
+ * same way AllocSet uses for chunks. There are 14 size classes, starting
+ * at 1kB and ending at 8MB.
+ *
+ * This "rounding" applies even to oversized chunks. So e.g. allocating 27kB
+ * will allocate a 32kB block. This wastes memory, but it means the block
+ * may be reused by "regular" allocations. The amount of wasted memory could
+ * be reduced by using size classes with smaller steps, but that reduces the
+ * likelihood of reusing the block.
+ */
+
+
+#define MEMPOOL_MIN_BLOCK	1024L			/* smallest cached block */
+#define MEMPOOL_MAX_BLOCK	(8*1024L*1024L)	/* largest cached block */
+#define MEMPOOL_SIZES		14				/* 1kB -> 8MB */
+
+/*
+ * Maximum amount of memory to keep in cache for all size buckets. Sets a
+ * safety limit set on the blocks kept in the *cached* part of the
+ * pool. Each bucket starts with the same amount of memory (1/14 of this)
+ * and then we adapt the cache depending on cache hits/misses.
+ */
+#define MEMPOOL_SIZE_MAX	(128*1024L*1024L)
+
+/*
+ * Maximum number of blocks kept for the whole memory pool. This is used
+ * only to allocate the entries, so we assume all are in the smallest size
+ * bucket.
+ */
+#define MEMPOOL_MAX_BLOCKS	(MEMPOOL_SIZE_MAX / MEMPOOL_MIN_BLOCK)
+
+/*
+ * How often to rebalance the memory pool buckets (number of allocations).
+ * This is a tradeoff between the pool being adaptive and more overhead.
+ */
+#define MEMPOOL_REBALANCE_DISTANCE	25000
+
+/*
+ * To enable debug logging for the memory pool code, build with -DMEMPOOL_DEBUG.
+ */
+#ifdef MEMPOOL_DEBUG
+
+#undef MEMPOOL_DEBUG
+#define MEMPOOL_RANDOMIZE(ptr, size) memset((ptr), 0x7f, (size))
+#define MEMPOOL_DEBUG(...)
fprintf (stderr, __VA_ARGS__) + +#else + +#define MEMPOOL_DEBUG(...) +#define MEMPOOL_RANDOMIZE(ptr, size) + +#endif /* MEMPOOL_DEBUG */ + + +/* + * Entries for a simple linked list of blocks to reuse. + */ +typedef struct MemPoolEntry +{ + void *ptr; /* allocated block (NULL in empty entries) */ + struct MemPoolEntry *next; +} MemPoolEntry; + +/* + * Information about allocations of blocks of a certain size. We track the number + * of currently cached blocks, and also the number of allocated blocks (still + * used by the memory context). + * + * maxcached is the maximum number of free blocks to keep in the cache + * + * maxallocated is the maximum number of concurrently allocated blocks (from the + * point of the memory context) + */ +typedef struct MemPoolBucket +{ + int nhits; /* allocation cache hits */ + int nmisses; /* allocation cache misses */ + int nallocated; /* number of currently allocated blocks */ + int maxallocated; /* max number of allocated blocks */ + int ncached; /* number of free blocks (entry list) */ + int maxcached; /* max number of free blocks to cache */ + MemPoolEntry *entry; +} MemPoolBucket; + +/* + * MemPool - memory pool, caching allocations between memory contexts + * + * cache - stores free-d blocks that may be reused for future allocations, + * each slot is a list of MemPoolEntry elements using the "entries" + * + * entries - pre-allocated entries for the freelists, used by cache lists + * + * freelist - list of free cache entries (not used by the cache lists) + * + * The meaning of the freelist is somewhat inverse - when a block is freed + * by the memory context above, we need to add it to the cache. To do that + * we get an entry from the freelist, and add it to the cache. So free-ing + * a block removes an entry from the mempool freelist. 
+ */
+typedef struct MemPool
+{
+	/* LIFO cache of free-d blocks of eligible sizes (1kB - 8MB, doubled) */
+	MemPoolBucket cache[MEMPOOL_SIZES];
+
+	/* pre-allocated entries for cache of free-d blocks */
+	MemPoolEntry entries[MEMPOOL_SIZES * MEMPOOL_MAX_BLOCKS];
+
+	/* head of freelist (entries from the array) */
+	MemPoolEntry *freelist;
+
+	/* memory limit / accounting */
+	int64		mem_allowed;
+	int64		mem_allocated;
+	int64		mem_cached;
+	int64		num_requests;
+} MemPool;
+
+static MemPool *pool = NULL;
+
+static void
+AssertCheckMemPool(MemPool *p)
+{
+#ifdef USE_ASSERT_CHECKING
+	MemPoolEntry *entry;
+	int			nused = 0;
+	int			nfree = 0;
+	int64		mem_cached = 0;
+	Size		block_size = MEMPOOL_MIN_BLOCK;
+
+	Assert(p->mem_allocated >= 0);
+	Assert(p->mem_cached >= 0);
+
+	/* count the elements in the various cache buckets */
+	for (int i = 0; i < MEMPOOL_SIZES; i++)
+	{
+		int			count = 0;
+
+		Assert(p->cache[i].ncached >= 0);
+		Assert(p->cache[i].ncached <= p->cache[i].maxcached);
+
+		entry = p->cache[i].entry;
+
+		while (entry)
+		{
+			Assert(entry->ptr);
+
+			entry = entry->next;
+			count++;
+		}
+
+		Assert(count == p->cache[i].ncached);
+
+		nused += count;
+		mem_cached += (count * block_size);
+
+		block_size *= 2;
+	}
+
+	/* now count the elements in the freelist */
+	entry = p->freelist;
+	while (entry)
+	{
+		nfree++;
+		entry = entry->next;
+	}
+
+	Assert(nfree + nused == MEMPOOL_SIZES * MEMPOOL_MAX_BLOCKS);
+	Assert(mem_cached == p->mem_cached);
+#endif
+}
+
+static void MemoryPoolRebalanceBuckets(void);
+static void MemoryPoolEnforceSizeLimit(Size request_size, int index);
+
+/*
+ * MemoryPoolInit
+ *		initialize the global memory pool
+ *
+ * Initialize the overall memory pool structure, and also link all entries
+ * into a freelist.
+ */
+static void
+MemoryPoolInit(void)
+{
+	Size		size = MEMPOOL_MIN_BLOCK;
+
+	/* bail out if already initialized */
+	if (pool)
+		return;
+
+	/* allocate the basic structure */
+	/* XXX malloc result is not checked; an OOM here would crash in memset */
+	pool = malloc(sizeof(MemPool));
+	memset(pool, 0, sizeof(MemPool));
+
+	/* initialize the freelist - put all entries to the list */
+	pool->freelist = &pool->entries[0];
+
+	for (int i = 0; i < (MEMPOOL_SIZES * MEMPOOL_MAX_BLOCKS - 1); i++)
+	{
+		/*
+		 * XXX The condition matches the loop bound, so the else branch is
+		 * dead code (the final entry's next is already NULL from the memset).
+		 */
+		if (i < (MEMPOOL_SIZES * MEMPOOL_MAX_BLOCKS - 1))
+			pool->entries[i].next = &pool->entries[i+1];
+		else
+			pool->entries[i].next = NULL;
+	}
+
+	/* set default maximum counts of entries for each size class */
+	for (int i = 0; i < MEMPOOL_SIZES; i++)
+	{
+		pool->cache[i].maxcached = (MEMPOOL_SIZE_MAX / MEMPOOL_SIZES / size);
+		size *= 2;
+	}
+
+	AssertCheckMemPool(pool);
+}
+
+/*
+ * MemoryPoolEntrySize
+ *		calculate the size of the block to allocate for a given request size
+ *
+ * The request sizes are grouped into pow(2,n) classes, starting at 1kB and
+ * ending at 8MB. Which means there are 14 size classes.
+ */
+static Size
+MemoryPoolEntrySize(Size size)
+{
+	Size		result;
+
+	/*
+	 * We shouldn't really get many malloc() for such small elements through
+	 * memory contexts, so just use the smallest block.
+	 */
+	if (size < MEMPOOL_MIN_BLOCK)
+		return MEMPOOL_MIN_BLOCK;
+
+	/*
+	 * We can get various large allocations - we don't want to cache those,
+	 * not waste space on doubling them, so just allocate them directly.
+	 * Maybe the limit should be separate/lower, like 1MB.
+	 */
+	if (size > MEMPOOL_MAX_BLOCK)
+		return size;
+
+	/*
+	 * Otherwise just calculate the first block larger than the request.
+	 *
+	 * XXX Maybe there's a better way to calculate this? The number of loops
+	 * should be very low, though (less than MEMPOOL_SIZES, i.e. 14).
+ */ + result = MEMPOOL_MIN_BLOCK; + while (size > result) + result *= 2; + + MEMPOOL_DEBUG("%d MempoolEntrySize %lu => %lu\n", getpid(), size, result); + + /* the block size has to be sufficient for the requested size */ + Assert(size <= result); + + return result; +} + +/* + * MemoryPoolEntryIndex + * Calculate the cache index for a given entry size. + * + * XXX Always called right after MemoryPoolEntrySize, so maybe it should be + * merged into a single function, so that the loop happens only once. + */ +static int +MemoryPoolEntryIndex(Size size) +{ + int blockIndex = 0; + Size blockSize = MEMPOOL_MIN_BLOCK; + + /* is size possibly in cache? */ + if (size < MEMPOOL_MIN_BLOCK || size > MEMPOOL_MAX_BLOCK) + return -1; + + /* calculate where to maybe cache the entry */ + while (blockSize <= MEMPOOL_MAX_BLOCK) + { + Assert(size >= blockSize); + + if (size == blockSize) + { + Assert(blockIndex < MEMPOOL_SIZES); + return blockIndex; + } + + blockIndex++; + blockSize *= 2; + } + + /* not eligible for caching after all */ + return -1; +} + +/* + * Check that the entry size is valid and matches the class index - if smaller + * than 8MB, it needs to be in one of the valid classes. + */ +static void +AssertCheckEntrySize(Size size, int cacheIndex) +{ +#ifdef USE_ASSERT_CHECKING + int blockSize = MEMPOOL_MIN_BLOCK; + int blockIndex = 0; + + Assert(cacheIndex >= -1 && cacheIndex < MEMPOOL_SIZES); + + /* all sizes in the valid range should be in one of the slots */ + if (cacheIndex == -1) + Assert(size < MEMPOOL_MIN_BLOCK || size > MEMPOOL_MAX_BLOCK); + else + { + /* calculate the block size / index for the given size */ + while (size > blockSize) + { + blockSize *= 2; + blockIndex++; + } + + Assert(size == blockSize); + Assert(cacheIndex == blockIndex); + } +#endif +} + +/* + * MemoryPoolAlloc + * Allocate a block from the memory pool. + * + * The block may come either from cache - if available - or from malloc(). 
+ */ +void * +MemoryPoolAlloc(Size size) +{ + int index; + void *ptr; + + MemoryPoolInit(); + + pool->num_requests++; + + MemoryPoolRebalanceBuckets(); + + /* maybe override the requested size */ + size = MemoryPoolEntrySize(size); + index = MemoryPoolEntryIndex(size); + + /* cross-check the size and index */ + AssertCheckEntrySize(size, index); + + /* try to enforce the memory limit */ + MemoryPoolEnforceSizeLimit(size, index); + + /* Is the block eligible to be in the cache? Or is it too large/small? */ + if (index >= 0) + { + MemPoolEntry *entry = pool->cache[index].entry; + + /* + * update the number of allocated chunks, and the high watermark + * + * We do this even if there's no entry in the cache. + */ + pool->cache[index].nallocated++; + pool->cache[index].maxallocated = Max(pool->cache[index].nallocated, + pool->cache[index].maxallocated); + + /* + * If we have a cached block for this size, we're done. Remove it + * from the cache and return the entry to the freelist. + */ + if (entry != NULL) + { + /* remember the pointer (we'll reset the entry) */ + ptr = entry->ptr; + entry->ptr = NULL; + + /* remove the entry from the cache */ + pool->cache[index].entry = entry->next; + pool->cache[index].ncached--; + + /* return the entry to the freelist */ + entry->next = pool->freelist; + pool->freelist = entry; + + MEMPOOL_RANDOMIZE(ptr, size); + MEMPOOL_DEBUG("%d MemoryPoolAlloc %lu => %d %p HIT\n", getpid(), size, index, ptr); + + /* update memory accounting */ + Assert(pool->mem_cached >= size); + + pool->mem_cached -= size; + pool->mem_allocated += size; + + pool->cache[index].nhits++; + + AssertCheckMemPool(pool); + + return ptr; + } + + pool->cache[index].nmisses++; + } + + /* + * Either too small/large for the cache, or there's no available block of + * the right size. 
+ */
+	ptr = malloc(size);
+
+	MEMPOOL_RANDOMIZE(ptr, size);
+	MEMPOOL_DEBUG("%d MemoryPoolAlloc %lu => %d %p MISS\n", getpid(), size, index, ptr);
+
+	/* update memory accounting */
+	pool->mem_allocated += size;
+
+	/* maybe we should track the number of over-sized allocations too? */
+	// pool->cache_misses++;
+
+	AssertCheckMemPool(pool);
+
+	return ptr;
+}
+
+/*
+ * MemoryPoolShouldCache
+ *		Should we put the entry into cache at the given index?
+ */
+static bool
+MemoryPoolShouldCache(Size size, int index)
+{
+	MemPoolBucket *entry;
+
+	/* not in any pool bucket */
+	if (index == -1)
+		return false;
+
+	/* only compute the bucket pointer once we know the index is valid */
+	entry = &pool->cache[index];
+
+	/*
+	 * Bail out if no freelist entries.
+	 *
+	 * XXX This shouldn't be possible, as we size the freelist as if all classes
+	 * could have the maximum number of entries (but the actual number drops to
+	 * 1/2 with each size class).
+	 */
+	if (!pool->freelist)
+		return false;
+
+	/* Memory limit is set, and we'd exceed it? Don't cache. */
+	if ((pool->mem_allowed > 0) &&
+		(pool->mem_allocated + pool->mem_cached + size > pool->mem_allowed))
+		return false;
+
+	/* Did we already reach the maximum size of the size class? */
+	return (entry->ncached < entry->maxcached);
+}
+
+/*
+ * MemoryPoolFree
+ *		Free a block, maybe add it to the memory pool cache.
+ */
+void
+MemoryPoolFree(void *pointer, Size size)
+{
+	int			index = 0;
+
+	MemoryPoolInit();
+
+	/*
+	 * Override the requested size (provided by the memory context), calculate
+	 * the appropriate size class index.
+	 */
+	size = MemoryPoolEntrySize(size);
+	index = MemoryPoolEntryIndex(size);
+
+	AssertCheckEntrySize(size, index);
+
+	/* check that we've correctly accounted for this block during allocation */
+	Assert(pool->mem_allocated >= size);
+
+	/*
+	 * update the number of allocated blocks (if eligible for cache)
+	 *
+	 * XXX Needs to happen even if we don't add the block to the cache.
+	 */
+	if (index != -1)
+		pool->cache[index].nallocated--;
+
+	/*
+	 * Should we cache this entry?
Do we have entries for the freelist, and + * do we have free space in the size class / memory pool as a whole? + */ + if (MemoryPoolShouldCache(size, index)) + { + MemPoolEntry *entry; + + entry = pool->freelist; + pool->freelist = entry->next; + + /* add the entry to the cache, update number of entries in this bucket */ + entry->next = pool->cache[index].entry; + pool->cache[index].entry = entry; + pool->cache[index].ncached++; + + entry->ptr = pointer; + + MEMPOOL_RANDOMIZE(pointer, size); + MEMPOOL_DEBUG("%d MemoryPoolFree %lu => %d %p ADD\n", getpid(), size, index, pointer); + + /* update accounting */ + pool->mem_cached += size; + pool->mem_allocated -= size; + + AssertCheckMemPool(pool); + + return; + } + + MEMPOOL_RANDOMIZE(pointer, size); + MEMPOOL_DEBUG("%d MemoryPoolFree %lu => %d FULL\n", getpid(), size, index); + + /* update accounting */ + pool->mem_allocated -= size; + + AssertCheckMemPool(pool); + + free(pointer); +} + +/* + * MemoryPoolRealloc + * reallocate a previously allocated block + * + * XXX Maybe this should use the cache too. Right now we just call realloc() + * after updating the cache counters. And maybe it should enforce the memory + * limit, just like we do in MemoryPoolAlloc(). + */ +void * +MemoryPoolRealloc(void *pointer, Size oldsize, Size newsize) +{ + void *ptr; + + int oldindex, + newindex; + + MemoryPoolInit(); + + oldsize = MemoryPoolEntrySize(oldsize); + newsize = MemoryPoolEntrySize(newsize); + + /* XXX Maybe if (oldsize >= newsize) we don't need to do anything? 
*/
+	oldindex = MemoryPoolEntryIndex(oldsize);
+	newindex = MemoryPoolEntryIndex(newsize);
+
+	if (oldindex != -1)
+		pool->cache[oldindex].nallocated--;
+
+	if (newindex != -1)
+	{
+		pool->cache[newindex].nallocated++;
+		pool->cache[newindex].maxallocated = Max(pool->cache[newindex].nallocated,
+												 pool->cache[newindex].maxallocated);
+	}
+
+	MEMPOOL_DEBUG("%d MemoryPoolRealloc old %lu => %p\n", getpid(), oldsize, pointer);
+
+	ptr = realloc(pointer, newsize);
+
+	MEMPOOL_DEBUG("%d MemoryPoolRealloc new %lu => %p\n", getpid(), newsize, ptr);
+
+	/* update accounting */
+	Assert(pool->mem_allocated >= oldsize);
+
+	pool->mem_allocated -= oldsize;
+	pool->mem_allocated += newsize;
+
+	AssertCheckMemPool(pool);
+
+	return ptr;
+}
+
+/*
+ * MemoryPoolRebalanceBuckets
+ *		Rebalance the cache capacity for different size classes.
+ *
+ * The goal of the rebalance is to adapt the cache capacity to changes in the
+ * workload - release blocks of sizes that are no longer needed, allow caching
+ * for new block sizes etc.
+ *
+ * The rebalance happens every MEMPOOL_REBALANCE_DISTANCE allocations - it needs
+ * to happen often enough to adapt to the workload changes, but not too often
+ * to cause significant overhead. The distance also needs to be sufficient to
+ * have a reasonable representation of the allocations.
+ *
+ * The rebalance happens in three phases:
+ *
+ * 1) shrink oversized buckets (maxallocated < maxcached)
+ *
+ * 2) enlarge undersized buckets (maxcached < maxallocated)
+ *
+ * 3) distribute remaining capacity (if any) uniformly
+ *
+ * The reduction in (1) is gradual, i.e. instead of setting maxcached to the
+ * maxallocated value (which may be seen as the minimum capacity needed), we
+ * only go halfway there. The intent is to dampen the transition in case the
+ * current counter is not entirely representative.
+ * + * The bucket enlarging in step (2) is proportional to the number of misses + * for each bucket (with respect to the total number of misses in the buckets + * that are too small). We however don't oversize the bucket - we assign at + * most (maxallocated - maxcached) entries, not more in this step. + * + * Finally, we simply take the remaining unallocated/unassigned memory (up to + * MEMPOOL_SIZE_MAX), and distribute it to all the buckets uniformly. That is, + * each bucket gets the same amount (rounded to entries of appropriate size). + * + * XXX Maybe we should have a parameter for the dampening factor in (1), and + * not just use 0.5. For example, maybe 0.75 would be better? + * + * XXX This assumes misses for different buckets are equally expensive, but + * that may not be the case. It's likely a miss is proportional to the size + * of the block, so maybe we should consider that and use the size as weight + * for the cache miss. + */ +static void +MemoryPoolRebalanceBuckets(void) +{ + Size block_size; + int64 redistribute_bytes; + int64 assigned_bytes = 0; + int64 num_total_misses = 0; + + /* only do this once every MEMPOOL_REBALANCE_DISTANCE allocations */ + if (pool->num_requests < MEMPOOL_REBALANCE_DISTANCE) + return; + +#ifdef MEMPOOL_DEBUG + /* print info about the cache and individual size buckets before the rebalance */ + MEMPOOL_DEBUG("%d mempool rebalance requests %ld allowed %ld allocated %ld cached %ld\n", + getpid(), pool->num_requests, + pool->mem_allowed, pool->mem_allocated, pool->mem_cached); + + for (int i = 0; i < MEMPOOL_SIZES; i++) + { + MEMPOOL_DEBUG("%d mempool rebalance bucket %d hit %d miss %d (%.1f%%) maxcached %d cached %d maxallocated %d allocated %d\n", + getpid(), i, pool->cache[i].nhits, pool->cache[i].nmisses, + pool->cache[i].nhits * 100.0 / Max(1, pool->cache[i].nhits + pool->cache[i].nmisses), + pool->cache[i].maxcached, pool->cache[i].ncached, + pool->cache[i].maxallocated, pool->cache[i].nallocated); + } +#endif + + /* + 
* Are there buckets with cache that is unnecessarily large? That is, with + * (ncached + nallocated > maxallocated). If yes, we release half of that + * and put that into a budget that we can redistribute. + * + * XXX We release half to somewhat dampen the changes over time. + */ + block_size = MEMPOOL_MIN_BLOCK; + for (int i = 0; i < MEMPOOL_SIZES; i++) + { + /* + * If the cache is large enough to serve all allocations, try making it + * a bit smaller and cut half the extra space (and maybe also free the + * unnecessary blocks). + */ + if (pool->cache[i].maxcached > pool->cache[i].maxallocated) + { + int nentries; + + pool->cache[i].maxcached + = (pool->cache[i].maxcached + pool->cache[i].maxallocated) / 2; + + nentries = (pool->cache[i].ncached + pool->cache[i].nallocated); + nentries -= pool->cache[i].maxcached; + + /* release enough entries from the cache */ + while (nentries > 0) + { + MemPoolEntry *entry = pool->cache[i].entry; + + pool->cache[i].entry = entry->next; + pool->cache[i].ncached--; + + free(entry->ptr); + entry->ptr = NULL; + + /* add the entry to the freelist */ + entry->next = pool->freelist; + pool->freelist = entry; + + Assert(pool->mem_cached >= block_size); + + /* update accounting */ + pool->mem_cached -= block_size; + + nentries--; + } + } + + /* remember how many misses we saw in the undersized buckets */ + num_total_misses += pool->cache[i].nmisses; + + /* remember how much space we already allocated to this bucket */ + assigned_bytes += (pool->cache[i].maxcached * block_size); + + /* double the block size */ + block_size = (block_size << 1); + } + + /* + * How much memory we can redistribute? Start with the memory limit, + * and subtract the space currently allocated and assigned to cache. 
+ */ + redistribute_bytes = Max(pool->mem_allowed, MEMPOOL_SIZE_MAX); + redistribute_bytes -= (pool->mem_allocated); + redistribute_bytes -= assigned_bytes; + + /* + * Make sure it's not negative (might happen if there's a lot of + * allocated memory). + */ + redistribute_bytes = Max(0, redistribute_bytes); + + MEMPOOL_DEBUG("%d mempool rebalance can redistribute %ld bytes, allocated %ld bytes, assigned %ld bytes, total misses %ld\n", + getpid(), redistribute_bytes, pool->mem_allocated, assigned_bytes, num_total_misses); + + /* + * Redistribute the memory based on the number of misses, and reset the + * various counters, so that the next round begins afresh. + */ + if (redistribute_bytes > 0) + { + block_size = MEMPOOL_MIN_BLOCK; + for (int i = 0; i < MEMPOOL_SIZES; i++) + { + int64 nbytes; + int nentries; + + /* Are we missing entries in cache for this slot? */ + if (pool->cache[i].maxcached < pool->cache[i].maxallocated) + { + int nmissing = (pool->cache[i].maxallocated - pool->cache[i].maxcached); + + /* + * How many entries we can add to this size bucket, based on the number + * of cache misses? + */ + nbytes = redistribute_bytes * pool->cache[i].nmisses / Max(1, num_total_misses); + nentries = (nbytes / block_size); + + /* But don't add more than we need. */ + nentries = Min(nentries, nmissing); + + pool->cache[i].maxcached += nentries; + assigned_bytes += nentries * block_size; + } + + /* double the block size */ + block_size = (block_size << 1); + } + } + + MEMPOOL_DEBUG("%d mempool rebalance done allocated %ld bytes, assigned %ld bytes\n", + getpid(), pool->mem_allocated, assigned_bytes); + + /* + * If we still have some memory, redistribute it uniformly. + */ + redistribute_bytes = Max(pool->mem_allowed, MEMPOOL_SIZE_MAX); + redistribute_bytes -= (pool->mem_allocated); + redistribute_bytes -= assigned_bytes; + + /* + * Make sure it's not negative (might happen if there's a lot of + * allocated memory). 
+ */ + redistribute_bytes = Max(0, redistribute_bytes); + + MEMPOOL_DEBUG("%d mempool rebalance remaining bytes %ld, allocated %ld bytes, assigned %ld bytes\n", + getpid(), redistribute_bytes, pool->mem_allocated, assigned_bytes); + + block_size = MEMPOOL_MIN_BLOCK; + for (int i = 0; i < MEMPOOL_SIZES; i++) + { + int nentries = (redistribute_bytes / MEMPOOL_SIZES / block_size); + + pool->cache[i].maxcached += nentries; + + /* also reset the various counters */ + pool->cache[i].maxallocated = pool->cache[i].nallocated; + pool->cache[i].nhits = 0; + pool->cache[i].nmisses = 0; + + /* double the block size */ + block_size = (block_size << 1); + } + + MEMPOOL_DEBUG("%d mempool rebalance done\n", getpid()); + +#ifdef MEMPOOL_DEBUG + /* print some info about cache hit ratio, but only once in a while */ + block_size = MEMPOOL_MIN_BLOCK; + assigned_bytes = 0; + for (int i = 0; i < MEMPOOL_SIZES; i++) + { + MEMPOOL_DEBUG("%d mempool rebalance bucket %d maxcached %d cached %d maxallocated %d allocated %d\n", + getpid(), i, + pool->cache[i].maxcached, pool->cache[i].ncached, + pool->cache[i].maxallocated, pool->cache[i].nallocated); + + assigned_bytes += (pool->cache[i].maxcached * block_size); + + /* double the block size */ + block_size = (block_size << 1); + } + MEMPOOL_DEBUG("%d mempool rebalance allocated %ld assigned %ld (total %ld kB)\n", + getpid(), pool->mem_allocated, assigned_bytes, + (pool->mem_allocated + assigned_bytes) / 1024L); +#endif + + /* start new rebalance period */ + pool->num_requests = 0; +} + +/* + * MemoryPoolEnforceMaxCounts + * release cached blocks exceeding the maxcached for a given bucket + * + * XXX This gets called only from MemoryPoolSetSizeLimit, which updates the + * maxcount based on the memory limit. Maybe it should be integrated into + * that directly? + * + * XXX Or maybe we should simply do the rebalancing for the new limit? 
+ */
+static void
+MemoryPoolEnforceMaxCounts(void)
+{
+	/* walk buckets from the largest size class down, so start at the max */
+	Size		block_size = MEMPOOL_MAX_BLOCK;
+
+	/* nothing cached, so can't release anything */
+	if (pool->mem_cached == 0)
+		return;
+
+	/*
+	 * Walk through the buckets, make sure that no bucket has too many cached
+	 * entries.
+	 */
+	for (int i = MEMPOOL_SIZES - 1; i >= 0; i--)
+	{
+		while (pool->cache[i].entry)
+		{
+			MemPoolEntry *entry = pool->cache[i].entry;
+
+			/* we're within the limit, bail out */
+			if (pool->cache[i].ncached <= pool->cache[i].maxcached)
+				break;
+
+			/* unlink the entry and release the block it points to */
+			pool->cache[i].entry = entry->next;
+			pool->cache[i].ncached--;
+
+			free(entry->ptr);
+			entry->ptr = NULL;
+
+			/* add the entry to the freelist */
+			entry->next = pool->freelist;
+			pool->freelist = entry;
+
+			Assert(pool->mem_cached >= block_size);
+
+			/* update accounting */
+			pool->mem_cached -= block_size;
+		}
+
+		/*
+		 * Halve the block size, because we iterate from the largest bucket
+		 * to the smallest one (was incorrectly doubling, which corrupted the
+		 * mem_cached accounting above).
+		 */
+		block_size = (block_size >> 1);
+	}
+
+	MEMPOOL_DEBUG("%d MemoryPoolEnforceMaxCounts allocated %ld cached %ld\n",
+				  getpid(), pool->mem_allocated, pool->mem_cached);
+
+	AssertCheckMemPool(pool);
+}
+
+/*
+ * MemoryPoolEnforceSizeLimit
+ *		Release cached blocks to allow allocating a block of a given size.
+ *
+ * If actually freeing blocks is needed, we free more of them, so that we don't
+ * need to do that too often. We free at least 2x the amount of space we need,
+ * or 25% of the limit, whichever is larger.
+ *
+ * We free memory from the largest blocks, because that's likely to free memory
+ * the fastest. And we don't allocate those very often.
+ *
+ * XXX Maybe we should free memory in the smaller classes too, so that we don't
+ * end up keeping many unnecessary old blocks, while thrashing the large class.
+ */ +static void +MemoryPoolEnforceSizeLimit(Size request_size, int index) +{ + int64 threshold, + needtofree; + + Size block_size = MEMPOOL_MAX_BLOCK; + + /* no memory limit set */ + if (pool->mem_allowed == 0) + return; + + /* nothing cached, so can't release anything */ + if (pool->mem_cached == 0) + return; + + /* + * With the new request, would we exceed the memory limit? we need + * to count both the allocated and cached memory. + * + * XXX In principle the block may be already available in cache, in which + * case we don't need to add it to the allocated + cached figure. + */ + if (pool->mem_allocated + pool->mem_cached + request_size <= pool->mem_allowed) + return; + + /* + * How much we need to release? we don't want to allocate just enough + * for the one request, but a bit more, to prevent trashing. + */ + threshold = Min(Max(0, pool->mem_allowed - 2 * request_size), + pool->mem_allowed * 0.75); + + Assert((threshold >= 0) && (threshold < pool->mem_allowed)); + + /* + * How much we need to free, to get under the theshold? Can't free more + * than we have in the cache, though. + * + * XXX One we free at least this amount of memory, we're done. + */ + needtofree = (pool->mem_allocated + pool->mem_cached + request_size) - threshold; + needtofree = Min(needtofree, pool->mem_cached); + + MEMPOOL_DEBUG("%d MemoryPoolMaybeShrink total %ld cached %ld threshold %ld needtofree %ld\n", + getpid(), pool->mem_allocated + pool->mem_cached, pool->mem_cached, threshold, needtofree); + + /* Is it even eligible to be in the cache? */ + for (int i = MEMPOOL_SIZES - 1; i >= 0; i--) + { + /* did we free enough memory? 
*/ + if (needtofree <= 0) + break; + + while (pool->cache[i].entry) + { + MemPoolEntry *entry = pool->cache[i].entry; + + pool->cache[i].entry = entry->next; + pool->cache[i].ncached--; + + free(entry->ptr); + entry->ptr = NULL; + + /* add the entry to the freelist */ + entry->next = pool->freelist; + pool->freelist = entry; + + needtofree -= block_size; + + /* did we free enough memory? */ + if (needtofree <= 0) + break; + } + + block_size = (block_size >> 1); + } + + MEMPOOL_DEBUG("%d MemoryPoolEnforceMemoryLimit allocated %ld cached %ld needtofree %ld\n", + getpid(), pool->mem_allocated, pool->mem_cached, needtofree); + + AssertCheckMemPool(pool); +} + +/* + * MemoryPoolSetSizeLimit + * Set size limit for the memory pool. + */ +void +MemoryPoolSetSizeLimit(int64 size) +{ + Size blksize = MEMPOOL_MIN_BLOCK; + Size maxsize; + + Assert(pool); + Assert(size >= 0); + + pool->mem_allowed = size; + + /* also update the max number of entries for each class size */ + + if (size > 0) + maxsize = size / MEMPOOL_SIZES; + else + maxsize = MEMPOOL_SIZE_MAX; + + for (int i = 0; i < MEMPOOL_SIZES; i++) + { + pool->cache[i].maxcached = (maxsize / blksize); + blksize *= 2; + } + + /* enforce the updated maxcached limit */ + MemoryPoolEnforceMaxCounts(); + + /* also enforce the general memory limit */ + MemoryPoolEnforceSizeLimit(0, -1); +} + +/* + * MemoryPoolGetSizeAndCounts + */ +void +MemoryPoolGetSizeAndCounts(int64 *mem_allowed, int64 *mem_allocated, int64 *mem_cached, + int64 *cache_hits, int64 *cache_misses) +{ + Assert(pool); + + *mem_allowed = pool->mem_allowed; + *mem_allocated = pool->mem_allocated; + *mem_cached = pool->mem_cached; + + *cache_hits = 0; + *cache_misses = 0; + + for (int i = 0; i < MEMPOOL_SIZES; i++) + { + *cache_hits += pool->cache[i].nhits; + *cache_misses += pool->cache[i].nmisses; + } +} diff --git a/src/backend/utils/mmgr/slab.c b/src/backend/utils/mmgr/slab.c index 516e1c95aaf..53f1271c288 100644 --- a/src/backend/utils/mmgr/slab.c +++ 
b/src/backend/utils/mmgr/slab.c @@ -359,9 +359,7 @@ SlabContextCreate(MemoryContext parent, elog(ERROR, "block size %zu for slab is too small for %zu-byte chunks", blockSize, chunkSize); - - - slab = (SlabContext *) malloc(Slab_CONTEXT_HDRSZ(chunksPerBlock)); + slab = (SlabContext *) MemoryPoolAlloc(Slab_CONTEXT_HDRSZ(chunksPerBlock)); if (slab == NULL) { MemoryContextStats(TopMemoryContext); @@ -451,7 +449,7 @@ SlabReset(MemoryContext context) #ifdef CLOBBER_FREED_MEMORY wipe_mem(block, slab->blockSize); #endif - free(block); + MemoryPoolFree(block, slab->blockSize); context->mem_allocated -= slab->blockSize; } @@ -467,7 +465,7 @@ SlabReset(MemoryContext context) #ifdef CLOBBER_FREED_MEMORY wipe_mem(block, slab->blockSize); #endif - free(block); + MemoryPoolFree(block, slab->blockSize); context->mem_allocated -= slab->blockSize; } } @@ -484,10 +482,13 @@ SlabReset(MemoryContext context) void SlabDelete(MemoryContext context) { + SlabContext *set PG_USED_FOR_ASSERTS_ONLY = (SlabContext *) context; + /* Reset to release all the SlabBlocks */ SlabReset(context); + /* And free the context header */ - free(context); + MemoryPoolFree(context, Slab_CONTEXT_HDRSZ(set->chunksPerBlock)); } /* @@ -562,7 +563,7 @@ SlabAllocFromNewBlock(MemoryContext context, Size size, int flags) } else { - block = (SlabBlock *) malloc(slab->blockSize); + block = (SlabBlock *) MemoryPoolAlloc(slab->blockSize); if (unlikely(block == NULL)) return MemoryContextAllocationFailure(context, size, flags); @@ -795,7 +796,7 @@ SlabFree(void *pointer) #ifdef CLOBBER_FREED_MEMORY wipe_mem(block, slab->blockSize); #endif - free(block); + MemoryPoolFree(block, slab->blockSize); slab->header.mem_allocated -= slab->blockSize; } diff --git a/src/include/utils/memutils.h b/src/include/utils/memutils.h index 4446e14223d..6571dc9ca1f 100644 --- a/src/include/utils/memutils.h +++ b/src/include/utils/memutils.h @@ -189,4 +189,13 @@ extern MemoryContext GenerationContextCreate(MemoryContext parent, #define 
SLAB_DEFAULT_BLOCK_SIZE (8 * 1024) #define SLAB_LARGE_BLOCK_SIZE (8 * 1024 * 1024) +extern void *MemoryPoolAlloc(Size size); +extern void *MemoryPoolRealloc(void *pointer, Size oldsize, Size size); +extern void MemoryPoolFree(void *pointer, Size size); + +extern void MemoryPoolSetSizeLimit(int64 size); +extern void MemoryPoolGetSizeAndCounts(int64 *mem_limit, + int64 *mem_allocated, int64 *mem_cached, + int64 *cache_hits, int64 *cache_misses); + #endif /* MEMUTILS_H */