From 239295dc3dca9ad1f05a71a2169c89405f4ce041 Mon Sep 17 00:00:00 2001 From: Andres Freund Date: Tue, 18 Jul 2023 20:16:18 -0700 Subject: [PATCH v4 2/3] Optimize AllocSetAlloc() by separating hot from cold paths. With gcc the common paths of AllocSetAlloc() now don't need to initialize a stack frame when compiling with gcc. --- src/backend/utils/mmgr/aset.c | 482 +++++++++++++++++++--------------- 1 file changed, 269 insertions(+), 213 deletions(-) diff --git a/src/backend/utils/mmgr/aset.c b/src/backend/utils/mmgr/aset.c index 81c3120c2b..4e52eb063c 100644 --- a/src/backend/utils/mmgr/aset.c +++ b/src/backend/utils/mmgr/aset.c @@ -687,101 +687,301 @@ AllocSetDelete(MemoryContext context) } /* - * AllocSetAlloc - * Returns pointer to allocated memory of given size or NULL if - * request could not be completed; memory is added to the set. + * Helper for AllocSetAlloc() that allocates an entire block for the chunk. * - * No request may exceed: - * MAXALIGN_DOWN(SIZE_MAX) - ALLOC_BLOCKHDRSZ - ALLOC_CHUNKHDRSZ - * All callers use a much-lower limit. - * - * Note: when using valgrind, it doesn't matter how the returned allocation - * is marked, as mcxt.c will set it to UNDEFINED. In some paths we will - * return space that is marked NOACCESS - AllocSetRealloc has to beware! + * AllocSetAlloc()'s comment explains why this is separate. */ -void * -AllocSetAlloc(MemoryContext context, Size size, int flags) +pg_noinline +static void * +AllocSetAllocLarge(MemoryContext context, Size size, int flags) { AllocSet set = (AllocSet) context; - AllocBlock block; + AllocBlock block; MemoryChunk *chunk; - int fidx; - Size chunk_size; - Size blksize; + Size chunk_size; + Size blksize; - Assert(AllocSetIsValid(set)); + /* check size, only allocation path where the limits could be hit */ + MemoryContextCheckSize(context, size, flags); + +#ifdef MEMORY_CONTEXT_CHECKING + /* ensure there's always space for the sentinel byte */ + chunk_size = MAXALIGN(size + 1); +#else + chunk_size = MAXALIGN(size); +#endif + + blksize = chunk_size + ALLOC_BLOCKHDRSZ + ALLOC_CHUNKHDRSZ; + block = (AllocBlock) malloc(blksize); + if (block == NULL) + return MemoryContextAllocationFailure(context, size, flags); + + context->mem_allocated += blksize; + + block->aset = set; + block->freeptr = block->endptr = ((char *) block) + blksize; + + chunk = (MemoryChunk *) (((char *) block) + ALLOC_BLOCKHDRSZ); + + /* mark the MemoryChunk as externally managed */ + MemoryChunkSetHdrMaskExternal(chunk, MCTX_ASET_ID); + +#ifdef MEMORY_CONTEXT_CHECKING + chunk->requested_size = size; + /* set mark to catch clobber of "unused" space */ + Assert(size < chunk_size); + set_sentinel(MemoryChunkGetPointer(chunk), size); +#endif +#ifdef RANDOMIZE_ALLOCATED_MEMORY + /* fill the allocated space with junk */ + randomize_mem((char *) MemoryChunkGetPointer(chunk), size); +#endif /* - * If requested size exceeds maximum for chunks, allocate an entire block - * for this request. + * Stick the new block underneath the active allocation block, if any, + * so that we don't lose the use of the space remaining therein. */ - if (size > set->allocChunkLimit) + if (set->blocks != NULL) { - /* check size, only allocation path where the limits could be hit */ - MemoryContextCheckSize(context, size, flags); + block->prev = set->blocks; + block->next = set->blocks->next; + if (block->next) + block->next->prev = block; + set->blocks->next = block; + } + else + { + block->prev = NULL; + block->next = NULL; + set->blocks = block; + } -#ifdef MEMORY_CONTEXT_CHECKING - /* ensure there's always space for the sentinel byte */ - chunk_size = MAXALIGN(size + 1); -#else - chunk_size = MAXALIGN(size); -#endif + /* Ensure any padding bytes are marked NOACCESS. */ + VALGRIND_MAKE_MEM_NOACCESS((char *) MemoryChunkGetPointer(chunk) + size, + chunk_size - size); - blksize = chunk_size + ALLOC_BLOCKHDRSZ + ALLOC_CHUNKHDRSZ; - block = (AllocBlock) malloc(blksize); - if (block == NULL) - return MemoryContextAllocationFailure(context, size, flags); + /* Disallow access to the chunk header. */ + VALGRIND_MAKE_MEM_NOACCESS(chunk, ALLOC_CHUNKHDRSZ); - context->mem_allocated += blksize; + return MemoryChunkGetPointer(chunk); +} - block->aset = set; - block->freeptr = block->endptr = ((char *) block) + blksize; +/* + * Small helper for allocating a new chunk from a chunk, to avoid duplicating + * the code between AllocSetAlloc() and AllocSetAllocFromNewBlock(). + */ +static inline void * +AllocSetAllocChunkFromBlock(MemoryContext context, AllocBlock block, + Size size, Size chunk_size, int fidx) +{ + MemoryChunk *chunk; - chunk = (MemoryChunk *) (((char *) block) + ALLOC_BLOCKHDRSZ); + chunk = (MemoryChunk *) (block->freeptr); - /* mark the MemoryChunk as externally managed */ - MemoryChunkSetHdrMaskExternal(chunk, MCTX_ASET_ID); + /* Prepare to initialize the chunk header. */ + VALGRIND_MAKE_MEM_UNDEFINED(chunk, ALLOC_CHUNKHDRSZ); + + block->freeptr += (chunk_size + ALLOC_CHUNKHDRSZ); + Assert(block->freeptr <= block->endptr); + + /* store the free list index in the value field */ + MemoryChunkSetHdrMask(chunk, block, fidx, MCTX_ASET_ID); #ifdef MEMORY_CONTEXT_CHECKING - chunk->requested_size = size; - /* set mark to catch clobber of "unused" space */ - Assert(size < chunk_size); + chunk->requested_size = size; + /* set mark to catch clobber of "unused" space */ + if (size < chunk_size) set_sentinel(MemoryChunkGetPointer(chunk), size); #endif #ifdef RANDOMIZE_ALLOCATED_MEMORY - /* fill the allocated space with junk */ - randomize_mem((char *) MemoryChunkGetPointer(chunk), size); + /* fill the allocated space with junk */ + randomize_mem((char *) MemoryChunkGetPointer(chunk), size); #endif + /* Ensure any padding bytes are marked NOACCESS. */ + VALGRIND_MAKE_MEM_NOACCESS((char *) MemoryChunkGetPointer(chunk) + size, + chunk_size - size); + + /* Disallow access to the chunk header. */ + VALGRIND_MAKE_MEM_NOACCESS(chunk, ALLOC_CHUNKHDRSZ); + + return MemoryChunkGetPointer(chunk); +} + +/* + * Helper for AllocSetAlloc() that allocates a new block and returns a chunk + * allocated from it. + * + * AllocSetAlloc()'s comment explains why this is separate. + */ +pg_noinline +static void * +AllocSetAllocFromNewBlock(MemoryContext context, Size size, int flags, + int fidx) +{ + AllocSet set = (AllocSet) context; + AllocBlock block; + Size availspace; + Size blksize; + Size required_size; + Size chunk_size; + + /* due to the keeper block set->blocks should always be valid */ + Assert(set->blocks != NULL); + block = set->blocks; + availspace = block->endptr - block->freeptr; + + /* + * The existing active (top) block does not have enough room for + * the requested allocation, but it might still have a useful + * amount of space in it. Once we push it down in the block list, + * we'll never try to allocate more space from it. So, before we + * do that, carve up its free space into chunks that we can put on + * the set's freelists. + * + * Because we can only get here when there's less than + * ALLOC_CHUNK_LIMIT left in the block, this loop cannot iterate + * more than ALLOCSET_NUM_FREELISTS-1 times. + */ + while (availspace >= ((1 << ALLOC_MINBITS) + ALLOC_CHUNKHDRSZ)) + { + AllocFreeListLink *link; + Size availchunk = availspace - ALLOC_CHUNKHDRSZ; + int a_fidx = AllocSetFreeIndex(availchunk); + MemoryChunk *chunk; + /* - * Stick the new block underneath the active allocation block, if any, - * so that we don't lose the use of the space remaining therein. + * In most cases, we'll get back the index of the next larger + * freelist than the one we need to put this chunk on. The + * exception is when availchunk is exactly a power of 2. */ - if (set->blocks != NULL) + if (availchunk != GetChunkSizeFromFreeListIdx(a_fidx)) { - block->prev = set->blocks; - block->next = set->blocks->next; - if (block->next) - block->next->prev = block; - set->blocks->next = block; - } - else - { - block->prev = NULL; - block->next = NULL; - set->blocks = block; + a_fidx--; + Assert(a_fidx >= 0); + availchunk = GetChunkSizeFromFreeListIdx(a_fidx); } - /* Ensure any padding bytes are marked NOACCESS. */ - VALGRIND_MAKE_MEM_NOACCESS((char *) MemoryChunkGetPointer(chunk) + size, - chunk_size - size); + chunk = (MemoryChunk *) (block->freeptr); - /* Disallow access to the chunk header. */ - VALGRIND_MAKE_MEM_NOACCESS(chunk, ALLOC_CHUNKHDRSZ); + /* Prepare to initialize the chunk header. */ + VALGRIND_MAKE_MEM_UNDEFINED(chunk, ALLOC_CHUNKHDRSZ); + block->freeptr += (availchunk + ALLOC_CHUNKHDRSZ); + availspace -= (availchunk + ALLOC_CHUNKHDRSZ); - return MemoryChunkGetPointer(chunk); + /* store the freelist index in the value field */ + MemoryChunkSetHdrMask(chunk, block, a_fidx, MCTX_ASET_ID); +#ifdef MEMORY_CONTEXT_CHECKING + chunk->requested_size = InvalidAllocSize; /* mark it free */ +#endif + /* push this chunk onto the free list */ + link = GetFreeListLink(chunk); + + VALGRIND_MAKE_MEM_DEFINED(link, sizeof(AllocFreeListLink)); + link->next = set->freelist[a_fidx]; + VALGRIND_MAKE_MEM_NOACCESS(link, sizeof(AllocFreeListLink)); + + set->freelist[a_fidx] = chunk; } + /* + * The first such block has size initBlockSize, and we double the + * space in each succeeding block, but not more than maxBlockSize. + */ + blksize = set->nextBlockSize; + set->nextBlockSize <<= 1; + if (set->nextBlockSize > set->maxBlockSize) + set->nextBlockSize = set->maxBlockSize; + + chunk_size = GetChunkSizeFromFreeListIdx(fidx); + + /* + * If initBlockSize is less than ALLOC_CHUNK_LIMIT, we could need more + * space... but try to keep it a power of 2. + */ + required_size = chunk_size + ALLOC_BLOCKHDRSZ + ALLOC_CHUNKHDRSZ; + while (blksize < required_size) + blksize <<= 1; + + /* Try to allocate it */ + block = (AllocBlock) malloc(blksize); + + /* + * We could be asking for pretty big blocks here, so cope if malloc + * fails. But give up if there's less than 1 MB or so available... + */ + while (block == NULL && blksize > 1024 * 1024) + { + blksize >>= 1; + if (blksize < required_size) + break; + block = (AllocBlock) malloc(blksize); + } + + if (block == NULL) + return MemoryContextAllocationFailure(context, size, flags); + + context->mem_allocated += blksize; + + block->aset = set; + block->freeptr = ((char *) block) + ALLOC_BLOCKHDRSZ; + block->endptr = ((char *) block) + blksize; + + /* Mark unallocated space NOACCESS. */ + VALGRIND_MAKE_MEM_NOACCESS(block->freeptr, + blksize - ALLOC_BLOCKHDRSZ); + + block->prev = NULL; + block->next = set->blocks; + if (block->next) + block->next->prev = block; + set->blocks = block; + + return AllocSetAllocChunkFromBlock(context, block, size, chunk_size, fidx); +} + +/* + * AllocSetAlloc + * Returns pointer to allocated memory of given size or NULL if + * request could not be completed; memory is added to the set. + * + * No request may exceed: + * MAXALIGN_DOWN(SIZE_MAX) - ALLOC_BLOCKHDRSZ - ALLOC_CHUNKHDRSZ + * All callers use a much-lower limit. + * + * Note: when using valgrind, it doesn't matter how the returned allocation + * is marked, as mcxt.c will set it to UNDEFINED. In some paths we will + * return space that is marked NOACCESS - AllocSetRealloc has to beware! + * + * This function should only contain the most common code paths, everything + * else should be in pg_noinline helper functions. That allows to avoid the + * overhead of creating a stack frame for the common cases - this function is + * one of the most common bottlenecks, making this worthwhile. The helper + * functions should always directly return the newly allocated memory, + * otherwise the stack frame is required after all. + */ +void * +AllocSetAlloc(MemoryContext context, Size size, int flags) +{ + AllocSet set = (AllocSet) context; + AllocBlock block; + MemoryChunk *chunk; + int fidx; + Size chunk_size; + Size availspace; + + Assert(AllocSetIsValid(set)); + + /* due to the keeper block set->blocks should always be valid */ + Assert(set->blocks != NULL); + + /* + * If requested size exceeds maximum for chunks, allocate an entire block + * for this request. + */ + if (size > set->allocChunkLimit) + return AllocSetAllocLarge(context, size, flags); + /* * Request is small enough to be treated as a chunk. Look in the * corresponding free list to see if there is a free chunk we could reuse. @@ -837,164 +1037,20 @@ AllocSetAlloc(MemoryContext context, Size size, int flags) chunk_size = GetChunkSizeFromFreeListIdx(fidx); Assert(chunk_size >= size); + block = set->blocks; + availspace = block->endptr - block->freeptr; + /* * If there is enough room in the active allocation block, we will put the * chunk into that block. Else must start a new one. */ - if ((block = set->blocks) != NULL) - { - Size availspace = block->endptr - block->freeptr; - - if (availspace < (chunk_size + ALLOC_CHUNKHDRSZ)) - { - /* - * The existing active (top) block does not have enough room for - * the requested allocation, but it might still have a useful - * amount of space in it. Once we push it down in the block list, - * we'll never try to allocate more space from it. So, before we - * do that, carve up its free space into chunks that we can put on - * the set's freelists. - * - * Because we can only get here when there's less than - * ALLOC_CHUNK_LIMIT left in the block, this loop cannot iterate - * more than ALLOCSET_NUM_FREELISTS-1 times. - */ - while (availspace >= ((1 << ALLOC_MINBITS) + ALLOC_CHUNKHDRSZ)) - { - AllocFreeListLink *link; - Size availchunk = availspace - ALLOC_CHUNKHDRSZ; - int a_fidx = AllocSetFreeIndex(availchunk); - - /* - * In most cases, we'll get back the index of the next larger - * freelist than the one we need to put this chunk on. The - * exception is when availchunk is exactly a power of 2. - */ - if (availchunk != GetChunkSizeFromFreeListIdx(a_fidx)) - { - a_fidx--; - Assert(a_fidx >= 0); - availchunk = GetChunkSizeFromFreeListIdx(a_fidx); - } - - chunk = (MemoryChunk *) (block->freeptr); - - /* Prepare to initialize the chunk header. */ - VALGRIND_MAKE_MEM_UNDEFINED(chunk, ALLOC_CHUNKHDRSZ); - block->freeptr += (availchunk + ALLOC_CHUNKHDRSZ); - availspace -= (availchunk + ALLOC_CHUNKHDRSZ); - - /* store the freelist index in the value field */ - MemoryChunkSetHdrMask(chunk, block, a_fidx, MCTX_ASET_ID); -#ifdef MEMORY_CONTEXT_CHECKING - chunk->requested_size = InvalidAllocSize; /* mark it free */ -#endif - /* push this chunk onto the free list */ - link = GetFreeListLink(chunk); - - VALGRIND_MAKE_MEM_DEFINED(link, sizeof(AllocFreeListLink)); - link->next = set->freelist[a_fidx]; - VALGRIND_MAKE_MEM_NOACCESS(link, sizeof(AllocFreeListLink)); - - set->freelist[a_fidx] = chunk; - } - /* Mark that we need to create a new block */ - block = NULL; - } - } - - /* - * Time to create a new regular (multi-chunk) block? - */ - if (block == NULL) - { - Size required_size; - - /* - * The first such block has size initBlockSize, and we double the - * space in each succeeding block, but not more than maxBlockSize. - */ - blksize = set->nextBlockSize; - set->nextBlockSize <<= 1; - if (set->nextBlockSize > set->maxBlockSize) - set->nextBlockSize = set->maxBlockSize; - - /* - * If initBlockSize is less than ALLOC_CHUNK_LIMIT, we could need more - * space... but try to keep it a power of 2. - */ - required_size = chunk_size + ALLOC_BLOCKHDRSZ + ALLOC_CHUNKHDRSZ; - while (blksize < required_size) - blksize <<= 1; - - /* Try to allocate it */ - block = (AllocBlock) malloc(blksize); - - /* - * We could be asking for pretty big blocks here, so cope if malloc - * fails. But give up if there's less than 1 MB or so available... - */ - while (block == NULL && blksize > 1024 * 1024) - { - blksize >>= 1; - if (blksize < required_size) - break; - block = (AllocBlock) malloc(blksize); - } - - if (block == NULL) - return MemoryContextAllocationFailure(context, size, flags); - - context->mem_allocated += blksize; - - block->aset = set; - block->freeptr = ((char *) block) + ALLOC_BLOCKHDRSZ; - block->endptr = ((char *) block) + blksize; - - /* Mark unallocated space NOACCESS. */ - VALGRIND_MAKE_MEM_NOACCESS(block->freeptr, - blksize - ALLOC_BLOCKHDRSZ); - - block->prev = NULL; - block->next = set->blocks; - if (block->next) - block->next->prev = block; - set->blocks = block; - } + if (unlikely(availspace < (chunk_size + ALLOC_CHUNKHDRSZ))) + return AllocSetAllocFromNewBlock(context, size, flags, fidx); /* * OK, do the allocation */ - chunk = (MemoryChunk *) (block->freeptr); - - /* Prepare to initialize the chunk header. */ - VALGRIND_MAKE_MEM_UNDEFINED(chunk, ALLOC_CHUNKHDRSZ); - - block->freeptr += (chunk_size + ALLOC_CHUNKHDRSZ); - Assert(block->freeptr <= block->endptr); - - /* store the free list index in the value field */ - MemoryChunkSetHdrMask(chunk, block, fidx, MCTX_ASET_ID); - -#ifdef MEMORY_CONTEXT_CHECKING - chunk->requested_size = size; - /* set mark to catch clobber of "unused" space */ - if (size < chunk_size) - set_sentinel(MemoryChunkGetPointer(chunk), size); -#endif -#ifdef RANDOMIZE_ALLOCATED_MEMORY - /* fill the allocated space with junk */ - randomize_mem((char *) MemoryChunkGetPointer(chunk), size); -#endif - - /* Ensure any padding bytes are marked NOACCESS. */ - VALGRIND_MAKE_MEM_NOACCESS((char *) MemoryChunkGetPointer(chunk) + size, - chunk_size - size); - - /* Disallow access to the chunk header. */ - VALGRIND_MAKE_MEM_NOACCESS(chunk, ALLOC_CHUNKHDRSZ); - - return MemoryChunkGetPointer(chunk); + return AllocSetAllocChunkFromBlock(context, block, size, chunk_size, fidx); } /* -- 2.40.1.windows.1