From 6d31088c39d455637179853a3c72a7d9e20fe053 Mon Sep 17 00:00:00 2001 From: Alvaro Herrera Date: Thu, 2 Nov 2017 18:35:10 +0100 Subject: [PATCH v3] Fix summarization concurrent with relation extension --- src/backend/access/brin/brin.c | 91 ++++++++++++++++++++++++++++-------------- 1 file changed, 62 insertions(+), 29 deletions(-) diff --git a/src/backend/access/brin/brin.c b/src/backend/access/brin/brin.c index e6909d7aea..72ac8929bd 100644 --- a/src/backend/access/brin/brin.c +++ b/src/backend/access/brin/brin.c @@ -67,7 +67,7 @@ static BrinBuildState *initialize_brin_buildstate(Relation idxRel, BrinRevmap *revmap, BlockNumber pagesPerRange); static void terminate_brin_buildstate(BrinBuildState *state); static void brinsummarize(Relation index, Relation heapRel, BlockNumber pageRange, - double *numSummarized, double *numExisting); + bool include_partial, double *numSummarized, double *numExisting); static void form_and_insert_tuple(BrinBuildState *state); static void union_tuples(BrinDesc *bdesc, BrinMemTuple *a, BrinTuple *b); @@ -791,7 +791,7 @@ brinvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats) brin_vacuum_scan(info->index, info->strategy); - brinsummarize(info->index, heapRel, BRIN_ALL_BLOCKRANGES, + brinsummarize(info->index, heapRel, BRIN_ALL_BLOCKRANGES, false, &stats->num_index_tuples, &stats->num_index_tuples); heap_close(heapRel, AccessShareLock); @@ -909,7 +909,7 @@ brin_summarize_range(PG_FUNCTION_ARGS) RelationGetRelationName(indexRel)))); /* OK, do it */ - brinsummarize(indexRel, heapRel, heapBlk, &numSummarized, NULL); + brinsummarize(indexRel, heapRel, heapBlk, true, &numSummarized, NULL); relation_close(indexRel, ShareUpdateExclusiveLock); relation_close(heapRel, ShareUpdateExclusiveLock); @@ -1129,7 +1129,8 @@ terminate_brin_buildstate(BrinBuildState *state) } /* - * Summarize the given page range of the given index. + * On the given BRIN index, summarize the heap page range that corresponds + * to the heap block number given. * * This routine can run in parallel with insertions into the heap. To avoid * missing those values from the summary tuple, we first insert a placeholder @@ -1139,6 +1140,12 @@ terminate_brin_buildstate(BrinBuildState *state) * update of the index value happens in a loop, so that if somebody updates * the placeholder tuple after we read it, we detect the case and try again. * This ensures that the concurrently inserted tuples are not lost. + * + * A further corner case is this routine being asked to summarize the partial + * range at the end of the table. heapNumBlocks is the (possibly outdated) + * table size; if we notice that the requested range lies beyond that size, + * we re-compute the table size after inserting the placeholder tuple, to + * avoid missing pages that were appended recently. */ static void summarize_range(IndexInfo *indexInfo, BrinBuildState *state, Relation heapRel, @@ -1160,6 +1167,33 @@ summarize_range(IndexInfo *indexInfo, BrinBuildState *state, Relation heapRel, heapBlk, phtup, phsz); /* + * Compute range end. We hold ShareUpdateExclusive lock on table, so it + * cannot shrink concurrently (but it can grow). + */ + Assert(heapBlk % state->bs_pagesPerRange == 0); + if (heapBlk + state->bs_pagesPerRange > heapNumBlks) + { + /* + * If we're asked to scan what we believe to be the final range on the + * table (i.e. a range that might be partial) we need to recompute our + * idea of what the latest page is after inserting the placeholder + * tuple. Anyone that grows the table later will update the + * placeholder tuple, so it doesn't matter that we won't scan these + * pages ourselves. Careful: the table might have been extended + * beyond the current range, so clamp our result. + * + * Fortunately, this should occur infrequently. + */ + scanNumBlks = Min(RelationGetNumberOfBlocks(heapRel) - heapBlk, + state->bs_pagesPerRange); + } + else + { + /* Easy case: range is known to be complete */ + scanNumBlks = state->bs_pagesPerRange; + } + + /* * Execute the partial heap scan covering the heap blocks in the specified * page range, summarizing the heap tuples in it. This scan stops just * short of brinbuildCallback creating the new index entry. @@ -1169,8 +1203,6 @@ summarize_range(IndexInfo *indexInfo, BrinBuildState *state, Relation heapRel, * by transactions that are still in progress, among other corner cases. */ state->bs_currRangeStart = heapBlk; - scanNumBlks = heapBlk + state->bs_pagesPerRange <= heapNumBlks ? - state->bs_pagesPerRange : heapNumBlks - heapBlk; IndexBuildHeapRangeScan(heapRel, state->bs_irel, indexInfo, false, true, heapBlk, scanNumBlks, brinbuildCallback, (void *) state); @@ -1234,6 +1266,8 @@ summarize_range(IndexInfo *indexInfo, BrinBuildState *state, Relation heapRel, * Summarize page ranges that are not already summarized. If pageRange is * BRIN_ALL_BLOCKRANGES then the whole table is scanned; otherwise, only the * page range containing the given heap page number is scanned. + * If include_partial is true, then the partial range at the end of the table + * is summarized, otherwise not. * * For each new index tuple inserted, *numSummarized (if not NULL) is * incremented; for each existing tuple, *numExisting (if not NULL) is @@ -1241,56 +1275,55 @@ summarize_range(IndexInfo *indexInfo, BrinBuildState *state, Relation heapRel, */ static void brinsummarize(Relation index, Relation heapRel, BlockNumber pageRange, - double *numSummarized, double *numExisting) + bool include_partial, double *numSummarized, double *numExisting) { BrinRevmap *revmap; BrinBuildState *state = NULL; IndexInfo *indexInfo = NULL; BlockNumber heapNumBlocks; - BlockNumber heapBlk; BlockNumber pagesPerRange; Buffer buf; BlockNumber startBlk; - BlockNumber endBlk; - - /* determine range of pages to process; nothing to do for an empty table */ - heapNumBlocks = RelationGetNumberOfBlocks(heapRel); - if (heapNumBlocks == 0) - return; revmap = brinRevmapInitialize(index, &pagesPerRange, NULL); + /* determine range of pages to process */ + heapNumBlocks = RelationGetNumberOfBlocks(heapRel); if (pageRange == BRIN_ALL_BLOCKRANGES) - { startBlk = 0; - endBlk = heapNumBlocks; - } else - { startBlk = (pageRange / pagesPerRange) * pagesPerRange; + if (startBlk >= heapNumBlocks) + { /* Nothing to do if start point is beyond end of table */ - if (startBlk > heapNumBlocks) - { - brinRevmapTerminate(revmap); - return; - } - endBlk = startBlk + pagesPerRange; - if (endBlk > heapNumBlocks) - endBlk = heapNumBlocks; + brinRevmapTerminate(revmap); + return; } /* * Scan the revmap to find unsummarized items. */ buf = InvalidBuffer; - for (heapBlk = startBlk; heapBlk < endBlk; heapBlk += pagesPerRange) + for (; startBlk < heapNumBlocks; startBlk += pagesPerRange) { BrinTuple *tup; OffsetNumber off; + /* + * Unless requested to summarize even a partial range, go away now if + * we think the next range is partial. + * + * Maybe the table already grew to cover this range completely, but we + * don't want to spend a whole RelationGetNumberOfBlocks to find out, + * so just leave it for next time. + */ + if (!include_partial && + (startBlk + pagesPerRange > heapNumBlocks)) + break; + CHECK_FOR_INTERRUPTS(); - tup = brinGetTupleForHeapBlock(revmap, heapBlk, &buf, &off, NULL, + tup = brinGetTupleForHeapBlock(revmap, startBlk, &buf, &off, NULL, BUFFER_LOCK_SHARE, NULL); if (tup == NULL) { @@ -1303,7 +1336,7 @@ brinsummarize(Relation index, Relation heapRel, BlockNumber pageRange, pagesPerRange); indexInfo = BuildIndexInfo(index); } - summarize_range(indexInfo, state, heapRel, heapBlk, heapNumBlocks); + summarize_range(indexInfo, state, heapRel, startBlk, heapNumBlocks); /* and re-initialize state for the next range */ brin_memtuple_initialize(state->bs_dtuple, state->bs_bdesc); -- 2.11.0