*** a/src/backend/access/heap/heapam.c
--- b/src/backend/access/heap/heapam.c
***************
*** 4262,4293 ****
  HeapTupleHeaderAdvanceLatestRemovedXid(HeapTupleHeader tuple,
  }
  
  /*
- * Perform XLogInsert to register a heap cleanup info message. These
- * messages are sent once per VACUUM and are required because
- * of the phasing of removal operations during a lazy VACUUM.
- * see comments for vacuum_log_cleanup_info().
- */
- XLogRecPtr
- log_heap_cleanup_info(RelFileNode rnode, TransactionId latestRemovedXid)
- {
-     xl_heap_cleanup_info xlrec;
-     XLogRecPtr  recptr;
-     XLogRecData rdata;
- 
-     xlrec.node = rnode;
-     xlrec.latestRemovedXid = latestRemovedXid;
- 
-     rdata.data = (char *) &xlrec;
-     rdata.len = SizeOfHeapCleanupInfo;
-     rdata.buffer = InvalidBuffer;
-     rdata.next = NULL;
- 
-     recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_CLEANUP_INFO, &rdata);
- 
-     return recptr;
- }
- 
- /*
  * Perform XLogInsert for a heap-clean operation. Caller must already
  * have modified the buffer and marked it dirty.
  *
--- 4262,4267 ----
***************
*** 4634,4660 ****
  log_newpage_buffer(Buffer buffer)
  }
  
  /*
- * Handles CLEANUP_INFO
- */
- static void
- heap_xlog_cleanup_info(XLogRecPtr lsn, XLogRecord *record)
- {
-     xl_heap_cleanup_info *xlrec = (xl_heap_cleanup_info *) XLogRecGetData(record);
- 
-     if (InHotStandby)
-         ResolveRecoveryConflictWithSnapshot(xlrec->latestRemovedXid, xlrec->node);
- 
-     /*
-      * Actual operation is a no-op. Record type exists to provide a means for
-      * conflict processing to occur before we begin index vacuum actions. see
-      * vacuumlazy.c and also comments in btvacuumpage()
-      */
- 
-     /* Backup blocks are not used in cleanup_info records */
-     Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
- }
- 
- /*
  * Handles HEAP2_CLEAN record type
  */
  static void
--- 4608,4613 ----
***************
*** 5693,5701 ****
  heap2_redo(XLogRecPtr lsn, XLogRecord *record)
          case XLOG_HEAP2_CLEAN:
              heap_xlog_clean(lsn, record);
              break;
-         case XLOG_HEAP2_CLEANUP_INFO:
-             heap_xlog_cleanup_info(lsn, record);
-             break;
          case XLOG_HEAP2_VISIBLE:
              heap_xlog_visible(lsn, record);
              break;
--- 5646,5651 ----
*** a/src/backend/access/heap/pruneheap.c
--- b/src/backend/access/heap/pruneheap.c
***************
*** 121,133 ****
  heap_page_prune_opt(Relation relation, Buffer buffer, TransactionId OldestXmin)
       * have pruned while we hold pin.)
       */
      if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
-     {
-         TransactionId ignore = InvalidTransactionId;    /* return value not
-                                                          * needed */
- 
          /* OK to prune */
!         (void) heap_page_prune(relation, buffer, OldestXmin, true, &ignore);
!     }
  
      /* And release buffer lock */
      LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
--- 121,128 ----
       * have pruned while we hold pin.)
       */
      if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
          /* OK to prune */
!         (void) heap_page_prune(relation, buffer, OldestXmin, true);
  
      /* And release buffer lock */
      LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
***************
*** 148,159 ****
  heap_page_prune_opt(Relation relation, Buffer buffer, TransactionId OldestXmin)
   * send its own new total to pgstats, and we don't want this delta applied
   * on top of that.)
   *
! * Returns the number of tuples deleted from the page and sets
! * latestRemovedXid.
   */
  int
  heap_page_prune(Relation relation, Buffer buffer, TransactionId OldestXmin,
!                 bool report_stats, TransactionId *latestRemovedXid)
  {
      int         ndeleted = 0;
      Page        page = BufferGetPage(buffer);
--- 143,153 ----
   * send its own new total to pgstats, and we don't want this delta applied
   * on top of that.)
   *
! * Returns the number of tuples deleted from the page.
   */
  int
  heap_page_prune(Relation relation, Buffer buffer, TransactionId OldestXmin,
!                 bool report_stats)
  {
      int         ndeleted = 0;
      Page        page = BufferGetPage(buffer);
***************
*** 277,284 ****
  heap_page_prune(Relation relation, Buffer buffer, TransactionId OldestXmin,
      if (report_stats && ndeleted > prstate.ndead)
          pgstat_update_heap_dead_tuples(relation, ndeleted - prstate.ndead);
  
-     *latestRemovedXid = prstate.latestRemovedXid;
- 
      /*
       * XXX Should we update the FSM information of this page ?
       *
--- 271,276 ----
*** a/src/backend/access/nbtree/nbtxlog.c
--- b/src/backend/access/nbtree/nbtxlog.c
***************
*** 728,738 ****
  btree_xlog_delete(XLogRecPtr lsn, XLogRecord *record)
       * update the page.
       *
       * Btree delete records can conflict with standby queries. You might
!      * think that vacuum records would conflict as well, but we've handled
!      * that already. XLOG_HEAP2_CLEANUP_INFO records provide the highest xid
!      * cleaned by the vacuum of the heap and so we can resolve any conflicts
!      * just once when that arrives. After that we know that no conflicts
!      * exist from individual btree vacuum records on that index.
       */
      if (InHotStandby)
      {
--- 728,736 ----
       * update the page.
       *
       * Btree delete records can conflict with standby queries. You might
!      * think that vacuum records would conflict as well. However, VACUUM
!      * always removes the heap tuple first, and the XLOG_HEAP2_CLEAN record
!      * arising from that removal is enough.
       */
      if (InHotStandby)
      {
*** a/src/backend/access/rmgrdesc/heapdesc.c
--- b/src/backend/access/rmgrdesc/heapdesc.c
***************
*** 133,145 ****
  heap2_desc(StringInfo buf, uint8 xl_info, char *rec)
                           xlrec->node.relNode, xlrec->block,
                           xlrec->latestRemovedXid);
      }
-     else if (info == XLOG_HEAP2_CLEANUP_INFO)
-     {
-         xl_heap_cleanup_info *xlrec = (xl_heap_cleanup_info *) rec;
- 
-         appendStringInfo(buf, "cleanup info: remxid %u",
-                          xlrec->latestRemovedXid);
-     }
      else if (info == XLOG_HEAP2_VISIBLE)
      {
          xl_heap_visible *xlrec = (xl_heap_visible *) rec;
--- 133,138 ----
*** a/src/backend/commands/vacuumlazy.c
--- b/src/backend/commands/vacuumlazy.c
***************
*** 11,17 ****
   * on the number of tuples and pages we will keep track of at once.
   *
   * We are willing to use at most maintenance_work_mem memory space to keep
!  * track of dead tuples. We initially allocate an array of TIDs of that size,
   * with an upper limit that depends on table size (this limit ensures we don't
   * allocate a huge area uselessly for vacuuming small tables). If the array
   * threatens to overflow, we suspend the heap scan phase and perform a pass of
--- 11,17 ----
   * on the number of tuples and pages we will keep track of at once.
   *
   * We are willing to use at most maintenance_work_mem memory space to keep
!  * track of dead TIDs. We initially allocate an array of TIDs of that size,
   * with an upper limit that depends on table size (this limit ensures we don't
   * allocate a huge area uselessly for vacuuming small tables). If the array
   * threatens to overflow, we suspend the heap scan phase and perform a pass of
***************
*** 19,25 ****
   * TID array.
   *
   * If we're processing a table with no indexes, we can just vacuum each page
!  * as we go; there's no need to save up multiple tuples to minimize the number
   * of index scans performed. So we don't use maintenance_work_mem memory for
   * the TID array, just enough to hold as many heap tuples as fit on one page.
   *
--- 19,25 ----
   * TID array.
   *
   * If we're processing a table with no indexes, we can just vacuum each page
!  * as we go; there's no need to save up many TIDs to minimize the number
   * of index scans performed. So we don't use maintenance_work_mem memory for
   * the TID array, just enough to hold as many heap tuples as fit on one page.
   *
***************
*** 108,120 ****
  typedef struct LVRelStats
      BlockNumber pages_removed;
      double      tuples_deleted;
      BlockNumber nonempty_pages; /* actually, last nonempty page + 1 */
!     /* List of TIDs of tuples we intend to delete */
!     /* NB: this list is ordered by TID address */
      int         num_dead_tuples;    /* current # of entries */
      int         max_dead_tuples;    /* # slots allocated in array */
      ItemPointer dead_tuples;    /* array of ItemPointerData */
      int         num_index_scans;
-     TransactionId latestRemovedXid;
      bool        lock_waiter_detected;
  } LVRelStats;
--- 108,118 ----
      BlockNumber pages_removed;
      double      tuples_deleted;
      BlockNumber nonempty_pages; /* actually, last nonempty page + 1 */
!     /* Ordered list of TIDs we intend to delete */
      int         num_dead_tuples;    /* current # of entries */
      int         max_dead_tuples;    /* # slots allocated in array */
      ItemPointer dead_tuples;    /* array of ItemPointerData */
      int         num_index_scans;
      bool        lock_waiter_detected;
  } LVRelStats;
***************
*** 331,379 ****
  lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
  }
  
  /*
- * For Hot Standby we need to know the highest transaction id that will
- * be removed by any change. VACUUM proceeds in a number of passes so
- * we need to consider how each pass operates. The first phase runs
- * heap_page_prune(), which can issue XLOG_HEAP2_CLEAN records as it
- * progresses - these will have a latestRemovedXid on each record.
- * In some cases this removes all of the tuples to be removed, though
- * often we have dead tuples with index pointers so we must remember them
- * for removal in phase 3. Index records for those rows are removed
- * in phase 2 and index blocks do not have MVCC information attached.
- * So before we can allow removal of any index tuples we need to issue
- * a WAL record containing the latestRemovedXid of rows that will be
- * removed in phase three. This allows recovery queries to block at the
- * correct place, i.e. before phase two, rather than during phase three
- * which would be after the rows have become inaccessible.
- */
- static void
- vacuum_log_cleanup_info(Relation rel, LVRelStats *vacrelstats)
- {
-     /*
-      * Skip this for relations for which no WAL is to be written, or if we're
-      * not trying to support archive recovery.
-      */
-     if (!RelationNeedsWAL(rel) || !XLogIsNeeded())
-         return;
- 
-     /*
-      * No need to write the record at all unless it contains a valid value
-      */
-     if (TransactionIdIsValid(vacrelstats->latestRemovedXid))
-         (void) log_heap_cleanup_info(rel->rd_node, vacrelstats->latestRemovedXid);
- }
- 
- /*
   *  lazy_scan_heap() -- scan an open heap relation
   *
   *      This routine prunes each page in the heap, which will among other
   *      things truncate dead tuples to dead line pointers, defragment the
!  *      page, and set commit status bits (see heap_page_prune). It also builds
!  *      lists of dead tuples and pages with free space, calculates statistics
!  *      on the number of live tuples in the heap, and marks pages as
!  *      all-visible if appropriate. When done, or when we run low on space for
!  *      dead-tuple TIDs, invoke vacuuming of indexes and call lazy_vacuum_heap
!  *      to reclaim dead line pointers.
   *
   *      If there are no indexes then we can reclaim line pointers on the fly;
   *      dead line pointers need only be retained until all index pointers that
--- 329,344 ----
  }
  
  /*
   *  lazy_scan_heap() -- scan an open heap relation
   *
   *      This routine prunes each page in the heap, which will among other
   *      things truncate dead tuples to dead line pointers, defragment the
!  *      page, and set commit status bits (see heap_page_prune). It then
!  *      builds lists of dead line pointers, tracks feasibility of truncating
!  *      the end of the heap, calculates statistics on the number of live
!  *      tuples, and marks pages as all-visible if appropriate. When done, or
!  *      when we run low on space for dead-tuple TIDs, invoke vacuuming of
!  *      indexes and call lazy_vacuum_heap to reclaim dead line pointers.
   *
   *      If there are no indexes then we can reclaim line pointers on the fly;
   *      dead line pointers need only be retained until all index pointers that
***************
*** 389,398 ****
  lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
      char       *relname;
      BlockNumber empty_pages,
                  vacuumed_pages;
!     double      num_tuples,
!                 tups_vacuumed,
!                 nkeep,
!                 nunused;
      IndexBulkDeleteResult **indstats;
      int         i;
      PGRUsage    ru0;
--- 354,363 ----
      char       *relname;
      BlockNumber empty_pages,
                  vacuumed_pages;
!     double      num_tuples,     /* kept: any reason */
!                 tups_vacuumed,  /* truncated to LP_DEAD */
!                 nkeep,          /* kept: writer finished too recently */
!                 nunused;        /* already LP_UNUSED */
      IndexBulkDeleteResult **indstats;
      int         i;
      PGRUsage    ru0;
***************
*** 418,424 ****
  lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
      vacrelstats->rel_pages = nblocks;
      vacrelstats->scanned_pages = 0;
      vacrelstats->nonempty_pages = 0;
-     vacrelstats->latestRemovedXid = InvalidTransactionId;
  
      lazy_space_alloc(vacrelstats, nblocks);
--- 383,388 ----
***************
*** 472,486 ****
  lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
          Page        page;
          OffsetNumber offnum,
                      maxoff;
!         bool        tupgone,
!                     hastup;
          int         prev_dead_count;
          OffsetNumber frozen[MaxOffsetNumber];
          int         nfrozen;
          Size        freespace;
          bool        all_visible_according_to_vm;
-         bool        all_visible;
-         bool        has_dead_tuples;
          TransactionId visibility_cutoff_xid = InvalidTransactionId;
  
          if (blkno == next_not_all_visible_block)
--- 436,452 ----
          Page        page;
          OffsetNumber offnum,
                      maxoff;
!         /* Page has items !LP_UNUSED && !LP_DEAD? */
!         bool        hastup,
!         /* Page has items !LP_UNUSED && !(LP_NORMAL && HEAPTUPLE_LIVE)? */
!                     has_nonlive,
!         /* Page has live tuples too new to be visible to some snapshot? */
!                     has_too_recent;
          int         prev_dead_count;
          OffsetNumber frozen[MaxOffsetNumber];
          int         nfrozen;
          Size        freespace;
          bool        all_visible_according_to_vm;
          TransactionId visibility_cutoff_xid = InvalidTransactionId;
  
          if (blkno == next_not_all_visible_block)
***************
*** 536,557 ****
  lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
                  vmbuffer = InvalidBuffer;
              }
  
-             /* Log cleanup info before we touch indexes */
-             vacuum_log_cleanup_info(onerel, vacrelstats);
- 
              /* Remove index entries */
              for (i = 0; i < nindexes; i++)
                  lazy_vacuum_index(Irel[i], &indstats[i], vacrelstats);
  
!             /* Remove tuples from heap */
              lazy_vacuum_heap(onerel, vacrelstats);
  
!             /*
!              * Forget the now-vacuumed tuples, and press on, but be careful
!              * not to reset latestRemovedXid since we want that value to be
!              * valid.
!              */
              vacrelstats->num_dead_tuples = 0;
              vacrelstats->num_index_scans++;
          }
--- 502,516 ----
                  vmbuffer = InvalidBuffer;
              }
  
              /* Remove index entries */
              for (i = 0; i < nindexes; i++)
                  lazy_vacuum_index(Irel[i], &indstats[i], vacrelstats);
  
!             /* Mark dead line pointers unused */
              lazy_vacuum_heap(onerel, vacrelstats);
  
!             /* Forget the now-vacuumed TIDs, and press on */
              vacrelstats->num_dead_tuples = 0;
              vacrelstats->num_index_scans++;
          }
***************
*** 569,575 ****
  lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
          buf = ReadBufferExtended(onerel, MAIN_FORKNUM, blkno,
                                   RBM_NORMAL, vac_strategy);
  
!         /* We need buffer cleanup lock so that we can prune HOT chains. */
          if (!ConditionalLockBufferForCleanup(buf))
          {
              /*
--- 528,534 ----
          buf = ReadBufferExtended(onerel, MAIN_FORKNUM, blkno,
                                   RBM_NORMAL, vac_strategy);
  
!         /* We need buffer cleanup lock so that we can prune. */
          if (!ConditionalLockBufferForCleanup(buf))
          {
              /*
***************
*** 671,691 ****
  lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
          }
  
          /*
!          * Prune all HOT-update chains in this page.
!          *
!          * We count tuples removed by the pruning step as removed by VACUUM.
           */
!         tups_vacuumed += heap_page_prune(onerel, buf, OldestXmin, false,
!                                          &vacrelstats->latestRemovedXid);
  
          /*
           * Now scan the page to collect vacuumable items and check for tuples
           * requiring freezing.
           */
-         all_visible = true;
-         has_dead_tuples = false;
          nfrozen = 0;
          hastup = false;
          prev_dead_count = vacrelstats->num_dead_tuples;
          maxoff = PageGetMaxOffsetNumber(page);
          for (offnum = FirstOffsetNumber;
--- 630,651 ----
          }
  
          /*
!          * Remove dead tuples in this page. Certain heap-only tuples will be
!          * reduced straight to LP_UNUSED line pointers. Other tuples,
!          * including all index-referenced tuples, will be reduced to LP_DEAD
!          * with no storage. We will finish the job by removing the index
!          * entries and changing them to LP_UNUSED.
           */
!         tups_vacuumed += heap_page_prune(onerel, buf, OldestXmin, false);
  
          /*
           * Now scan the page to collect vacuumable items and check for tuples
           * requiring freezing.
           */
          nfrozen = 0;
          hastup = false;
+         has_nonlive = false;
+         has_too_recent = false;
          prev_dead_count = vacrelstats->num_dead_tuples;
          maxoff = PageGetMaxOffsetNumber(page);
          for (offnum = FirstOffsetNumber;
***************
*** 693,698 ****
--- 653,659 ----
               offnum = OffsetNumberNext(offnum))
          {
              ItemId      itemid;
+             bool        try_freeze = true;
  
              itemid = PageGetItemId(page, offnum);
***************
*** 716,737 ****
               * DEAD item pointers are to be vacuumed normally; but we don't
               * count them in tups_vacuumed, else we'd be double-counting (at
               * least in the common case where heap_page_prune() just freed up
!              * a non-HOT tuple).
               */
              if (ItemIdIsDead(itemid))
              {
                  lazy_record_dead_tuple(vacrelstats, &(tuple.t_self));
!                 all_visible = false;
                  continue;
              }
  
              Assert(ItemIdIsNormal(itemid));
  
              tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
              tuple.t_len = ItemIdGetLength(itemid);
  
-             tupgone = false;
- 
              switch (HeapTupleSatisfiesVacuum(tuple.t_data, OldestXmin, buf))
              {
                  case HEAPTUPLE_DEAD:
--- 677,699 ----
               * DEAD item pointers are to be vacuumed normally; but we don't
               * count them in tups_vacuumed, else we'd be double-counting (at
               * least in the common case where heap_page_prune() just freed up
!              * a tuple).
               */
              if (ItemIdIsDead(itemid))
              {
                  lazy_record_dead_tuple(vacrelstats, &(tuple.t_self));
!                 has_nonlive = true;
                  continue;
              }
  
              Assert(ItemIdIsNormal(itemid));
  
+             num_tuples += 1;
+             hastup = true;
+ 
              tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
              tuple.t_len = ItemIdGetLength(itemid);
  
              switch (HeapTupleSatisfiesVacuum(tuple.t_data, OldestXmin, buf))
              {
                  case HEAPTUPLE_DEAD:
***************
*** 742,762 ****
                       * state changed since heap_page_prune() looked. In
                       * particular an INSERT_IN_PROGRESS tuple could have
                       * changed to DEAD if the inserter aborted. So this
!                      * cannot be considered an error condition.
                       *
!                      * If the tuple is HOT-updated then it must only be
!                      * removed by a prune operation; so we keep it just as if
!                      * it were RECENTLY_DEAD. Also, if it's a heap-only
!                      * tuple, we choose to keep it, because it'll be a lot
!                      * cheaper to get rid of it in the next pruning pass than
!                      * to treat it like an indexed tuple.
                       */
!                     if (HeapTupleIsHotUpdated(&tuple) ||
!                         HeapTupleIsHeapOnly(&tuple))
!                         nkeep += 1;
!                     else
!                         tupgone = true; /* we can delete the tuple */
!                     all_visible = false;
                      break;
                  case HEAPTUPLE_LIVE:
                      /* Tuple is good --- but let's do some validity checks */
--- 704,727 ----
                       * state changed since heap_page_prune() looked. In
                       * particular an INSERT_IN_PROGRESS tuple could have
                       * changed to DEAD if the inserter aborted. So this
!                      * cannot be considered an error condition, and it's not
!                      * worth the code to actually kill the tuple here.
!                      *
!                      * heap_freeze_tuple() can't cope with HEAPTUPLE_DEAD.
!                      * Since our OldestXmin predates the inserter's abort,
!                      * none of the tuple's XIDs should qualify for freezing.
!                      * For this rarely-tested branch, burn a few cycles
!                      * verifying that at runtime.
                       *
!                      * In all other respects, treat this outcome just like
!                      * HEAPTUPLE_RECENTLY_DEAD.
                       */
!                     if (heap_tuple_needs_freeze(tuple.t_data, FreezeLimit, buf))
!                         elog(ERROR, "just-dead tuple requires freezing");
!                     try_freeze = false;
! 
!                     nkeep += 1;
!                     has_nonlive = true;
                      break;
                  case HEAPTUPLE_LIVE:
                      /* Tuple is good --- but let's do some validity checks */
***************
*** 774,786 ****
                       * that the HEAP_XMIN_COMMITTED hint bit is set because of
                       * that.
                       */
!                     if (all_visible)
                      {
                          TransactionId xmin;
  
                          if (!(tuple.t_data->t_infomask & HEAP_XMIN_COMMITTED))
                          {
!                             all_visible = false;
                              break;
                          }
--- 739,751 ----
                       * that the HEAP_XMIN_COMMITTED hint bit is set because of
                       * that.
                       */
!                     if (!has_too_recent)
                      {
                          TransactionId xmin;
  
                          if (!(tuple.t_data->t_infomask & HEAP_XMIN_COMMITTED))
                          {
!                             has_too_recent = true;
                              break;
                          }
***************
*** 791,797 ****
                          xmin = HeapTupleHeaderGetXmin(tuple.t_data);
                          if (!TransactionIdPrecedes(xmin, OldestXmin))
                          {
!                             all_visible = false;
                              break;
                          }
--- 756,762 ----
                          xmin = HeapTupleHeaderGetXmin(tuple.t_data);
                          if (!TransactionIdPrecedes(xmin, OldestXmin))
                          {
!                             has_too_recent = true;
                              break;
                          }
***************
*** 807,847 ****
                       * from relation.
                       */
                      nkeep += 1;
!                     all_visible = false;
                      break;
                  case HEAPTUPLE_INSERT_IN_PROGRESS:
                      /* This is an expected case during concurrent vacuum */
!                     all_visible = false;
                      break;
                  case HEAPTUPLE_DELETE_IN_PROGRESS:
                      /* This is an expected case during concurrent vacuum */
!                     all_visible = false;
                      break;
                  default:
                      elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
                      break;
              }
  
!             if (tupgone)
!             {
!                 lazy_record_dead_tuple(vacrelstats, &(tuple.t_self));
!                 HeapTupleHeaderAdvanceLatestRemovedXid(tuple.t_data,
!                                          &vacrelstats->latestRemovedXid);
!                 tups_vacuumed += 1;
!                 has_dead_tuples = true;
!             }
!             else
!             {
!                 num_tuples += 1;
!                 hastup = true;
! 
!                 /*
!                  * Each non-removable tuple must be checked to see if it needs
!                  * freezing. Note we already have exclusive buffer lock.
!                  */
!                 if (heap_freeze_tuple(tuple.t_data, FreezeLimit))
!                     frozen[nfrozen++] = offnum;
!             }
          }                       /* scan along page */
  
          /*
--- 772,799 ----
                       * from relation.
                       */
                      nkeep += 1;
!                     has_nonlive = true;
                      break;
                  case HEAPTUPLE_INSERT_IN_PROGRESS:
                      /* This is an expected case during concurrent vacuum */
!                     has_nonlive = true;
                      break;
                  case HEAPTUPLE_DELETE_IN_PROGRESS:
                      /* This is an expected case during concurrent vacuum */
!                     has_nonlive = true;
                      break;
                  default:
                      elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
                      break;
              }
  
!             /*
!              * Apart from the case(s) handled specially above, each
!              * non-removable tuple must be checked to see if it needs
!              * freezing. Note we already have exclusive buffer lock.
!              */
!             if (try_freeze && heap_freeze_tuple(tuple.t_data, FreezeLimit))
!                 frozen[nfrozen++] = offnum;
          }                       /* scan along page */
  
          /*
***************
*** 870,883 ****
          if (nindexes == 0 && vacrelstats->num_dead_tuples > 0)
          {
!             /* Remove tuples from heap */
              lazy_vacuum_page(onerel, blkno, buf, 0, vacrelstats);
  
!             /*
!              * Forget the now-vacuumed tuples, and press on, but be careful
!              * not to reset latestRemovedXid since we want that value to be
!              * valid.
!              */
              vacrelstats->num_dead_tuples = 0;
              vacuumed_pages++;
          }
--- 822,831 ----
          if (nindexes == 0 && vacrelstats->num_dead_tuples > 0)
          {
!             /* Mark dead line pointers unused */
              lazy_vacuum_page(onerel, blkno, buf, 0, vacrelstats);
  
!             /* Forget the now-vacuumed TIDs, and press on */
              vacrelstats->num_dead_tuples = 0;
              vacuumed_pages++;
          }
***************
*** 885,891 ****
          freespace = PageGetHeapFreeSpace(page);
  
          /* mark page all-visible, if appropriate */
!         if (all_visible)
          {
              if (!PageIsAllVisible(page))
              {
--- 833,839 ----
          freespace = PageGetHeapFreeSpace(page);
  
          /* mark page all-visible, if appropriate */
!         if (!has_nonlive && !has_too_recent)
          {
              if (!PageIsAllVisible(page))
              {
***************
*** 932,941 ****
           * means that the tuples that we think are not visible to everyone yet
           * actually are, and the PD_ALL_VISIBLE flag is correct.
           *
!          * There should never be dead tuples on a page with PD_ALL_VISIBLE
!          * set, however.
           */
!         else if (PageIsAllVisible(page) && has_dead_tuples)
          {
              elog(WARNING, "page containing dead tuples is marked as all-visible in relation \"%s\" page %u",
                   relname, blkno);
--- 880,888 ----
           * means that the tuples that we think are not visible to everyone yet
           * actually are, and the PD_ALL_VISIBLE flag is correct.
           *
!          * A PD_ALL_VISIBLE page should, however, carry only live tuples.
           */
!         else if (PageIsAllVisible(page) && has_nonlive)
          {
              elog(WARNING, "page containing dead tuples is marked as all-visible in relation \"%s\" page %u",
                   relname, blkno);
***************
*** 950,964 ****
          if (hastup)
              vacrelstats->nonempty_pages = blkno + 1;
  
!         /*
!          * If we remembered any tuples for deletion, then the page will be
!          * visited again by lazy_vacuum_heap, which will compute and record
!          * its post-compaction free space. If not, then we're done with this
!          * page, so remember its free space as-is. (This path will always be
!          * taken if there are no indexes.)
!          */
!         if (vacrelstats->num_dead_tuples == prev_dead_count)
!             RecordPageWithFreeSpace(onerel, blkno, freespace);
      }
  
      /* save stats for use later */
--- 897,903 ----
          if (hastup)
              vacrelstats->nonempty_pages = blkno + 1;
  
!         RecordPageWithFreeSpace(onerel, blkno, freespace);
      }
  
      /* save stats for use later */
***************
*** 980,998 ****
          vmbuffer = InvalidBuffer;
      }
  
!     /* If any tuples need to be deleted, perform final vacuum cycle */
!     /* XXX put a threshold on min number of tuples here? */
      if (vacrelstats->num_dead_tuples > 0)
      {
-         /* Log cleanup info before we touch indexes */
-         vacuum_log_cleanup_info(onerel, vacrelstats);
- 
          /* Remove index entries */
          for (i = 0; i < nindexes; i++)
              lazy_vacuum_index(Irel[i], &indstats[i], vacrelstats);
  
!         /* Remove tuples from heap */
          lazy_vacuum_heap(onerel, vacrelstats);
  
          vacrelstats->num_index_scans++;
      }
--- 919,934 ----
          vmbuffer = InvalidBuffer;
      }
  
!     /* If any line pointers need to be deleted, perform final vacuum cycle */
!     /* XXX put a threshold on minimum count here? */
      if (vacrelstats->num_dead_tuples > 0)
      {
          /* Remove index entries */
          for (i = 0; i < nindexes; i++)
              lazy_vacuum_index(Irel[i], &indstats[i], vacrelstats);
  
!         /* Mark dead line pointers unused */
          lazy_vacuum_heap(onerel, vacrelstats);
  
          vacrelstats->num_index_scans++;
      }
***************
*** 1027,1038 ****
  /*
   *  lazy_vacuum_heap() -- second pass over the heap
   *
!  *      This routine marks dead tuples as unused and compacts out free
!  *      space on their pages. Pages not having dead tuples recorded from
!  *      lazy_scan_heap are not visited at all.
   *
!  *      Note: the reason for doing this as a second pass is we cannot remove
!  *      the tuples until we've removed their index entries, and we want to
   *      process index entry removal in batches as large as possible.
   */
  static void
--- 963,973 ----
  /*
   *  lazy_vacuum_heap() -- second pass over the heap
   *
!  *      This routine marks dead line pointers as unused. Pages not recorded
!  *      during lazy_scan_heap() are not visited at all.
   *
!  *      Note: the reason for doing this as a second pass is we cannot reuse the
!  *      line pointers until we've removed their index entries, and we want to
   *      process index entry removal in batches as large as possible.
   */
  static void
***************
*** 1050,1077 ****
  lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats)
      {
          BlockNumber tblk;
          Buffer      buf;
-         Page        page;
-         Size        freespace;
  
          vacuum_delay_point();
  
          tblk = ItemPointerGetBlockNumber(&vacrelstats->dead_tuples[tupindex]);
          buf = ReadBufferExtended(onerel, MAIN_FORKNUM, tblk, RBM_NORMAL,
                                   vac_strategy);
!         if (!ConditionalLockBufferForCleanup(buf))
!         {
!             ReleaseBuffer(buf);
!             ++tupindex;
!             continue;
!         }
          tupindex = lazy_vacuum_page(onerel, tblk, buf, tupindex, vacrelstats);
- 
-         /* Now that we've compacted the page, record its available space */
-         page = BufferGetPage(buf);
-         freespace = PageGetHeapFreeSpace(page);
- 
          UnlockReleaseBuffer(buf);
-         RecordPageWithFreeSpace(onerel, tblk, freespace);
          npages++;
      }
--- 985,999 ----
      {
          BlockNumber tblk;
          Buffer      buf;
  
          vacuum_delay_point();
  
          tblk = ItemPointerGetBlockNumber(&vacrelstats->dead_tuples[tupindex]);
          buf = ReadBufferExtended(onerel, MAIN_FORKNUM, tblk, RBM_NORMAL,
                                   vac_strategy);
!         LockBuffer(buf, BUFFER_LOCK_SHARE);
          tupindex = lazy_vacuum_page(onerel, tblk, buf, tupindex, vacrelstats);
          UnlockReleaseBuffer(buf);
          npages++;
      }
***************
*** 1084,1107 ****
  lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats)
  }
  
  /*
!  *  lazy_vacuum_page() -- free dead tuples on a page
!  *                   and repair its fragmentation.
   *
!  * Caller must hold pin and buffer cleanup lock on the buffer.
   *
!  * tupindex is the index in vacrelstats->dead_tuples of the first dead
!  * tuple for this page. We assume the rest follow sequentially.
!  * The return value is the first tupindex after the tuples of this page.
   */
  static int
  lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer,
                   int tupindex, LVRelStats *vacrelstats)
  {
      Page        page = BufferGetPage(buffer);
-     OffsetNumber unused[MaxOffsetNumber];
-     int         uncnt = 0;
- 
-     START_CRIT_SECTION();
  
      for (; tupindex < vacrelstats->num_dead_tuples; tupindex++)
      {
--- 1006,1033 ----
  }
  
  /*
!  *  lazy_vacuum_page() -- change LP_DEAD line pointers to LP_UNUSED.
   *
!  * Caller must hold at least a shared lock on the buffer. Only VACUUM
!  * modifies heap LP_DEAD line pointers, and no other process will be upset if
!  * such a line pointer suddenly becomes LP_UNUSED. The main code that cares
!  * about the distinction is PageAddItem(), and it would only mind a switch in
!  * the opposite direction, from LP_UNUSED to LP_DEAD. (Furthermore, it needs
!  * an exclusive lock on the buffer.)
   *
!  * If this change is somehow lost, the next VACUUM can redo it. All xids are
!  * gone; a mere LP_DEAD pointer imposes no wraparound hazard. Therefore,
!  * treat this update like a hint and emit no WAL.
!  *
!  * tupindex is the index in vacrelstats->dead_tuples of the first dead TID for
!  * this page. We assume the rest follow sequentially. The return value is
!  * the first tupindex beyond this page.
   */
  static int
  lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer,
                   int tupindex, LVRelStats *vacrelstats)
  {
      Page        page = BufferGetPage(buffer);
  
      for (; tupindex < vacrelstats->num_dead_tuples; tupindex++)
      {
***************
*** 1114,1141 ****
              break;              /* past end of tuples for this block */
          toff = ItemPointerGetOffsetNumber(&vacrelstats->dead_tuples[tupindex]);
          itemid = PageGetItemId(page, toff);
          ItemIdSetUnused(itemid);
-         unused[uncnt++] = toff;
-     }
- 
-     PageRepairFragmentation(page);
- 
-     MarkBufferDirty(buffer);
- 
-     /* XLOG stuff */
-     if (RelationNeedsWAL(onerel))
-     {
-         XLogRecPtr  recptr;
- 
-         recptr = log_heap_clean(onerel, buffer,
-                                 NULL, 0, NULL, 0,
-                                 unused, uncnt,
-                                 vacrelstats->latestRemovedXid);
-         PageSetLSN(page, recptr);
-         PageSetTLI(page, ThisTimeLineID);
      }
  
!     END_CRIT_SECTION();
  
      return tupindex;
  }
--- 1040,1050 ----
              break;              /* past end of tuples for this block */
          toff = ItemPointerGetOffsetNumber(&vacrelstats->dead_tuples[tupindex]);
          itemid = PageGetItemId(page, toff);
+         Assert(ItemIdIsDead(itemid));   /* stronger lock needed otherwise */
          ItemIdSetUnused(itemid);
      }
  
!     SetBufferCommitInfoNeedsSave(buffer);
  
      return tupindex;
  }
***************
*** 1187,1193 ****
  lazy_check_needs_freeze(Buffer buf)
  /*
   *  lazy_vacuum_index() -- vacuum one index relation.
   *
!  *      Delete all the index entries pointing to tuples listed in
   *      vacrelstats->dead_tuples, and update running statistics.
   */
  static void
--- 1096,1102 ----
  /*
   *  lazy_vacuum_index() -- vacuum one index relation.
   *
!  *      Delete all the index entries pointing to items listed in
   *      vacrelstats->dead_tuples, and update running statistics.
   */
  static void
***************
*** 1491,1499 ****
  count_nondeletable_pages(Relation onerel, LVRelStats *vacrelstats)
  
              /*
               * Note: any non-unused item should be taken as a reason to keep
!              * this page. We formerly thought that DEAD tuples could be
!              * thrown away, but that's not so, because we'd not have cleaned
!              * out their index entries.
               */
              if (ItemIdIsUsed(itemid))
              {
--- 1400,1408 ----
  
              /*
               * Note: any non-unused item should be taken as a reason to keep
!              * this page. We formerly thought that LP_DEAD line pointers
!              * could be thrown away, but that's not so, because we'd not have
!              * cleaned out their index entries.
               */
              if (ItemIdIsUsed(itemid))
              {
***************
*** 1552,1558 ****
  lazy_space_alloc(LVRelStats *vacrelstats, BlockNumber relblocks)
  }
  
  /*
!  *      lazy_record_dead_tuple - remember one deletable tuple
   */
  static void
  lazy_record_dead_tuple(LVRelStats *vacrelstats,
--- 1461,1467 ----
  }
  
  /*
!  *      lazy_record_dead_tuple - remember one reclaimable line pointer
   */
  static void
  lazy_record_dead_tuple(LVRelStats *vacrelstats,
***************
*** 1561,1567 ****
  lazy_record_dead_tuple(LVRelStats *vacrelstats,
      /*
       * The array shouldn't overflow under normal behavior, but perhaps it
       * could if we are given a really small maintenance_work_mem. In that
!      * case, just forget the last few tuples (we'll get 'em next time).
       */
      if (vacrelstats->num_dead_tuples < vacrelstats->max_dead_tuples)
      {
--- 1470,1476 ----
      /*
       * The array shouldn't overflow under normal behavior, but perhaps it
       * could if we are given a really small maintenance_work_mem. In that
!      * case, just forget the last few entries (we'll get 'em next time).
       */
      if (vacrelstats->num_dead_tuples < vacrelstats->max_dead_tuples)
      {
*** a/src/include/access/heapam.h
--- b/src/include/access/heapam.h
***************
*** 153,159 ****
  extern void heap_page_prune_opt(Relation relation, Buffer buffer,
                      TransactionId OldestXmin);
  extern int heap_page_prune(Relation relation, Buffer buffer,
                  TransactionId OldestXmin,
!                 bool report_stats, TransactionId *latestRemovedXid);
  extern void heap_page_prune_execute(Buffer buffer,
                          OffsetNumber *redirected, int nredirected,
                          OffsetNumber *nowdead, int ndead,
--- 153,159 ----
                      TransactionId OldestXmin);
  extern int heap_page_prune(Relation relation, Buffer buffer,
                  TransactionId OldestXmin,
!                 bool report_stats);
  extern void heap_page_prune_execute(Buffer buffer,
                          OffsetNumber *redirected, int nredirected,
                          OffsetNumber *nowdead, int ndead,
*** a/src/include/access/heapam_xlog.h
--- b/src/include/access/heapam_xlog.h
***************
*** 51,57 ****
  #define XLOG_HEAP2_FREEZE       0x00
  #define XLOG_HEAP2_CLEAN        0x10
  /* 0x20 is free, was XLOG_HEAP2_CLEAN_MOVE */
! #define XLOG_HEAP2_CLEANUP_INFO 0x30
  #define XLOG_HEAP2_VISIBLE      0x40
  #define XLOG_HEAP2_MULTI_INSERT 0x50
--- 51,57 ----
  #define XLOG_HEAP2_FREEZE       0x00
  #define XLOG_HEAP2_CLEAN        0x10
  /* 0x20 is free, was XLOG_HEAP2_CLEAN_MOVE */
! /* 0x30 is free, was XLOG_HEAP2_CLEANUP_INFO */
  #define XLOG_HEAP2_VISIBLE      0x40
  #define XLOG_HEAP2_MULTI_INSERT 0x50
***************
*** 172,190 ****
  typedef struct xl_heap_clean
  
  #define SizeOfHeapClean (offsetof(xl_heap_clean, ndead) + sizeof(uint16))
  
- /*
-  * Cleanup_info is required in some cases during a lazy VACUUM.
-  * Used for reporting the results of HeapTupleHeaderAdvanceLatestRemovedXid()
-  * see vacuumlazy.c for full explanation
-  */
- typedef struct xl_heap_cleanup_info
- {
-     RelFileNode node;
-     TransactionId latestRemovedXid;
- } xl_heap_cleanup_info;
- 
- #define SizeOfHeapCleanupInfo (sizeof(xl_heap_cleanup_info))
- 
  /* This is for replacing a page's contents in toto */
  /* NB: this is used for indexes as well as heaps */
  typedef struct xl_heap_newpage
--- 172,177 ----
***************
*** 246,253 ****
  extern void heap_desc(StringInfo buf, uint8 xl_info, char *rec);
  extern void heap2_redo(XLogRecPtr lsn, XLogRecord *rptr);
  extern void heap2_desc(StringInfo buf, uint8 xl_info, char *rec);
  
- extern XLogRecPtr log_heap_cleanup_info(RelFileNode rnode,
-                       TransactionId latestRemovedXid);
  extern XLogRecPtr log_heap_clean(Relation reln, Buffer buffer,
                      OffsetNumber *redirected, int nredirected,
                      OffsetNumber *nowdead, int ndead,
--- 233,238 ----
*** a/src/include/storage/itemid.h
--- b/src/include/storage/itemid.h
***************
*** 123,128 ****
  typedef uint16 ItemLength;
--- 123,131 ----
   * ItemIdSetUnused
   *      Set the item identifier to be UNUSED, with no storage.
   *      Beware of multiple evaluations of itemId!
+  *
+  * Note: VACUUM uses this on LP_DEAD heap items as though it were a hint-bit
+  * mechanism, holding only a shared lock.
   */
  #define ItemIdSetUnused(itemId) \
  ( \
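For readers of the patch: the sketch below is illustrative only and is not part
of the diff. It condenses the hint-style protocol the revised lazy_vacuum_page()
relies on: flip an LP_DEAD heap line pointer to LP_UNUSED under a shared buffer
lock, mark the buffer the way hint-bit setters do, and write no WAL, since a
lost update is simply redone by the next VACUUM. The helper name
set_dead_item_unused_as_hint and its arguments are invented for the example;
the buffer and page calls are the same ones the patched code uses
(SetBufferCommitInfoNeedsSave() being the spelling that later became
MarkBufferDirtyHint()).

    #include "postgres.h"

    #include "storage/bufmgr.h"
    #include "storage/bufpage.h"
    #include "utils/rel.h"

    /* Hypothetical helper: reclaim one known-LP_DEAD item as a hint. */
    static void
    set_dead_item_unused_as_hint(Relation rel, BlockNumber blkno,
                                 OffsetNumber deadoff)
    {
        Buffer      buf = ReadBuffer(rel, blkno);
        Page        page;
        ItemId      itemid;

        /* Shared lock suffices: only VACUUM changes LP_DEAD heap items. */
        LockBuffer(buf, BUFFER_LOCK_SHARE);

        page = BufferGetPage(buf);
        itemid = PageGetItemId(page, deadoff);
        Assert(ItemIdIsDead(itemid));   /* stronger lock needed otherwise */
        ItemIdSetUnused(itemid);

        /*
         * Treat the change like a hint bit: request that the page be
         * written eventually, but force no WAL.  If the update is lost in
         * a crash, the next VACUUM redoes it; no xid data is at stake.
         */
        SetBufferCommitInfoNeedsSave(buf);

        UnlockReleaseBuffer(buf);
    }

Contrast this with the code the patch deletes, which required a buffer cleanup
lock, performed the change inside a critical section, and WAL-logged it through
log_heap_clean().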