Index: src/backend/commands/vacuumlazy.c
===================================================================
RCS file: /projects/cvsroot/pgsql-server/src/backend/commands/vacuumlazy.c,v
retrieving revision 1.43
diff -r1.43 vacuumlazy.c
65,85d64
< typedef struct LVRelStats
< {
< 	/* Overall statistics about rel */
< 	BlockNumber rel_pages;
< 	double		rel_tuples;
< 	BlockNumber nonempty_pages;	/* actually, last nonempty page + 1 */
< 	Size		threshold;		/* minimum interesting free space */
< 	/* List of TIDs of tuples we intend to delete */
< 	/* NB: this list is ordered by TID address */
< 	int			num_dead_tuples;	/* current # of entries */
< 	int			max_dead_tuples;	/* # slots allocated in array */
< 	ItemPointer dead_tuples;	/* array of ItemPointerData */
< 	/* Array or heap of per-page info about free space */
< 	/* We use a simple array until it fills up, then convert to heap */
< 	bool		fs_is_heap;		/* are we using heap organization? */
< 	int			num_free_pages; /* current # of entries */
< 	int			max_free_pages; /* # slots allocated in array */
< 	PageFreeSpaceInfo *free_pages;		/* array or heap of blkno/avail */
< } LVRelStats;
< 
< 
176a156,280
> 
> bool lazy_scan_heap_page(Relation onerel, LVRelStats *vacrelstats,
> 					BlockNumber blkno, Page page)
> {
> 	bool		pgchanged;
> 	bool		tupgone;
> 
> 	OffsetNumber offnum,
> 				maxoff;
> 
> 	HeapTupleData tuple;
> 
> 	/* NB: these per-page counts are currently not returned to the caller */
> 	double		nkeep = 0;
> 	double		nunused = 0;
> 	int			tups_vacuumed = 0;
> 
> 	/*
> 	 * KLUDGE: when called from bufmgr.c for an opportunistic vacuum we
> 	 * have no Relation to work from, so refresh OldestXmin here.
> 	 */
> 	if (onerel == NULL)
> 		OldestXmin = GetOldestXmin(true);
> 
> 	pgchanged = false;
> 	maxoff = PageGetMaxOffsetNumber(page);
> 	for (offnum = FirstOffsetNumber;
> 		 offnum <= maxoff;
> 		 offnum = OffsetNumberNext(offnum))
> 	{
> 		ItemId		itemid;
> 		uint16		sv_infomask;
> 
> 		itemid = PageGetItemId(page, offnum);
> 
> 		if (!ItemIdIsUsed(itemid))
> 		{
> 			nunused += 1;
> 			continue;
> 		}
> 
> 		tuple.t_datamcxt = NULL;
> 		tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
> 		tuple.t_len = ItemIdGetLength(itemid);
> 		ItemPointerSet(&(tuple.t_self), blkno, offnum);
> 
> 		tupgone = false;
> 		sv_infomask = tuple.t_data->t_infomask;
> 
> 		switch (HeapTupleSatisfiesVacuum(tuple.t_data, OldestXmin))
> 		{
> 			case HEAPTUPLE_DEAD:
> 				tupgone = true;		/* we can delete the tuple */
> 				break;
> 			case HEAPTUPLE_LIVE:
> 				/* opportunistic mode: skip freezing and the checks below */
> 				if (onerel == NULL)
> 					break;
> 				/*
> 				 * Tuple is good.  Consider whether to replace its
> 				 * xmin value with FrozenTransactionId.
> 				 *
> 				 * NB: Since we hold only a shared buffer lock here, we
> 				 * are assuming that TransactionId read/write is
> 				 * atomic.  This is not the only place that makes such
> 				 * an assumption.  It'd be possible to avoid the
> 				 * assumption by momentarily acquiring exclusive lock,
> 				 * but for the moment I see no need to.
> 				 */
> 				if (TransactionIdIsNormal(HeapTupleHeaderGetXmin(tuple.t_data)) &&
> 					TransactionIdPrecedes(HeapTupleHeaderGetXmin(tuple.t_data),
> 										  FreezeLimit))
> 				{
> 					HeapTupleHeaderSetXmin(tuple.t_data, FrozenTransactionId);
> 					/* infomask should be okay already */
> 					Assert(tuple.t_data->t_infomask & HEAP_XMIN_COMMITTED);
> 					pgchanged = true;
> 				}
> 
> 				/*
> 				 * Other checks...
> 				 */
> 				if (onerel->rd_rel->relhasoids &&
> 					!OidIsValid(HeapTupleGetOid(&tuple)))
> 				{
> 					elog(WARNING, "relation \"%s\" TID %u/%u: OID is invalid",
> 						 RelationGetRelationName(onerel), blkno, offnum);
> 				}
> 				break;
> 			case HEAPTUPLE_RECENTLY_DEAD:
> 				/*
> 				 * If tuple is recently deleted then we must not
> 				 * remove it from relation.
> 				 */
> 				nkeep += 1;
> 				break;
> 			case HEAPTUPLE_INSERT_IN_PROGRESS:
> 				/* This is an expected case during concurrent vacuum */
> 				break;
> 			case HEAPTUPLE_DELETE_IN_PROGRESS:
> 				/* This is an expected case during concurrent vacuum */
> 				break;
> 			default:
> 				elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
> 				break;
> 		}
> 
> 		/* check for hint-bit update by HeapTupleSatisfiesVacuum */
> 		if (sv_infomask != tuple.t_data->t_infomask)
> 			pgchanged = true;
> 
> 		if (tupgone)
> 		{
> 			lazy_record_dead_tuple(vacrelstats, &(tuple.t_self));
> 			tups_vacuumed += 1;
> 		}
> 		else
> 		{
> 			vacrelstats->rel_tuples += 1;
> 		}
> 	}						/* scan along page */
> 	return pgchanged;
> }
> 
> 
191,192d294
< 	HeapTupleData tuple;
< 	char	   *relname;
194,195c296
< 	double		num_tuples,
< 				tups_vacuumed,
---
> 	double		tups_vacuumed,
199a301,304
> 	Page		page;
> 	char	   *relname;
> 
> 	relname = RelationGetRelationName(onerel);
203d307
< 	relname = RelationGetRelationName(onerel);
210c314
< 	num_tuples = tups_vacuumed = nkeep = nunused = 0;
---
> 	tups_vacuumed = nkeep = nunused = 0;
214a319,321
> 
> 	/* Initialize counters; lazy_scan_heap_page increments rel_tuples */
> 	vacrelstats->rel_tuples = 0;
221,226c328,329
< 			Page		page;
< 			OffsetNumber offnum,
< 						maxoff;
< 			bool		pgchanged,
< 						tupgone,
< 						hastup;
---
> 			bool		pgchanged;
> 			double		prev_rel_tuples;
285,296d387
< 			pgchanged = false;
< 			hastup = false;
< 			prev_dead_count = vacrelstats->num_dead_tuples;
< 			maxoff = PageGetMaxOffsetNumber(page);
< 			for (offnum = FirstOffsetNumber;
< 				 offnum <= maxoff;
< 				 offnum = OffsetNumberNext(offnum))
< 			{
< 				ItemId		itemid;
< 				uint16		sv_infomask;
< 
< 				itemid = PageGetItemId(page, offnum);
298,369c389,390
< 				if (!ItemIdIsUsed(itemid))
< 				{
< 					nunused += 1;
< 					continue;
< 				}
< 
< 				tuple.t_datamcxt = NULL;
< 				tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
< 				tuple.t_len = ItemIdGetLength(itemid);
< 				ItemPointerSet(&(tuple.t_self), blkno, offnum);
< 
< 				tupgone = false;
< 				sv_infomask = tuple.t_data->t_infomask;
< 
< 				switch (HeapTupleSatisfiesVacuum(tuple.t_data, OldestXmin))
< 				{
< 					case HEAPTUPLE_DEAD:
< 						tupgone = true;		/* we can delete the tuple */
< 						break;
< 					case HEAPTUPLE_LIVE:
< 
< 						/*
< 						 * Tuple is good.  Consider whether to replace its
< 						 * xmin value with FrozenTransactionId.
< 						 *
< 						 * NB: Since we hold only a shared buffer lock here, we
< 						 * are assuming that TransactionId read/write is
< 						 * atomic.  This is not the only place that makes such
< 						 * an assumption.  It'd be possible to avoid the
< 						 * assumption by momentarily acquiring exclusive lock,
< 						 * but for the moment I see no need to.
< 						 */
< 						if (TransactionIdIsNormal(HeapTupleHeaderGetXmin(tuple.t_data)) &&
< 							TransactionIdPrecedes(HeapTupleHeaderGetXmin(tuple.t_data),
< 												  FreezeLimit))
< 						{
< 							HeapTupleHeaderSetXmin(tuple.t_data, FrozenTransactionId);
< 							/* infomask should be okay already */
< 							Assert(tuple.t_data->t_infomask & HEAP_XMIN_COMMITTED);
< 							pgchanged = true;
< 						}
< 
< 						/*
< 						 * Other checks...
< 						 */
< 						if (onerel->rd_rel->relhasoids &&
< 							!OidIsValid(HeapTupleGetOid(&tuple)))
< 							elog(WARNING, "relation \"%s\" TID %u/%u: OID is invalid",
< 								 relname, blkno, offnum);
< 						break;
< 					case HEAPTUPLE_RECENTLY_DEAD:
< 
< 						/*
< 						 * If tuple is recently deleted then we must not
< 						 * remove it from relation.
< 						 */
< 						nkeep += 1;
< 						break;
< 					case HEAPTUPLE_INSERT_IN_PROGRESS:
< 						/* This is an expected case during concurrent vacuum */
< 						break;
< 					case HEAPTUPLE_DELETE_IN_PROGRESS:
< 						/* This is an expected case during concurrent vacuum */
< 						break;
< 					default:
< 						elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
< 						break;
< 				}
< 
< 				/* check for hint-bit update by HeapTupleSatisfiesVacuum */
< 				if (sv_infomask != tuple.t_data->t_infomask)
< 					pgchanged = true;
---
> 			prev_dead_count = vacrelstats->num_dead_tuples;
> 			prev_rel_tuples = vacrelstats->rel_tuples;
371,381c392
< 				if (tupgone)
< 				{
< 					lazy_record_dead_tuple(vacrelstats, &(tuple.t_self));
< 					tups_vacuumed += 1;
< 				}
< 				else
< 				{
< 					num_tuples += 1;
< 					hastup = true;
< 				}
< 			}					/* scan along page */
---
> 			pgchanged = lazy_scan_heap_page(onerel, vacrelstats, blkno, page);
396c407
< 			if (hastup)
---
> 			if (vacrelstats->rel_tuples > prev_rel_tuples)
407,409d417
< 			/* save stats for use later */
< 			vacrelstats->rel_tuples = num_tuples;
< 
430c438
< 						tups_vacuumed, num_tuples, nblocks),
---
> 						tups_vacuumed, vacrelstats->rel_tuples, nblocks),
492a501
> 
Index: src/backend/storage/buffer/bufmgr.c
===================================================================
RCS file: /projects/cvsroot/pgsql-server/src/backend/storage/buffer/bufmgr.c,v
retrieving revision 1.173
diff -r1.173 bufmgr.c
49a50,51
> #include <sys/time.h>
> #include "commands/vacuum.h"
184a187,190
> 	/* Heaps with no indexes can be pruned without index cleanup */
> 	if (!reln->rd_rel->relhasindex && reln->rd_rel->relam == 0)
> 		bufHdr->flags |= BM_OPPVACUUMABLE;
> 
467a474,563
> 
> /*
>  * vacuum_page() -- free dead tuples on a page
>  *					 and repair its fragmentation.
>  *
>  * Caller is expected to handle reading, locking, and writing the buffer.
>  *
>  * vacrelstats->dead_tuples must be filled by caller.
>  */
> void
> vacuum_page(Page page, LVRelStats *vacrelstats)
> {
> 	OffsetNumber unused[BLCKSZ / sizeof(OffsetNumber)];
> 	int			uncnt;
> 	ItemId		itemid;
> 	int			tupindex;
> 
> 	/* NB: unlike lazy_vacuum_page(), this writes no WAL record yet */
> 
> 	START_CRIT_SECTION();
> 	for (tupindex = 0; tupindex < vacrelstats->num_dead_tuples; tupindex++)
> 	{
> 		OffsetNumber toff;
> 
> 		toff = ItemPointerGetOffsetNumber(&vacrelstats->dead_tuples[tupindex]);
> 		itemid = PageGetItemId(page, toff);
> 		itemid->lp_flags &= ~LP_USED;
> 	}
> 
> 	uncnt = PageRepairFragmentation(page, unused);
> 
> 	END_CRIT_SECTION();
> }
> 
> 
> /* A conservative upper limit on the number of tuples any heap page can hold */
> #define MAX_TUPLES_PER_PAGE	((int) (BLCKSZ / sizeof(HeapTupleHeaderData)))
> 
> /*
>  * Buffer must be pinned.  BufMgrLock must be held.
>  */
> static void
> oppVacuum(Buffer buf)
> {
> 	LVRelStats	vacrelstats;
> 	Page		page;
> 	BufferDesc *bufHdr;
> 	struct timeval tv1, tv2, tv3;
> 	ItemPointerData dead_tuples[MAX_TUPLES_PER_PAGE];
> 
> 	MemSet(&vacrelstats, 0, sizeof(LVRelStats));
> 	vacrelstats.max_dead_tuples = MAX_TUPLES_PER_PAGE;
> 	vacrelstats.dead_tuples = dead_tuples;
> 
> 	gettimeofday(&tv1, NULL);
> 
> 	bufHdr = &BufferDescriptors[buf - 1];
> 
> 	if (ConditionalLockBuffer(buf)) {
> 		page = BufferGetPage(buf);
> 		Assert(bufHdr->refcount > 0);
> 		if (bufHdr->refcount > 1)
> 		{
> 			/* Someone else is busy with the page; back off
> 			 * rather than wait. */
> 		} else {
> 			/* Successfully acquired exclusive lock with pincount 1 */
> 
> 			lazy_scan_heap_page(NULL, &vacrelstats, bufHdr->tag.blockNum, page);
> 			if (vacrelstats.num_dead_tuples > 0)
> 				vacuum_page(page, &vacrelstats);
> 
> 		}
> 		LockBuffer(buf, BUFFER_LOCK_UNLOCK);
> 		RecordAndGetPageWithFreeSpace(&bufHdr->tag.rnode, bufHdr->tag.blockNum, PageGetFreeSpace(page), 0);
> 	}
> 
> 	gettimeofday(&tv2, NULL);
> 
> 	timersub(&tv2, &tv1, &tv3);
> 
> 	elog(WARNING, "oppVacuum: relNode == %u, blockNum == %u, freed == %d, time == %ld:%ld", bufHdr->tag.rnode.relNode, bufHdr->tag.blockNum, vacrelstats.num_dead_tuples,
> 		 (long) tv3.tv_sec, (long) tv3.tv_usec);
> 
> 	/* DumpFreeSpace(); */
> 
> }
> 
> bool		oppVacuumEnabled;
> 
475a572
> 	bool		opportunisticVacuum = false;
501c598,600
< 	if (release)
---
> 	if ((bufHdr->flags & BM_OPPVACUUMABLE) && oppVacuumEnabled)
> 		opportunisticVacuum = true;
> 	else if (release)
503a603,608
> 
> 	if (opportunisticVacuum) {
> 		oppVacuum(buffer);
> 		if (release)
> 			ReleaseBuffer(buffer);
> 	}
699a805,807
> 	elog(WARNING, "BufferSync, percent: %d, maxpages: %d, num_buffer_dirty: %d", percent, maxpages, num_buffer_dirty);
> 
> 
750a859,862
> 
> 	if (bufHdr->flags & BM_OPPVACUUMABLE)
> 		oppVacuum(BufferDescriptorGetBuffer(bufHdr));
> 
973a1086,1087
> 	buffer = BufferDescriptorGetBuffer(buf);
> 
994d1107
< 	buffer = BufferDescriptorGetBuffer(buf);
Index: src/backend/storage/freespace/freespace.c
===================================================================
RCS file: /projects/cvsroot/pgsql-server/src/backend/storage/freespace/freespace.c,v
retrieving revision 1.32
diff -r1.32 freespace.c
215d214
< 
404c403
< 	BlockNumber freepage;
---
> 	BlockNumber freepage = InvalidBlockNumber;
431c430,431
< 	freepage = find_free_space(fsmrel, spaceNeeded);
---
> 	if (spaceNeeded > 0)
> 		freepage = find_free_space(fsmrel, spaceNeeded);
1291a1292,1293
> 	elog(WARNING, "fsm_record_free_space: %u, %lu", page, (unsigned long) spaceAvail);
> 
1294,1297d1295
< 	if (lookup_fsm_page_entry(fsmrel, page, &pageIndex))
< 	{
< 		/* Found an existing entry for page; update it */
< 		FSMPageData *info;
1299,1310c1297,1318
< 		info = (FSMPageData *)
< 			(FreeSpaceMap->arena + fsmrel->firstChunk * CHUNKBYTES);
< 		info += pageIndex;
< 		FSMPageSetSpace(info, spaceAvail);
< 	}
< 	else
< 	{
< 		/*
< 		 * No existing entry; ignore the call.  We used to add the page to
< 		 * the FSM --- but in practice, if the page hasn't got enough
< 		 * space to satisfy the caller who's kicking it back to us, then
< 		 * it's probably uninteresting to everyone else as well.
---
> 
> 	/* Update the entry for page, inserting a new one if needed */
> 	FSMPageData *info;
> 	int			newStoredPages;
> 
> 	info = (FSMPageData *)
> 		(FreeSpaceMap->arena + fsmrel->firstChunk * CHUNKBYTES);
> 
> 	if (!lookup_fsm_page_entry(fsmrel, page, &pageIndex))
> 	{
> 		elog(WARNING, "expanding, storedPages: %d, pageIndex: %d, fsm_current_allocation: %d",
> 			 fsmrel->storedPages, pageIndex, fsm_current_allocation(fsmrel));
> 
> 		newStoredPages = fsmrel->storedPages + 1;
> 		/* Is there room left in our allocation?  If not, ignore the call. */
> 		if (newStoredPages >= fsm_current_allocation(fsmrel) * CHUNKPAGES)
> 			return;
> 
> 		fsmrel->storedPages = newStoredPages;
> 
> 		/* Move each entry with a bigger blkno down one slot to make
> 		 * room for the new entry
1311a1320,1322
> 		memmove(info + pageIndex + 1, info + pageIndex,
> 				(fsmrel->storedPages - pageIndex - 1) * sizeof(FSMPageData));
> 		FSMPageSetPageNum(info + pageIndex, page);
1312a1324,1326
> 
> 	info += pageIndex;
> 	FSMPageSetSpace(info, spaceAvail);
1827a1842
> #define FREESPACE_DEBUG
Index: src/backend/storage/page/bufpage.c
===================================================================
RCS file: /projects/cvsroot/pgsql-server/src/backend/storage/page/bufpage.c,v
retrieving revision 1.59
diff -r1.59 bufpage.c
18a19,23
> #include <sys/time.h>
> 
> void
> DumpPage(Page page);
> 
318a324,329
> 	/* timing instrumentation, normally disabled:
> 	struct timeval tv1, tv2, tv3;
> 
> 	DumpPage(page);
> 
> 	gettimeofday(&tv1, NULL); */
416a428,435
> 	/*
> 	gettimeofday(&tv2, NULL);
> 
> 	timersub(&tv2, &tv1, &tv3);
> 
> 	fprintf(stderr, "PageRepairFragmentation, took %ld us\n", (long) tv3.tv_usec);
> 	*/
> 
544a564,606
> 
> #define PAGE_DEBUG
> 
> #ifdef PAGE_DEBUG
> void
> DumpPage(Page page)
> {
> 	int			i;
> 	ItemId		itemId;
> 	Offset		pd_lower = ((PageHeader) page)->pd_lower;
> 	Offset		pd_upper = ((PageHeader) page)->pd_upper;
> 	Offset		pd_special = ((PageHeader) page)->pd_special;
> 	XLogRecPtr	pd_lsn = PageGetLSN(page);
> 	TimeLineID	pd_tli = PageGetTLI(page);
> 	int			maxOffset = PageGetMaxOffsetNumber(page);
> 
> 	fprintf(stderr, "pd_lower: %d, pd_upper: %d, pd_special: %d, pd_lsn: %X/%X, pd_tli: %u\n",
> 			pd_lower,
> 			pd_upper,
> 			pd_special,
> 			pd_lsn.xlogid,
> 			pd_lsn.xrecoff,
> 			pd_tli);
> 
> 	for (i = 1; i <= maxOffset; i++)
> 	{
> 		int			offset;
> 		int			len;
> 
> 		itemId = PageGetItemId(page, i);
> 		offset = ItemIdGetOffset(itemId);
> 		len = ItemIdGetLength(itemId);
> 
> 		fprintf(stderr, "line ptr %d: offset: %d, len: %d, flags:%s%s\n", i,
> 			offset, len,
> 			ItemIdIsUsed(itemId) ? " LP_USED" : "",
> 			ItemIdDeleted(itemId) ?
" LP_DELETED" : ""); > } > > } > > > #endif Index: src/backend/utils/misc/guc.c =================================================================== RCS file: /projects/cvsroot/pgsql-server/src/backend/utils/misc/guc.c,v retrieving revision 1.232 diff -r1.232 guc.c 82a83 > extern bool oppVacuumEnabled; 840a842,853 > { > {"enable_oppvacuum", PGC_USERSET, QUERY_TUNING_METHOD, > gettext_noop("Enables opportunistic vacuum."), > NULL > }, > &oppVacuumEnabled, > false, NULL, NULL > }, > > > > Index: src/include/commands/vacuum.h =================================================================== RCS file: /projects/cvsroot/pgsql-server/src/include/commands/vacuum.h,v retrieving revision 1.55 diff -r1.55 vacuum.h 30a31 > #include "storage/freespace.h" 122a124,148 > > typedef struct LVRelStats > { > /* Overall statistics about rel */ > BlockNumber rel_pages; > double rel_tuples; > BlockNumber nonempty_pages; /* actually, last nonempty page + 1 */ > Size threshold; /* minimum interesting free space */ > /* List of TIDs of tuples we intend to delete */ > /* NB: this list is ordered by TID address */ > int num_dead_tuples; /* current # of entries */ > int max_dead_tuples; /* # slots allocated in array */ > ItemPointer dead_tuples; /* array of ItemPointerData */ > /* Array or heap of per-page info about free space */ > /* We use a simple array until it fills up, then convert to heap */ > bool fs_is_heap; /* are we using heap organization? */ > int num_free_pages; /* current # of entries */ > int max_free_pages; /* # slots allocated in array */ > PageFreeSpaceInfo *free_pages; /* array or heap of blkno/avail */ > } LVRelStats; > > > > > 146a173,177 > extern bool lazy_scan_heap_page(Relation onerel, LVRelStats *vacrelstats, > BlockNumber blkno, Page page); > > > Index: src/include/storage/buf_internals.h =================================================================== RCS file: /projects/cvsroot/pgsql-server/src/include/storage/buf_internals.h,v retrieving revision 1.71 diff -r1.71 buf_internals.h 32a33 > #define BM_OPPVACUUMABLE (1 << 6) /* buffer can be oppvacuumed */