diff -cpr pgsql-orig/src/backend/access/nbtree/nbtinsert.c pgsql/src/backend/access/nbtree/nbtinsert.c *** pgsql-orig/src/backend/access/nbtree/nbtinsert.c 2006-01-24 21:39:25.000000000 +0900 --- pgsql/src/backend/access/nbtree/nbtinsert.c 2006-01-25 05:07:56.000000000 +0900 *************** static void _bt_pgaddtup(Relation rel, P *** 62,67 **** --- 62,68 ---- OffsetNumber itup_off, const char *where); static bool _bt_isequal(TupleDesc itupdesc, Page page, OffsetNumber offnum, int keysz, ScanKey scankey); + static int _bt_vacuum_page(Relation rel, Buffer buf); /* *************** _bt_check_unique(Relation rel, BTItem bt *** 260,265 **** --- 261,267 ---- hbuffer) == HEAPTUPLE_DEAD) { curitemid->lp_flags |= LP_DELETE; + opaque->btpo_flags |= BTP_HAS_DEAD; if (nbuf != InvalidBuffer) SetBufferCommitInfoNeedsSave(nbuf); else *************** _bt_insertonpg(Relation rel, *** 424,433 **** */ bool movedright = false; ! while (PageGetFreeSpace(page) < itemsz && ! !P_RIGHTMOST(lpageop) && ! _bt_compare(rel, keysz, scankey, page, P_HIKEY) == 0 && ! random() > (MAX_RANDOM_VALUE / 100)) { /* * step right to next non-dead page --- 426,432 ---- */ bool movedright = false; ! while (PageGetFreeSpace(page) < itemsz) { /* * step right to next non-dead page *************** _bt_insertonpg(Relation rel, *** 440,445 **** --- 439,457 ---- */ Buffer rbuf = InvalidBuffer; + if (P_ISLEAF(lpageop) && P_HAS_DEAD(lpageop) && + !rel->rd_istemp && BufferSuperLockHeldByMe(buf)) + { + _bt_vacuum_page(rel, buf); + if (PageGetFreeSpace(page) >= itemsz) + break; + } + + if (P_RIGHTMOST(lpageop) || + _bt_compare(rel, keysz, scankey, page, P_HIKEY) != 0 || + random() <= (MAX_RANDOM_VALUE / 100)) + break; + for (;;) { BlockNumber rblkno = lpageop->btpo_next; *************** _bt_isequal(TupleDesc itupdesc, Page pag *** 1590,1592 **** --- 1602,1651 ---- /* if we get here, the keys are equal */ return true; } + + /* + * _bt_vacuum_page + * + * @param buf Must have super-exclusive-lock. + */ + static int + _bt_vacuum_page(Relation rel, Buffer buf) + { + OffsetNumber deletable[MaxOffsetNumber]; + BTPageOpaque opaque; + OffsetNumber offnum, minoff, maxoff; + int ndeletable; + Page page = BufferGetPage(buf); + + opaque = (BTPageOpaque) PageGetSpecialPointer(page); + if (!P_ISLEAF(opaque) || !P_HAS_DEAD(opaque)) + return -1; + + minoff = P_FIRSTDATAKEY(opaque); + maxoff = PageGetMaxOffsetNumber(page); + if (minoff > maxoff) + return -1; + + ndeletable = 0; + + for (offnum = minoff; + offnum <= maxoff; + offnum = OffsetNumberNext(offnum)) + { + ItemId itemId = PageGetItemId(page, offnum); + if (!ItemIdIsUsed(itemId) || ItemIdDeleted(itemId)) + { + deletable[ndeletable++] = offnum; + } + } + + if (ndeletable > 0) + { + _bt_delitems(rel, buf, deletable, ndeletable); + } + + /* clear the dead tuple hint bit */ + opaque->btpo_flags &= ~BTP_HAS_DEAD; + + return ndeletable; + } diff -cpr pgsql-orig/src/backend/access/nbtree/nbtree.c pgsql/src/backend/access/nbtree/nbtree.c *** pgsql-orig/src/backend/access/nbtree/nbtree.c 2006-01-24 21:39:25.000000000 +0900 --- pgsql/src/backend/access/nbtree/nbtree.c 2006-01-25 05:30:06.000000000 +0900 *************** btgettuple(PG_FUNCTION_ARGS) *** 271,276 **** --- 271,277 ---- offnum = ItemPointerGetOffsetNumber(&(scan->currentItemData)); page = BufferGetPage(so->btso_curbuf); PageGetItemId(page, offnum)->lp_flags |= LP_DELETE; + ((BTPageOpaque) PageGetSpecialPointer(page))->btpo_flags |= BTP_HAS_DEAD; /* * Since this can be redone later if needed, it's treated the same *************** btbulkdelete(PG_FUNCTION_ARGS) *** 632,637 **** --- 633,641 ---- } } + /* clear the dead tuple hint bit */ + opaque->btpo_flags &= ~BTP_HAS_DEAD; + /* * If we need to delete anything, do it and write the buffer; else * just release the buffer. *************** _bt_restscan(IndexScanDesc scan) *** 898,903 **** --- 902,929 ---- return; } + /* Check for item on this page */ + if (offnum <= maxoff) + { + for (; + offnum <= maxoff; + offnum = OffsetNumberNext(offnum)) + { + item = (BTItem) PageGetItem(page, PageGetItemId(page, offnum)); + if (BTTidSame(item->bti_itup.t_tid, *target)) + { + /* Found it */ + current->ip_posid = offnum; + return; + } + } + + /* Move offsetnumber to first, because we may delete LP_DELETE tuple */ + maxoff = ItemPointerGetOffsetNumber(current) - 1; + } + + offnum = P_FIRSTDATAKEY(opaque); + /* * The item we were on may have moved right due to insertions. Find it * again. We use the heap TID to identify the item uniquely. diff -cpr pgsql-orig/src/backend/storage/buffer/bufmgr.c pgsql/src/backend/storage/buffer/bufmgr.c *** pgsql-orig/src/backend/storage/buffer/bufmgr.c 2006-01-24 21:39:25.000000000 +0900 --- pgsql/src/backend/storage/buffer/bufmgr.c 2006-01-25 05:19:50.000000000 +0900 *************** LockBufferForCleanup(Buffer buffer) *** 1880,1885 **** --- 1880,1904 ---- } /* + * BufferSuperLockHeldByMe + * + * @param buffer Must have BUFFER_LOCK_EXCLUSIVE. + */ + bool + BufferSuperLockHeldByMe(Buffer buffer) + { + volatile BufferDesc *bufHdr; + unsigned refcount; + + bufHdr = &BufferDescriptors[buffer - 1]; + LockBufHdr(bufHdr); + refcount = bufHdr->refcount; + UnlockBufHdr(bufHdr); + + return refcount == 1; + } + + /* * Functions for buffer I/O handling * * Note: We assume that nested buffer I/O never occurs. diff -cpr pgsql-orig/src/include/access/nbtree.h pgsql/src/include/access/nbtree.h *** pgsql-orig/src/include/access/nbtree.h 2006-01-24 21:39:25.000000000 +0900 --- pgsql/src/include/access/nbtree.h 2006-01-25 04:58:05.000000000 +0900 *************** typedef BTPageOpaqueData *BTPageOpaque; *** 55,60 **** --- 55,61 ---- #define BTP_DELETED (1 << 2) /* page has been deleted from tree */ #define BTP_META (1 << 3) /* meta-page */ #define BTP_HALF_DEAD (1 << 4) /* empty, but still in tree */ + #define BTP_HAS_DEAD (1 << 5) /* page has dead tuples */ /* *************** typedef BTItemData *BTItem; *** 155,160 **** --- 156,162 ---- #define P_ISROOT(opaque) ((opaque)->btpo_flags & BTP_ROOT) #define P_ISDELETED(opaque) ((opaque)->btpo_flags & BTP_DELETED) #define P_IGNORE(opaque) ((opaque)->btpo_flags & (BTP_DELETED|BTP_HALF_DEAD)) + #define P_HAS_DEAD(opaque) ((opaque)->btpo_flags & BTP_HAS_DEAD) /* * Lehman and Yao's algorithm requires a ``high key'' on every non-rightmost diff -cpr pgsql-orig/src/include/storage/bufmgr.h pgsql/src/include/storage/bufmgr.h *** pgsql-orig/src/include/storage/bufmgr.h 2006-01-24 21:39:25.000000000 +0900 --- pgsql/src/include/storage/bufmgr.h 2006-01-25 05:06:04.000000000 +0900 *************** extern void UnlockBuffers(void); *** 149,154 **** --- 149,155 ---- extern void LockBuffer(Buffer buffer, int mode); extern bool ConditionalLockBuffer(Buffer buffer); extern void LockBufferForCleanup(Buffer buffer); + extern bool BufferSuperLockHeldByMe(Buffer buffer); extern void AbortBufferIO(void);