diff --git a/src/backend/access/gist/gist.c b/src/backend/access/gist/gist.c index b8aa9bc..29770d2 100644 --- a/src/backend/access/gist/gist.c +++ b/src/backend/access/gist/gist.c @@ -265,6 +265,7 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate, bool is_rootsplit; int npage; + is_rootsplit = (blkno == GIST_ROOT_BLKNO); /* @@ -524,6 +525,8 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate, gistfillbuffer(page, itup, ntup, InvalidOffsetNumber); } + GistPageGetOpaque(page)->nlazy=1; //DO NOT FORGET: remark pages after split + MarkBufferDirty(buffer); if (BufferIsValid(leftchildbuf)) @@ -589,6 +592,7 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate, * this routine assumes it is invoked in a short-lived memory context, * so it does not bother releasing palloc'd allocations. */ + void gistdoinsert(Relation r, IndexTuple itup, Size freespace, GISTSTATE *giststate) { @@ -597,7 +601,6 @@ gistdoinsert(Relation r, IndexTuple itup, Size freespace, GISTSTATE *giststate) GISTInsertStack firststack; GISTInsertStack *stack; GISTInsertState state; - bool xlocked = false; memset(&state, 0, sizeof(GISTInsertState)); state.freespace = freespace; @@ -610,6 +613,21 @@ gistdoinsert(Relation r, IndexTuple itup, Size freespace, GISTSTATE *giststate) firststack.downlinkoffnum = InvalidOffsetNumber; state.stack = stack = &firststack; + + gistdoinsertloop(r,itup,freespace, giststate, stack, state, 0); +} + +void +gistdoinsertloop(Relation r, IndexTuple itup, Size freespace, GISTSTATE *giststate,GISTInsertStack *givenstack, GISTInsertState state, GISTInsertStack *nolazy) +{ + ItemId iid; + IndexTuple idxtuple; + + bool xlocked = false; + GISTInsertStack *stack = givenstack; + + + /* * Walk down along the path of smallest penalty, updating the parent * pointers with the key we're inserting as we go. If we crash in the @@ -685,9 +703,86 @@ gistdoinsert(Relation r, IndexTuple itup, Size freespace, GISTSTATE *giststate) GISTInsertStack *item; OffsetNumber downlinkoffnum; + + + if(stack!=nolazy) + { + if(GistPageCanStoreLazy(stack->page, itup)) + { + if (!xlocked) + { + LockBuffer(stack->buffer, GIST_UNLOCK); + LockBuffer(stack->buffer, GIST_EXCLUSIVE); + xlocked = true; + stack->page = (Page) BufferGetPage(stack->buffer); + + if (PageGetLSN(stack->page) != stack->lsn) + { + /* the page was changed while we unlocked it, retry */ + continue; + } + } + + //elog(WARNING,"writing lazy tuple"); + + GistTupleSetLazy(itup); + gistinserttuple(&state, stack, giststate, itup, + InvalidOffsetNumber); + LockBuffer(stack->buffer, GIST_UNLOCK); + xlocked = false; + + break; + } + else if (GistPageGetOpaque(stack->page)->nlazy) + { + OffsetNumber i; + OffsetNumber maxoff = PageGetMaxOffsetNumber(stack->page); + + //elog(WARNING,"starting push"); + if (!xlocked) + { + //elog(WARNING,"xloc for push"); + LockBuffer(stack->buffer, GIST_UNLOCK); + LockBuffer(stack->buffer, GIST_EXCLUSIVE); + xlocked = true; + stack->page = (Page) BufferGetPage(stack->buffer); + + if (PageGetLSN(stack->page) != stack->lsn) + { + /* the page was changed while we unlocked it, retry */ + continue; + } + } + + int nlen; + IndexTuple* lazys = gistextractlazy(stack->page,&nlen); + + LockBuffer(stack->buffer, GIST_UNLOCK); + + xlocked = false; + + for (i = 0; i < nlen; i++) + { + IndexTuple ltup = lazys[i]; + //elog(WARNING,"push %d",i); + GistTupleUnsetLazy(ltup); + gistdoinsertloop(r,ltup,freespace,giststate,stack,state,stack); + //elog(WARNING,"push out, on page %d tuples", PageGetMaxOffsetNumber(stack->page)); + } + LockBuffer(stack->buffer, GIST_SHARE); + GistPageGetOpaque(stack->page)->nlazy = 0; + } + + } + else + { + //elog(WARNING,"skipped laziness"); + } + downlinkoffnum = gistchoose(state.r, stack->page, itup, giststate); iid = PageGetItemId(stack->page, downlinkoffnum); idxtuple = (IndexTuple) PageGetItem(stack->page, iid); + //elog(WARNING,"picked %d",downlinkoffnum); childblkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid)); /* @@ -753,6 +848,7 @@ gistdoinsert(Relation r, IndexTuple itup, Size freespace, GISTSTATE *giststate) continue; } } + LockBuffer(stack->buffer, GIST_UNLOCK); xlocked = false; @@ -825,12 +921,20 @@ gistdoinsert(Relation r, IndexTuple itup, Size freespace, GISTSTATE *giststate) InvalidOffsetNumber); LockBuffer(stack->buffer, GIST_UNLOCK); - /* Release any pins we might still hold before exiting */ - for (; stack; stack = stack->parent) - ReleaseBuffer(stack->buffer); + break; } } + + + /* Release any pins we might still hold before exiting */ + for (; stack; stack = stack->parent) + { + if(nolazy != stack) + ReleaseBuffer(stack->buffer); + if(stack == givenstack) + break; + } } /* @@ -1256,6 +1360,7 @@ gistfinishsplit(GISTInsertState *state, GISTInsertStack *stack, GISTPageSplitInfo *left; IndexTuple tuples[2]; + /* A split always contains at least two halves */ Assert(list_length(splitinfo) >= 2); diff --git a/src/backend/access/gist/gistget.c b/src/backend/access/gist/gistget.c index 5ba7d0a..c51c842 100644 --- a/src/backend/access/gist/gistget.c +++ b/src/backend/access/gist/gistget.c @@ -197,7 +197,7 @@ gistindex_keytest(IndexScanDesc scan, gistdentryinit(giststate, key->sk_attno - 1, &de, datum, r, page, offset, - FALSE, isNull); + FALSE, isNull, GistTupleIsLazy(tuple)); /* * Call the Consistent function to evaluate the test. The @@ -258,7 +258,7 @@ gistindex_keytest(IndexScanDesc scan, gistdentryinit(giststate, key->sk_attno - 1, &de, datum, r, page, offset, - FALSE, isNull); + FALSE, isNull, GistTupleIsLazy(tuple)); /* * Call the Distance function to evaluate the distance. The @@ -422,7 +422,7 @@ gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem, double *myDistances, if (!match) continue; - if (tbm && GistPageIsLeaf(page)) + if (tbm && (GistPageIsLeaf(page) || GistTupleIsLazy(it))) { /* * getbitmap scan, so just push heap tuple TIDs into the bitmap @@ -431,7 +431,7 @@ gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem, double *myDistances, tbm_add_tuples(tbm, &it->t_tid, 1, recheck); (*ntids)++; } - else if (scan->numberOfOrderBys == 0 && GistPageIsLeaf(page)) + else if (scan->numberOfOrderBys == 0 && (GistPageIsLeaf(page) || GistTupleIsLazy(it))) { /* * Non-ordered scan, so report tuples in so->pageData[] @@ -466,7 +466,7 @@ gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem, double *myDistances, /* Create new GISTSearchItem for this item */ item = palloc(SizeOfGISTSearchItem(scan->numberOfOrderBys)); - if (GistPageIsLeaf(page)) + if ((GistPageIsLeaf(page) || GistTupleIsLazy(it))) { /* Creating heap-tuple GISTSearchItem */ item->blkno = InvalidBlockNumber; diff --git a/src/backend/access/gist/gistsplit.c b/src/backend/access/gist/gistsplit.c index d394969..c9680db 100644 --- a/src/backend/access/gist/gistsplit.c +++ b/src/backend/access/gist/gistsplit.c @@ -643,7 +643,7 @@ gistSplitByKey(Relation r, Page page, IndexTuple *itup, int len, &IsNull); gistdentryinit(giststate, attno, &(entryvec->vector[i]), datum, r, page, i, - FALSE, IsNull); + FALSE, IsNull, GistTupleIsLazy(itup[i-1])); if (IsNull) offNullTuples[nOffNullTuples++] = i; } diff --git a/src/backend/access/gist/gistutil.c b/src/backend/access/gist/gistutil.c index 887c58b..897e7f0 100644 --- a/src/backend/access/gist/gistutil.c +++ b/src/backend/access/gist/gistutil.c @@ -105,6 +105,45 @@ gistextractpage(Page page, int *len /* out */ ) return itvec; } + +IndexTuple * +gistextractlazy(Page page, int *len /* out */ ) +{ + OffsetNumber i, + maxoff; + IndexTuple *itvec; + OffsetNumber* itemNos; + //elog(WARNING,"extracting for push"); + + maxoff = PageGetMaxOffsetNumber(page); + *len = 0; + itvec = palloc(sizeof(IndexTuple) * maxoff); + itemNos = palloc(sizeof(OffsetNumber) * maxoff); + for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) + { + ItemId tid = PageGetItemId(page, i); + IndexTuple it = (IndexTuple) PageGetItem(page, tid); + if(GistTupleIsLazy(it)) + { + //elog(WARNING,"extract item %d",i); + IndexTuple copy = palloc(tid->lp_len); + //elog(WARNING,"memmove",i); + memmove(copy,it,tid->lp_len); + //elog(WARNING,"offset %d to index %d",i,*len); + itemNos[*len] = i; + //elog(WARNING,"ptr",i); + itvec[*len] = copy; + *len = *len+1; + //elog(WARNING,"done",i); + } + } + + //elog(WARNING,"deleting %d",*len); + PageIndexMultiDelete(page,itemNos,*len); + + return itvec; +} + /* * join two vectors into one */ @@ -178,7 +217,7 @@ gistMakeUnionItVec(GISTSTATE *giststate, IndexTuple *itvec, int len, evec->vector + evec->n, datum, NULL, NULL, (OffsetNumber) 0, - FALSE, IsNull); + FALSE, IsNull, GistTupleIsLazy(itvec[j])); evec->n++; } @@ -302,7 +341,7 @@ gistDeCompressAtt(GISTSTATE *giststate, Relation r, IndexTuple tuple, Page p, datum = index_getattr(tuple, i + 1, giststate->tupdesc, &isnull[i]); gistdentryinit(giststate, i, &attdata[i], datum, r, p, o, - FALSE, isnull[i]); + FALSE, isnull[i], GistTupleIsLazy(tuple)); } } @@ -438,6 +477,9 @@ gistchoose(Relation r, Page p, IndexTuple it, /* it has compressed entry */ bool zero_penalty; int j; + if(GistTupleIsLazy(itup)) + continue; + zero_penalty = true; /* Loop over index attributes. */ @@ -450,7 +492,7 @@ gistchoose(Relation r, Page p, IndexTuple it, /* it has compressed entry */ /* Compute penalty for this column. */ datum = index_getattr(itup, j + 1, giststate->tupdesc, &IsNull); gistdentryinit(giststate, j, &entry, datum, r, p, i, - FALSE, IsNull); + FALSE, IsNull, GistTupleIsLazy(itup)); usize = gistpenalty(giststate, j, &entry, IsNull, &identry[j], isnull[j]); if (usize > 0) @@ -542,7 +584,7 @@ gistchoose(Relation r, Page p, IndexTuple it, /* it has compressed entry */ void gistdentryinit(GISTSTATE *giststate, int nkey, GISTENTRY *e, Datum k, Relation r, Page pg, OffsetNumber o, - bool l, bool isNull) + bool l, bool isNull, bool lazy) { if (!isNull) { @@ -557,9 +599,20 @@ gistdentryinit(GISTSTATE *giststate, int nkey, GISTENTRY *e, if (dep != e) gistentryinit(*e, dep->key, dep->rel, dep->page, dep->offset, dep->leafkey); + if(lazy) + { + dep->leafpage = false; + } } else + { gistentryinit(*e, (Datum) 0, r, pg, o, l); + + if(lazy) + { + e->leafpage = false; + } + } } IndexTuple diff --git a/src/include/access/gist.h b/src/include/access/gist.h index 4343d6f..ffa9048 100644 --- a/src/include/access/gist.h +++ b/src/include/access/gist.h @@ -59,6 +59,7 @@ typedef struct GISTPageOpaqueData { PageGistNSN nsn; /* this value must change on page split */ BlockNumber rightlink; /* next page if any */ + uint16 nlazy; uint16 flags; /* see bit definitions above */ uint16 gist_page_id; /* for identification of GiST indexes */ } GISTPageOpaqueData; @@ -125,12 +126,13 @@ typedef struct GISTENTRY Page page; OffsetNumber offset; bool leafkey; + bool leafpage; } GISTENTRY; #define GistPageGetOpaque(page) ( (GISTPageOpaque) PageGetSpecialPointer(page) ) #define GistPageIsLeaf(page) ( GistPageGetOpaque(page)->flags & F_LEAF) -#define GIST_LEAF(entry) (GistPageIsLeaf((entry)->page)) +#define GIST_LEAF(entry) (((entry)->leafpage)) #define GistPageIsDeleted(page) ( GistPageGetOpaque(page)->flags & F_DELETED) #define GistPageSetDeleted(page) ( GistPageGetOpaque(page)->flags |= F_DELETED) @@ -168,6 +170,6 @@ typedef struct */ #define gistentryinit(e, k, r, pg, o, l) \ do { (e).key = (k); (e).rel = (r); (e).page = (pg); \ - (e).offset = (o); (e).leafkey = (l); } while (0) + (e).offset = (o); (e).leafkey = (l); (e).leafpage = (pg) && GistPageIsLeaf(pg);} while (0) #endif /* GIST_H */ diff --git a/src/include/access/gist_private.h b/src/include/access/gist_private.h index 78e87a6..669d697 100644 --- a/src/include/access/gist_private.h +++ b/src/include/access/gist_private.h @@ -320,6 +320,7 @@ typedef struct #define GistTupleIsInvalid(itup) ( ItemPointerGetOffsetNumber( &((itup)->t_tid) ) == TUPLE_IS_INVALID ) #define GistTupleSetValid(itup) ItemPointerSetOffsetNumber( &((itup)->t_tid), TUPLE_IS_VALID ) +#define GistPageCanStoreLazy(page,itup) ((int) ((PageHeader) page)->pd_upper - (int) ((PageHeader) page)->pd_lower - IndexTupleSize(itup) > 1024) @@ -435,6 +436,13 @@ extern void gistdoinsert(Relation r, IndexTuple itup, Size freespace, GISTSTATE *GISTstate); +extern void gistdoinsertloop(Relation r, + IndexTuple itup, + Size freespace, + GISTSTATE *GISTstate, + GISTInsertStack *stack, + GISTInsertState state, + GISTInsertStack *nolazy); /* A List of these is returned from gistplacetopage() in *splitinfo */ typedef struct @@ -498,6 +506,7 @@ extern Buffer gistNewBuffer(Relation r); extern void gistfillbuffer(Page page, IndexTuple *itup, int len, OffsetNumber off); extern IndexTuple *gistextractpage(Page page, int *len /* out */ ); +extern IndexTuple *gistextractlazy(Page page, int *len /* out */ ); extern IndexTuple *gistjoinvector( IndexTuple *itvec, int *len, IndexTuple *additvec, int addlen); @@ -519,7 +528,7 @@ extern OffsetNumber gistchoose(Relation r, Page p, extern void GISTInitBuffer(Buffer b, uint32 f); extern void gistdentryinit(GISTSTATE *giststate, int nkey, GISTENTRY *e, Datum k, Relation r, Page pg, OffsetNumber o, - bool l, bool isNull); + bool l, bool isNull, bool lazy); extern float gistpenalty(GISTSTATE *giststate, int attno, GISTENTRY *key1, bool isNull1, diff --git a/src/include/access/itup.h b/src/include/access/itup.h index 8350fa0..d96dd94 100644 --- a/src/include/access/itup.h +++ b/src/include/access/itup.h @@ -72,6 +72,11 @@ typedef IndexAttributeBitMapData *IndexAttributeBitMap; #define IndexTupleHasNulls(itup) ((((IndexTuple) (itup))->t_info & INDEX_NULL_MASK)) #define IndexTupleHasVarwidths(itup) ((((IndexTuple) (itup))->t_info & INDEX_VAR_MASK)) +#define TUPLE_IS_LAZY 0x2000 +#define GistTupleIsLazy(itup) (((IndexTuple)itup)->t_info & TUPLE_IS_LAZY) +#define GistTupleSetLazy(itup) do { ((IndexTuple)itup)->t_info = ((IndexTuple)itup)->t_info | TUPLE_IS_LAZY; } while(0) +#define GistTupleUnsetLazy(itup) do { ((IndexTuple)itup)->t_info = ((IndexTuple)itup)->t_info & ~TUPLE_IS_LAZY; } while(0) + /* * Takes an infomask as argument (primarily because this needs to be usable diff --git a/test.sh b/test.sh new file mode 100755 index 0000000..3767958 --- /dev/null +++ b/test.sh @@ -0,0 +1,2 @@ +#!/bin/sh +/home/x4m/project/bin/psql postgres