diff --git a/src/backend/access/common/heaptuple.c b/src/backend/access/common/heaptuple.c
index 034dfe5..371ef21 100644
--- a/src/backend/access/common/heaptuple.c
+++ b/src/backend/access/common/heaptuple.c
@@ -618,6 +618,428 @@ heap_copytuple_with_tuple(HeapTuple src, HeapTuple dest)
 }
 
 /*
+ * get_tuple_info - Gets the tuple offset and value.
+ *
+ * Calculates the attribute value and the offset at which the attribute ends
+ * in the tuple, based on the attribute number and the previously fetched
+ * attribute info.
+ *
+ * offset (input and output) - On input, the end offset of the previous
+ *		attribute; zero if this is the first attribute. On output, the end
+ *		offset of the current attribute in the tuple.
+ * usecacheoff (input and output) - Whether attcacheoff can still be used.
+ */
+static void
+get_tuple_info(Form_pg_attribute *att, HeapTuple tuple, bits8 *bp,
+			   bool hasnulls, int attnum, Datum *value, uint16 *offset,
+			   bool *usecacheoff)
+{
+	Form_pg_attribute thisatt = att[attnum];
+	uint16		off = *offset;
+	bool		slow = *usecacheoff;
+	char	   *tp;
+	HeapTupleHeader tup = tuple->t_data;
+
+	tp = (char *) tup + tup->t_hoff;
+
+	if (hasnulls && att_isnull(attnum, bp))
+	{
+		slow = true;			/* can't use attcacheoff anymore */
+		*offset = off;
+		*usecacheoff = slow;
+		return;
+	}
+
+	if (!slow && thisatt->attcacheoff >= 0)
+		off = thisatt->attcacheoff;
+	else if (thisatt->attlen == -1)
+	{
+		/*
+		 * We can only cache the offset for a varlena attribute if the offset
+		 * is already suitably aligned, so that there would be no pad bytes in
+		 * any case: then the offset will be valid for either an aligned or
+		 * unaligned value.
+		 */
+		if (!slow &&
+			off == att_align_nominal(off, thisatt->attalign))
+			thisatt->attcacheoff = off;
+		else
+		{
+			off = att_align_pointer(off, thisatt->attalign, -1,
+									tp + off);
+			slow = true;
+		}
+	}
+	else
+	{
+		/* not varlena, so safe to use att_align_nominal */
+		off = att_align_nominal(off, thisatt->attalign);
+
+		if (!slow)
+			thisatt->attcacheoff = off;
+	}
+
+	*value = fetchatt(thisatt, tp + off);
+
+	off = att_addlength_pointer(off, thisatt->attlen, tp + off);
+
+	if (thisatt->attlen <= 0)
+		slow = true;			/* can't use attcacheoff anymore */
+
+	*offset = off;
+	*usecacheoff = slow;
+}
+
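Aside (not part of the patch): the offset bookkeeping in get_tuple_info is PostgreSQL's usual attribute-alignment arithmetic. The standalone sketch below walks a hypothetical (int4, int8, int2) layout using the same power-of-two rounding that TYPEALIGN / att_align_nominal perform; the macro body and column layout are restated here purely for illustration.

#include <stdio.h>
#include <stdint.h>

/* Round len up to the next multiple of alignval (a power of two),
 * the same trick PostgreSQL's TYPEALIGN macro uses. */
#define TYPEALIGN(alignval, len) \
	(((uintptr_t) (len) + ((alignval) - 1)) & ~((uintptr_t) ((alignval) - 1)))

int
main(void)
{
	uintptr_t	off = 0;

	off = TYPEALIGN(4, off);	/* int4 starts at offset 0 */
	printf("int4 at %lu\n", (unsigned long) off);
	off += 4;

	off = TYPEALIGN(8, off);	/* 4 pad bytes, int8 starts at 8 */
	printf("int8 at %lu\n", (unsigned long) off);
	off += 8;

	off = TYPEALIGN(2, off);	/* no padding, int2 starts at 16 */
	printf("int2 at %lu\n", (unsigned long) off);
	return 0;
}

This is exactly why the encoder below has to emit explicit PAD commands: the replaying side has no tuple descriptor, so alignment holes must be spelled out in the record.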
+
+/*
+ * encode_xlog_update
+ *		Forms a diff tuple from the old and new tuples, using the list of
+ *		modified columns.
+ *
+ * att - attribute list.
+ * oldtup - pointer to the old tuple.
+ * heaptup - pointer to the modified tuple.
+ * wal_tup - pointer to the WAL tuple to be formed from the old and new
+ *		tuples using the modified columns list.
+ * modifiedCols - the columns modified by the update command.
+ */
+void
+encode_xlog_update(Form_pg_attribute *att, HeapTuple oldtup,
+				   HeapTuple heaptup, HeapTuple wal_tup,
+				   Bitmapset *modifiedCols)
+{
+	int			numberOfAttributes;
+	uint16		cur_offset = 0,
+				prev_offset = 0,
+				offset = 0;
+	int			attnum;
+	HeapTupleHeader newtuphdr = heaptup->t_data;
+	bits8	   *new_bp = newtuphdr->t_bits,
+			   *old_bp = oldtup->t_data->t_bits;
+	bool		old_hasnulls = HeapTupleHasNulls(oldtup);
+	bool		new_hasnulls = HeapTupleHasNulls(heaptup);
+	bool		cur_usecacheoff = false,
+				prev_usecacheoff = false;
+	Datum		cur_value,
+				prev_value;
+	uint16		data_length;
+	bool		check_for_padding = false;
+	char	   *data;
+	uint16		wal_offset = 0;
+
+	numberOfAttributes = HeapTupleHeaderGetNatts(newtuphdr);
+
+	data = (char *) wal_tup->t_data;
+	wal_offset = newtuphdr->t_hoff;
+
+	/* Copy the tuple header to the WAL tuple */
+	memcpy(data, heaptup->t_data, wal_offset);
+
+	for (attnum = 0; attnum < numberOfAttributes; attnum++)
+	{
+		/*
+		 * If the attribute is modified by the update operation, store the
+		 * appropriate offsets in the WAL record; otherwise skip to the next
+		 * attribute.
+		 */
+		if (bms_is_member((attnum + 1) - FirstLowInvalidHeapAttributeNumber,
+						  modifiedCols))
+		{
+			check_for_padding = true;
+
+			/*
+			 * Store the offset at which the modified attribute starts in the
+			 * old tuple; this is used to traverse the old tuple during
+			 * recovery.
+			 */
+			if (prev_offset)
+			{
+				*(uint8 *) (data + wal_offset) = HEAP_UPDATE_WAL_OPT_COPY;
+				wal_offset += sizeof(uint8);
+
+				wal_offset = SHORTALIGN(wal_offset);
+
+				*(uint16 *) (data + wal_offset) = prev_offset;
+				wal_offset += sizeof(uint16);
+			}
+
+			/* calculate the old tuple field length that needs to be ignored */
+			offset = prev_offset;
+			get_tuple_info(att, oldtup, old_bp, old_hasnulls, attnum,
+						   &prev_value, &prev_offset, &prev_usecacheoff);
+
+			data_length = prev_offset - offset;
+
+			if (data_length)
+			{
+				*(uint8 *) (data + wal_offset) = HEAP_UPDATE_WAL_OPT_IGN;
+				wal_offset += sizeof(uint8);
+
+				wal_offset = SHORTALIGN(wal_offset);
+
+				*(uint16 *) (data + wal_offset) = data_length;
+				wal_offset += sizeof(uint16);
+			}
+
+			/*
+			 * Calculate the new tuple field start position to check whether
+			 * any padding is required.
+			 */
+			offset = cur_offset;
+			cur_offset = att_align_pointer(cur_offset,
+							att[attnum]->attalign, att[attnum]->attlen,
+							(char *) newtuphdr + newtuphdr->t_hoff + cur_offset);
+
+			data_length = cur_offset - offset;
+
+			/*
+			 * The above calculation identifies whether any alignment is
+			 * required. The padding command is added below, and only when
+			 * the data is not NULL.
+			 */
+
+			offset = cur_offset;
+			get_tuple_info(att, heaptup, new_bp, new_hasnulls, attnum,
+						   &cur_value, &cur_offset, &cur_usecacheoff);
+
+			/* if the new tuple data is NULL then nothing needs to be added */
+			if (new_hasnulls && att_isnull(attnum, new_bp))
+			{
+				continue;
+			}
+
+			/* Add the padding if required, as the data is not NULL */
+			if (data_length)
+			{
+				*(uint8 *) (data + wal_offset) = HEAP_UPDATE_WAL_OPT_PAD;
+				wal_offset += sizeof(uint8);
+
+				*(uint8 *) (data + wal_offset) = data_length;
+				wal_offset += sizeof(uint8);
+			}
+
+			/* store the attribute value and its length */
+			*(uint8 *) (data + wal_offset) = HEAP_UPDATE_WAL_OPT_ADD;
+			wal_offset += sizeof(uint8);
+
+			wal_offset = SHORTALIGN(wal_offset);
+
+			data_length = cur_offset - offset;
+			*(uint16 *) (data + wal_offset) = data_length;
+			wal_offset += sizeof(uint16);
+
+			if (att[attnum]->attbyval)
+			{
+				/* pass-by-value */
+				char		tempdata[sizeof(Datum)];
+
+				/*
+				 * The data is not stored aligned in the WAL record, because
+				 * the tuple descriptor is not available while replaying the
+				 * xlog.
+				 *
+				 * The alignment of the data is instead taken care of while
+				 * framing the tuple during heap_xlog_update.
+				 */
+				store_att_byval(tempdata,
+								cur_value,
+								att[attnum]->attlen);
+				memcpy((data + wal_offset), tempdata, att[attnum]->attlen);
+			}
+			else
+			{
+				memcpy((data + wal_offset),
+					   DatumGetPointer(cur_value),
+					   data_length);
+			}
+
+			wal_offset += data_length;
+		}
+		else
+		{
+			/*
+			 * Padding may be required if the previous field was modified, so
+			 * check for it here.
+			 *
+			 * This attribute is not modified, so if its data in the old
+			 * tuple is NULL then it is NULL in the new tuple as well.
+			 */
+			if (check_for_padding && !att_isnull(attnum, old_bp))
+			{
+				check_for_padding = false;
+
+				/*
+				 * Calculate the old tuple field start position; any
+				 * alignment found there needs to be ignored.
+				 */
+				offset = prev_offset;
+				prev_offset = att_align_pointer(prev_offset,
+								att[attnum]->attalign, att[attnum]->attlen,
+								(char *) oldtup->t_data + oldtup->t_data->t_hoff + prev_offset);
+
+				data_length = prev_offset - offset;
+
+				if (data_length)
+				{
+					*(uint8 *) (data + wal_offset) = HEAP_UPDATE_WAL_OPT_IGN;
+					wal_offset += sizeof(uint8);
+
+					wal_offset = SHORTALIGN(wal_offset);
+
+					*(uint16 *) (data + wal_offset) = data_length;
+					wal_offset += sizeof(uint16);
+				}
+
+				/*
+				 * Calculate the new tuple field start position to check
+				 * whether any padding is required because of field
+				 * alignment.
+				 */
+				offset = cur_offset;
+				cur_offset = att_align_pointer(cur_offset,
+								att[attnum]->attalign, att[attnum]->attlen,
+								(char *) newtuphdr + newtuphdr->t_hoff + cur_offset);
+
+				data_length = cur_offset - offset;
+
+				if (data_length)
+				{
+					*(uint8 *) (data + wal_offset) = HEAP_UPDATE_WAL_OPT_PAD;
+					wal_offset += sizeof(uint8);
+
+					*(uint8 *) (data + wal_offset) = data_length;
+					wal_offset += sizeof(uint8);
+				}
+			}
+
+			get_tuple_info(att, oldtup, old_bp, old_hasnulls, attnum,
+						   &prev_value, &prev_offset, &prev_usecacheoff);
+
+			get_tuple_info(att, heaptup, new_bp, new_hasnulls, attnum,
+						   &cur_value, &cur_offset, &cur_usecacheoff);
+		}
+	}
+
+	wal_tup->t_len = wal_offset;
+	wal_tup->t_self = heaptup->t_self;
+	wal_tup->t_tableOid = heaptup->t_tableOid;
+}
+
+/*
+ * decode_xlog_update
+ *		Deforms a diff tuple and forms the new tuple with the help of the
+ *		old tuple.
+ *
+ * The WAL record data is in the following format:
+ *
+ *		COPY + offset up to which old tuple data must be copied
+ *		IGN + length that needs to be ignored from the old tuple
+ *		PAD + length that needs to be padded with zeros in the new tuple
+ *		ADD + length of data + the modified data itself
+ *
+ * For the COPY command, copy the specified length from the old tuple, then
+ * increase the old tuple offset by the copied length.
+ *
+ * For the IGN command, ignore the specified length in the old tuple.
+ *
+ * For the PAD command, fill the specified length with zeros in the new
+ * tuple.
+ *
+ * For the ADD command, copy the corresponding length of data from the WAL
+ * record to the new tuple.
+ *
+ * Repeat this procedure until the end of the WAL record is reached.
+ * Any remaining old tuple data is copied at the end.
+ *
+ * htup - old tuple data pointer from which the new tuple needs to be formed.
+ * old_tup_len - old tuple length.
+ * data - pointer to the new tuple that needs to be framed.
+ * new_tup_len - output: new tuple data length.
+ * waldata - WAL record pointer from which the new tuple needs to be formed.
+ * wal_len - WAL record length.
+ */
+void
+decode_xlog_update(HeapTupleHeader htup, uint32 old_tup_len, char *data,
+				   uint32 *new_tup_len, char *waldata, uint32 wal_len)
+{
+	uint8		command;
+	uint16		len = 0,
+				data_length,
+				prev_offset = 0,
+				cur_offset = 0;
+	char	   *olddata = (char *) htup + htup->t_hoff;
+
+	/* Frame the new tuple from the old tuple and the WAL record */
+	while (len < wal_len)
+	{
+		command = *(uint8 *) (waldata + len);
+		len += sizeof(uint8);
+
+		switch (command)
+		{
+			case HEAP_UPDATE_WAL_OPT_COPY:
+				len = SHORTALIGN(len);
+				data_length = *(uint16 *) (waldata + len) - prev_offset;
+
+				/* Copy the old tuple data */
+				memcpy((data + cur_offset),
+					   (olddata + prev_offset),
+					   data_length);
+				cur_offset += data_length;
+				prev_offset += data_length;
+
+				len += sizeof(uint16);
+				break;
+			case HEAP_UPDATE_WAL_OPT_ADD:
+				len = SHORTALIGN(len);
+				data_length = *(uint16 *) (waldata + len);
+				len += sizeof(uint16);
+
+				/* Copy the modified attribute data from the WAL record */
+				memcpy((data + cur_offset), (waldata + len), data_length);
+				cur_offset += data_length;
+				len += data_length;
+				break;
+			case HEAP_UPDATE_WAL_OPT_IGN:
+				len = SHORTALIGN(len);
+				data_length = *(uint16 *) (waldata + len);
+
+				/* Skip this length of old tuple data */
+				prev_offset += data_length;
+				len += sizeof(uint16);
+				break;
+			case HEAP_UPDATE_WAL_OPT_PAD:
+				data_length = *(uint8 *) (waldata + len);
+
+				/* Zero-fill the alignment padding in the new tuple */
+				MemSet(data + cur_offset, 0, data_length);
+				cur_offset += data_length;
+				len += sizeof(uint8);
+				break;
+			default:
+				Assert(0);
+				break;
+		}
+	}
+
+	/* Copy the remaining old tuple data to the new tuple */
+	if (prev_offset < old_tup_len)
+	{
+		memcpy((data + cur_offset),
+			   (olddata + prev_offset),
+			   (old_tup_len - prev_offset));
+		cur_offset += (old_tup_len - prev_offset);
+	}
+
+	*new_tup_len = cur_offset +
+		(htup->t_hoff - offsetof(HeapTupleHeaderData, t_bits));
+}
+
+
+/*
  * heap_form_tuple
  *		construct a tuple from the given values[] and isnull[] arrays,
  *		which are of the length indicated by tupleDescriptor->natts
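Aside (not part of the patch): the decode loop can be exercised standalone. The sketch below re-implements it over plain byte buffers for a three-int4-column tuple whose middle column is updated, mirroring the four opcodes (OPT_* stands in for HEAP_UPDATE_WAL_OPT_*), the SHORTALIGN of the uint16 operands, and the trailing copy of unmodified columns. The hand-built stream assumes little-endian operand bytes.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define OPT_COPY	0			/* copy old data up to an absolute offset */
#define OPT_ADD		1			/* literal replacement bytes follow */
#define OPT_IGN		2			/* skip this many old bytes */
#define OPT_PAD		3			/* zero-fill this many new bytes */
#define SHORTALIGN(x)	(((x) + 1) & ~1)

static uint32_t
decode(const uint8_t *old, uint32_t old_len,
	   const uint8_t *wal, uint32_t wal_len, uint8_t *out)
{
	uint32_t	len = 0, src = 0, dst = 0;
	uint16_t	n;

	while (len < wal_len)
	{
		uint8_t		op = wal[len++];

		if (op == OPT_PAD)		/* 1-byte operand, no alignment */
		{
			n = wal[len++];
			memset(out + dst, 0, n);
			dst += n;
			continue;
		}
		len = SHORTALIGN(len);	/* uint16 operands are 2-byte aligned */
		memcpy(&n, wal + len, sizeof(n));	/* host (little-endian) order */
		len += sizeof(n);
		switch (op)
		{
			case OPT_COPY:
				n -= src;		/* operand is an absolute old-tuple offset */
				memcpy(out + dst, old + src, n);
				src += n;
				dst += n;
				break;
			case OPT_IGN:
				src += n;
				break;
			case OPT_ADD:
				memcpy(out + dst, wal + len, n);
				dst += n;
				len += n;
				break;
			default:
				assert(0);
		}
	}
	memcpy(out + dst, old + src, old_len - src);	/* trailing columns */
	return dst + (old_len - src);
}

int
main(void)
{
	/* old data area: three int4 columns a=1, b=2, c=3 */
	uint8_t		oldd[12] = {1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0};
	/* diff for "SET b = 42": copy a, ignore old b, add new b; c is
	 * never mentioned, so the trailing copy picks it up */
	uint8_t		wal[] = {
		OPT_COPY, 0, 4, 0,				/* copy old bytes [0,4) */
		OPT_IGN, 0, 4, 0,				/* skip old bytes [4,8) */
		OPT_ADD, 0, 4, 0, 42, 0, 0, 0	/* append the new value */
	};
	uint8_t		neww[12];
	uint32_t	n = decode(oldd, sizeof(oldd), wal, sizeof(wal), neww);

	assert(n == 12 && neww[4] == 42 && neww[8] == 3);
	printf("round trip OK, new data length %u\n", n);
	return 0;
}

Note the asymmetry this makes visible: COPY carries an absolute old-tuple offset (the decoder subtracts its running position), while IGN and ADD carry relative lengths.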
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index f28026b..bcf830e 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -70,7 +70,6 @@
 #include "utils/syscache.h"
 #include "utils/tqual.h"
 
-
 /* GUC variable */
 bool		synchronize_seqscans = true;
 
@@ -84,7 +83,8 @@ static HeapTuple heap_prepare_insert(Relation relation, HeapTuple tup,
 					TransactionId xid, CommandId cid, int options);
 static XLogRecPtr log_heap_update(Relation reln, Buffer oldbuf,
 				ItemPointerData from, Buffer newbuf, HeapTuple newtup,
-				bool all_visible_cleared, bool new_all_visible_cleared);
+				bool all_visible_cleared, bool new_all_visible_cleared,
+				bool diff_update);
 static bool HeapSatisfiesHOTUpdate(Relation relation, Bitmapset *hot_attrs,
 				   HeapTuple oldtup, HeapTuple newtup);
 
@@ -2686,6 +2686,7 @@ simple_heap_delete(Relation relation, ItemPointer tid)
  *	cid - update command ID (used for visibility test, and stored into
  *		cmax/cmin if successful)
  *	crosscheck - if not InvalidSnapshot, also check old tuple against this
+ *	modifiedCols - the list of columns modified by the update command
  *	wait - true if should wait for any conflicting update to commit/abort
  *
  * Normal, successful return value is HeapTupleMayBeUpdated, which
@@ -2707,7 +2708,8 @@ simple_heap_delete(Relation relation, ItemPointer tid)
 HTSU_Result
 heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
 			ItemPointer ctid, TransactionId *update_xmax,
-			CommandId cid, Snapshot crosscheck, bool wait)
+			CommandId cid, Snapshot crosscheck, Bitmapset *modifiedCols,
+			bool wait)
 {
 	HTSU_Result result;
 	TransactionId xid = GetCurrentTransactionId();
@@ -2715,6 +2717,7 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
 	ItemId		lp;
 	HeapTupleData oldtup;
 	HeapTuple	heaptup;
+	HeapTupleData wal_tup;
 	Page		page;
 	BlockNumber block;
 	Buffer		buffer,
@@ -2730,6 +2733,11 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
 	bool		use_hot_update = false;
 	bool		all_visible_cleared = false;
 	bool		all_visible_cleared_new = false;
+	struct
+	{
+		HeapTupleHeaderData hdr;
+		char		data[MaxHeapTupleSize];
+	}			tbuf;
 
 	Assert(ItemPointerIsValid(otid));
 
@@ -3173,10 +3181,31 @@
 	/* XLOG stuff */
 	if (RelationNeedsWAL(relation))
 	{
-		XLogRecPtr	recptr = log_heap_update(relation, buffer, oldtup.t_self,
-											 newbuf, heaptup,
-											 all_visible_cleared,
-											 all_visible_cleared_new);
+		XLogRecPtr	recptr;
+
+		/*
+		 * Apply the xlog diff update algorithm only for HOT updates.
+		 */
+		if (modifiedCols && use_hot_update)
+		{
+			wal_tup.t_data = (HeapTupleHeader) &tbuf;
+			encode_xlog_update(relation->rd_att->attrs, &oldtup, heaptup,
+							   &wal_tup, modifiedCols);
+
+			recptr = log_heap_update(relation, buffer, oldtup.t_self,
+									 newbuf, &wal_tup,
+									 all_visible_cleared,
+									 all_visible_cleared_new,
+									 true);
+		}
+		else
+		{
+			recptr = log_heap_update(relation, buffer, oldtup.t_self,
+									 newbuf, heaptup,
+									 all_visible_cleared,
+									 all_visible_cleared_new,
+									 false);
+		}
 
 		if (newbuf != buffer)
 		{
@@ -3363,6 +3392,7 @@ simple_heap_update(Relation relation, ItemPointer otid, HeapTuple tup)
 	result = heap_update(relation, otid, tup,
 						 &update_ctid, &update_xmax,
 						 GetCurrentCommandId(true), InvalidSnapshot,
+						 NULL,
 						 true /* wait for commit */ );
 	switch (result)
 	{
@@ -4407,7 +4437,8 @@ log_heap_visible(RelFileNode rnode, BlockNumber block, Buffer vm_buffer,
 static XLogRecPtr
 log_heap_update(Relation reln, Buffer oldbuf, ItemPointerData from,
 				Buffer newbuf, HeapTuple newtup,
-				bool all_visible_cleared, bool new_all_visible_cleared)
+				bool all_visible_cleared, bool new_all_visible_cleared,
+				bool diff_update)
 {
 	xl_heap_update xlrec;
 	xl_heap_header xlhdr;
@@ -4426,9 +4457,15 @@ log_heap_update(Relation reln, Buffer oldbuf, ItemPointerData from,
 
 	xlrec.target.node = reln->rd_node;
 	xlrec.target.tid = from;
-	xlrec.all_visible_cleared = all_visible_cleared;
+	xlrec.diff_update = diff_update;
 	xlrec.newtid = newtup->t_self;
-	xlrec.new_all_visible_cleared = new_all_visible_cleared;
+
+	/*
+	 * The low 4 bits record whether PD_ALL_VISIBLE was cleared on the old
+	 * page; the high 4 bits record the same for the new page.
+	 */
+	xlrec.new_all_visible_cleared = all_visible_cleared;
+	xlrec.new_all_visible_cleared |= new_all_visible_cleared << 4;
 
 	rdata[0].data = (char *) &xlrec;
 	rdata[0].len = SizeOfHeapUpdate;
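Aside (not part of the patch): note the first packing statement must be a plain assignment, not the `|=` the original draft used, since `xlrec` is an uninitialized stack variable and OR-ing into it would keep stale bits. A minimal standalone check of the packing and of the masks the redo side uses:

#include <assert.h>
#include <stdint.h>

int
main(void)
{
	uint8_t		packed;
	int			old_cleared = 1;	/* PD_ALL_VISIBLE cleared on old page */
	int			new_cleared = 0;	/* ... but not on the new page */

	packed = (uint8_t) old_cleared;			/* low 4 bits: old page */
	packed |= (uint8_t) (new_cleared << 4);	/* high 4 bits: new page */

	assert((packed & 0x0F) == 1);			/* old-page test from redo */
	assert(((packed >> 4) & 0x0F) == 0);	/* new-page test from redo */
	return 0;
}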
@@ -5217,14 +5254,18 @@ heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool hot_update)
 	}			tbuf;
 	xl_heap_header xlhdr;
 	int			hsize;
-	uint32		newlen;
+	uint32		new_tup_len = 0;
 	Size		freespace;
 
+	/* Initialize the buffer used to frame the new tuple */
+	MemSet((char *) &tbuf.hdr, 0, sizeof(HeapTupleHeaderData));
+
+	hsize = SizeOfHeapUpdate + SizeOfHeapHeader;
+
 	/*
 	 * The visibility map may need to be fixed even if the heap page is
 	 * already up-to-date.
 	 */
-	if (xlrec->all_visible_cleared)
+	if (xlrec->new_all_visible_cleared & 0x0F)
 	{
 		Relation	reln = CreateFakeRelcacheEntry(xlrec->target.node);
 		BlockNumber block = ItemPointerGetBlockNumber(&xlrec->target.tid);
@@ -5244,12 +5285,14 @@ heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool hot_update)
 	}
 
 	/* Deal with old tuple version */
-
 	buffer = XLogReadBuffer(xlrec->target.node,
 							ItemPointerGetBlockNumber(&(xlrec->target.tid)),
 							false);
 	if (!BufferIsValid(buffer))
+	{
 		goto newt;
+	}
+
 	page = (Page) BufferGetPage(buffer);
 
 	if (XLByteLE(lsn, PageGetLSN(page)))	/* changes are applied */
@@ -5269,6 +5312,29 @@ heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool hot_update)
 
 	htup = (HeapTupleHeader) PageGetItem(page, lp);
 
+	if (xlrec->diff_update)
+	{
+		char	   *data = (char *) &tbuf.hdr + htup->t_hoff;
+		uint32		old_tup_len;
+		uint32		wal_len;
+		char	   *waldata = (char *) xlrec + hsize + htup->t_hoff
+								- offsetof(HeapTupleHeaderData, t_bits);
+
+		wal_len = record->xl_len - hsize;
+		Assert(wal_len <= MaxHeapTupleSize);
+
+		wal_len -= (htup->t_hoff - offsetof(HeapTupleHeaderData, t_bits));
+
+		old_tup_len = ItemIdGetLength(lp) - htup->t_hoff;
+
+		/* copy exactly the tuple header present in the WAL to the new tuple */
+		memcpy((char *) &tbuf.hdr + offsetof(HeapTupleHeaderData, t_bits),
+			   (char *) xlrec + hsize,
+			   (htup->t_hoff - offsetof(HeapTupleHeaderData, t_bits)));
+
+		decode_xlog_update(htup, old_tup_len, data, &new_tup_len,
+						   waldata, wal_len);
+	}
+
 	htup->t_infomask &= ~(HEAP_XMAX_COMMITTED |
 						  HEAP_XMAX_INVALID |
 						  HEAP_XMAX_IS_MULTI |
@@ -5286,7 +5352,7 @@
 	/* Mark the page as a candidate for pruning */
 	PageSetPrunable(page, record->xl_xid);
 
-	if (xlrec->all_visible_cleared)
+	if (xlrec->new_all_visible_cleared & 0x0F)
 		PageClearAllVisible(page);
 
 	/*
@@ -5295,6 +5361,7 @@
 	 */
 	if (samepage)
 		goto newsame;
+
 	PageSetLSN(page, lsn);
 	PageSetTLI(page, ThisTimeLineID);
 	MarkBufferDirty(buffer);
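Aside (not part of the patch): the pointer arithmetic in the diff_update branch above peels the record apart as sketched in this layout comment (a restatement, not literal code):

/*
 * Diff-update WAL record layout, as consumed by heap_xlog_update:
 *
 *	xl_heap_update		(SizeOfHeapUpdate)
 *	xl_heap_header		(SizeOfHeapHeader)		-- these two make up hsize
 *	tuple header tail	(t_hoff - offsetof(HeapTupleHeaderData, t_bits)),
 *						copied verbatim by encode_xlog_update
 *	opcode stream		(record->xl_len - hsize - the header tail),
 *						consumed by decode_xlog_update
 */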
@@ -5308,7 +5375,7 @@ newt:;
 	 * The visibility map may need to be fixed even if the heap page is
 	 * already up-to-date.
 	 */
-	if (xlrec->new_all_visible_cleared)
+	if ((xlrec->new_all_visible_cleared >> 4) & 0x0F)
 	{
 		Relation	reln = CreateFakeRelcacheEntry(xlrec->target.node);
 		BlockNumber block = ItemPointerGetBlockNumber(&xlrec->newtid);
@@ -5355,20 +5422,24 @@ newsame:;
 	if (PageGetMaxOffsetNumber(page) + 1 < offnum)
 		elog(PANIC, "heap_update_redo: invalid max offset number");
 
-	hsize = SizeOfHeapUpdate + SizeOfHeapHeader;
-
-	newlen = record->xl_len - hsize;
-	Assert(newlen <= MaxHeapTupleSize);
 	memcpy((char *) &xlhdr,
 		   (char *) xlrec + SizeOfHeapUpdate,
 		   SizeOfHeapHeader);
+
 	htup = &tbuf.hdr;
-	MemSet((char *) htup, 0, sizeof(HeapTupleHeaderData));
-	/* PG73FORMAT: get bitmap [+ padding] [+ oid] + data */
-	memcpy((char *) htup + offsetof(HeapTupleHeaderData, t_bits),
-		   (char *) xlrec + hsize,
-		   newlen);
-	newlen += offsetof(HeapTupleHeaderData, t_bits);
+
+	if (!xlrec->diff_update)
+	{
+		new_tup_len = record->xl_len - hsize;
+		Assert(new_tup_len <= MaxHeapTupleSize);
+
+		/* PG73FORMAT: get bitmap [+ padding] [+ oid] + data */
+		memcpy((char *) htup + offsetof(HeapTupleHeaderData, t_bits),
+			   (char *) xlrec + hsize,
+			   new_tup_len);
+	}
+
+	new_tup_len += offsetof(HeapTupleHeaderData, t_bits);
 	htup->t_infomask2 = xlhdr.t_infomask2;
 	htup->t_infomask = xlhdr.t_infomask;
 	htup->t_hoff = xlhdr.t_hoff;
@@ -5378,7 +5449,7 @@ newsame:;
 	/* Make sure there is no forward chain link in t_ctid */
 	htup->t_ctid = xlrec->newtid;
 
-	offnum = PageAddItem(page, (Item) htup, newlen, offnum, true, true);
+	offnum = PageAddItem(page, (Item) htup, new_tup_len, offnum, true, true);
 	if (offnum == InvalidOffsetNumber)
 		elog(PANIC, "heap_update_redo: failed to add tuple");
 
diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c
index a7bce75..756fcf7 100644
--- a/src/backend/executor/nodeModifyTable.c
+++ b/src/backend/executor/nodeModifyTable.c
@@ -48,6 +48,7 @@
 #include "utils/memutils.h"
 #include "utils/rel.h"
 #include "utils/tqual.h"
+#include "parser/parsetree.h"
 
 
 /*
@@ -478,12 +479,14 @@ ExecUpdate(ItemPointer tupleid,
 		   bool canSetTag)
 {
 	HeapTuple	tuple;
+	HeapTuple	tuple_bf_trigger;
 	ResultRelInfo *resultRelInfo;
 	Relation	resultRelationDesc;
 	HTSU_Result result;
 	ItemPointerData update_ctid;
 	TransactionId update_xmax;
 	List	   *recheckIndexes = NIL;
+	Bitmapset  *modifiedCols = NULL;
 
 	/*
 	 * abort the operation if not running transactions
@@ -495,7 +498,7 @@ ExecUpdate(ItemPointer tupleid,
 	 * get the heap tuple out of the tuple table slot, making sure we have a
 	 * writable copy
 	 */
-	tuple = ExecMaterializeSlot(slot);
+	tuple = tuple_bf_trigger = ExecMaterializeSlot(slot);
 
 	/*
 	 * get information on the (current) result relation
@@ -553,6 +556,15 @@ lreplace:;
 	if (resultRelationDesc->rd_att->constr)
 		ExecConstraints(resultRelInfo, slot, estate);
 
+	/* Check whether the xlog diff update can be applied */
+	if ((resultRelationDesc->rd_toastoid == InvalidOid)
+		&& (tuple_bf_trigger == tuple)
+		&& (tuple->t_len > MinHeapTupleSizeForDiffUpdate))
+	{
+		modifiedCols = (rt_fetch(resultRelInfo->ri_RangeTableIndex,
+								 estate->es_range_table)->modifiedCols);
+	}
+
 	/*
 	 * replace the heap tuple
 	 *
@@ -566,6 +578,7 @@ lreplace:;
 						 &update_ctid, &update_xmax,
 						 estate->es_output_cid,
 						 estate->es_crosscheck_snapshot,
+						 modifiedCols,
 						 true /* wait for commit */ );
 	switch (result)
 	{
@@ -596,6 +609,14 @@ lreplace:;
 					*tupleid = update_ctid;
 					slot = ExecFilterJunk(resultRelInfo->ri_junkFilter, epqslot);
 					tuple = ExecMaterializeSlot(slot);
+
+					/*
+					 * In case of re-evaluation, reinitialize the values used
+					 * by the xlog diff update algorithm.
+					 */
+					tuple_bf_trigger = tuple;
+					modifiedCols = NULL;
+
 					goto lreplace;
 				}
 			}
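Aside (not part of the patch): the gate above reads naturally as a predicate. A hypothetical helper restating it, assuming PostgreSQL's internal headers and the patch applied (diff_update_applicable is illustrative, not a function in the patch):

/* Same three tests ExecUpdate applies before passing modifiedCols down. */
static bool
diff_update_applicable(Relation rel, HeapTuple tuple_bf_trigger,
					   HeapTuple tuple)
{
	return rel->rd_toastoid == InvalidOid &&
		tuple_bf_trigger == tuple &&	/* triggers did not replace the tuple */
		tuple->t_len > MinHeapTupleSizeForDiffUpdate;	/* worth encoding */
}

Passing NULL for modifiedCols when the predicate fails makes heap_update fall back to logging the whole new tuple, which is also why simple_heap_update passes NULL unconditionally.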
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index 660a854..5e91ba8 100644
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -105,7 +105,8 @@ extern HTSU_Result heap_delete(Relation relation, ItemPointer tid,
 extern HTSU_Result heap_update(Relation relation, ItemPointer otid,
 			HeapTuple newtup,
 			ItemPointer ctid, TransactionId *update_xmax,
-			CommandId cid, Snapshot crosscheck, bool wait);
+			CommandId cid, Snapshot crosscheck, Bitmapset *modifiedCols,
+			bool wait);
 extern HTSU_Result heap_lock_tuple(Relation relation, HeapTuple tuple,
 			Buffer *buffer, ItemPointer ctid,
 			TransactionId *update_xmax, CommandId cid,
diff --git a/src/include/access/htup.h b/src/include/access/htup.h
index b289e14..9ab9713 100644
--- a/src/include/access/htup.h
+++ b/src/include/access/htup.h
@@ -16,6 +16,7 @@
 
 #include "access/tupdesc.h"
 #include "access/tupmacs.h"
+#include "nodes/bitmapset.h"
 #include "storage/bufpage.h"
 #include "storage/itemptr.h"
 #include "storage/relfilenode.h"
@@ -575,6 +576,11 @@ typedef HeapTupleData *HeapTuple;
 #define HeapTupleSetOid(tuple, oid) \
 		HeapTupleHeaderSetOid((tuple)->t_data, (oid))
 
+/*
+ * Minimum tuple length required during an update operation for the WAL
+ * optimization of the update to be applied.
+ */
+#define MinHeapTupleSizeForDiffUpdate	128
 
 /*
  * WAL record definitions for heapam.c's WAL operations
@@ -692,18 +698,29 @@ typedef struct xl_multi_insert_tuple
 
 #define SizeOfMultiInsertTuple	(offsetof(xl_multi_insert_tuple, t_hoff) + sizeof(uint8))
 
-/* This is what we need to know about update|hot_update */
+/* This is what we need to know about update|hot_update|optimized_update */
 typedef struct xl_heap_update
 {
 	xl_heaptid	target;			/* deleted tuple id */
 	ItemPointerData newtid;		/* new inserted tuple id */
-	bool		all_visible_cleared;	/* PD_ALL_VISIBLE was cleared */
-	bool		new_all_visible_cleared;		/* same for the page of newtid */
+	bool		diff_update;	/* optimized update or not */
+	/*
+	 * To keep the structure size the same, all_visible_cleared is merged
+	 * into new_all_visible_cleared.
+	 */
+	bool		new_all_visible_cleared;	/* high 4 bits: PD_ALL_VISIBLE
+											 * was cleared on the new page;
+											 * low 4 bits: same for the old
+											 * page */
 	/* NEW TUPLE xl_heap_header AND TUPLE DATA FOLLOWS AT END OF STRUCT */
 } xl_heap_update;
 
 #define SizeOfHeapUpdate	(offsetof(xl_heap_update, new_all_visible_cleared) + sizeof(bool))
 
+#define HEAP_UPDATE_WAL_OPT_COPY	0
+#define HEAP_UPDATE_WAL_OPT_ADD		1
+#define HEAP_UPDATE_WAL_OPT_IGN		2
+#define HEAP_UPDATE_WAL_OPT_PAD		3
+
 /*
  * This is what we need to know about vacuum page cleanup/redirect
  *
@@ -900,6 +917,11 @@ extern Datum heap_getsysattr(HeapTuple tup, int attnum, TupleDesc tupleDesc,
 			 bool *isnull);
 extern HeapTuple heap_copytuple(HeapTuple tuple);
 extern void heap_copytuple_with_tuple(HeapTuple src, HeapTuple dest);
+extern void encode_xlog_update(Form_pg_attribute *att, HeapTuple oldtup,
+				   HeapTuple heaptup, HeapTuple wal_tup,
+				   Bitmapset *modifiedCols);
+extern void decode_xlog_update(HeapTupleHeader htup, uint32 old_tup_len,
+				   char *data, uint32 *new_tup_len, char *waldata,
+				   uint32 wal_len);
 extern HeapTuple heap_form_tuple(TupleDesc tupleDescriptor,
 				Datum *values, bool *isnull);
 extern HeapTuple heap_modify_tuple(HeapTuple tuple,
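Aside (not part of the patch): a hypothetical in-tree smoke test for the new pair of functions, mirroring the call sites in heap_update and heap_xlog_update. check_diff_roundtrip is illustrative only; it assumes PostgreSQL's internal headers, the patch applied, and that the old and new tuples share the same t_hoff (as HOT updates against one descriptor do).

static void
check_diff_roundtrip(TupleDesc tupdesc, HeapTuple oldtup, HeapTuple newtup,
					 Bitmapset *modifiedCols)
{
	struct
	{
		HeapTupleHeaderData hdr;
		char		data[MaxHeapTupleSize];
	}			walbuf, newbuf;
	HeapTupleData waltup;
	uint8		hoff = newtup->t_data->t_hoff;
	uint32		new_len = 0;

	/* encode the diff, as heap_update does for HOT updates */
	waltup.t_data = (HeapTupleHeader) &walbuf;
	encode_xlog_update(tupdesc->attrs, oldtup, newtup, &waltup, modifiedCols);

	/* decode against the old tuple, as heap_xlog_update does on replay */
	decode_xlog_update(oldtup->t_data,
					   oldtup->t_len - oldtup->t_data->t_hoff,
					   (char *) &newbuf + hoff,
					   &new_len,
					   (char *) waltup.t_data + hoff,
					   waltup.t_len - hoff);

	/* drop the header-tail portion decode_xlog_update folds into the length */
	new_len -= hoff - offsetof(HeapTupleHeaderData, t_bits);

	Assert(new_len == newtup->t_len - hoff);
	Assert(memcmp((char *) &newbuf + hoff,
				  (char *) newtup->t_data + hoff, new_len) == 0);
}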