*** src/backend/access/heap/heapam.c.orig Wed Jun 8 11:50:21 2005 --- src/backend/access/heap/heapam.c Mon Jun 20 13:50:16 2005 *************** *** 1034,1042 **** * * The new tuple is stamped with current transaction ID and the specified * command ID. */ Oid ! heap_insert(Relation relation, HeapTuple tup, CommandId cid) { TransactionId xid = GetCurrentTransactionId(); Buffer buffer; --- 1034,1053 ---- * * The new tuple is stamped with current transaction ID and the specified * command ID. + * + * If use_wal is false, the new tuple is not logged in WAL, even for a + * non-temp relation. Safe usage of this behavior requires that we arrange + * that all new tuples go into new pages not containing any tuples from other + * transactions, that the relation gets fsync'd before commit, and that the + * transaction emits at least one WAL record to ensure RecordTransactionCommit + * will decide to WAL-log the commit. + * + * use_fsm is passed directly to RelationGetBufferForTuple, which see for + * more info. */ Oid ! heap_insert(Relation relation, HeapTuple tup, CommandId cid, ! bool use_wal, bool use_fsm) { TransactionId xid = GetCurrentTransactionId(); Buffer buffer; *************** *** 1086,1092 **** heap_tuple_toast_attrs(relation, tup, NULL); /* Find buffer to insert this tuple into */ ! buffer = RelationGetBufferForTuple(relation, tup->t_len, InvalidBuffer); /* NO EREPORT(ERROR) from here till changes are logged */ START_CRIT_SECTION(); --- 1097,1104 ---- heap_tuple_toast_attrs(relation, tup, NULL); /* Find buffer to insert this tuple into */ ! buffer = RelationGetBufferForTuple(relation, tup->t_len, ! InvalidBuffer, use_fsm); /* NO EREPORT(ERROR) from here till changes are logged */ START_CRIT_SECTION(); *************** *** 1096,1102 **** pgstat_count_heap_insert(&relation->pgstat_info); /* XLOG stuff */ ! if (!relation->rd_istemp) { xl_heap_insert xlrec; xl_heap_header xlhdr; --- 1108,1119 ---- pgstat_count_heap_insert(&relation->pgstat_info); /* XLOG stuff */ ! if (relation->rd_istemp) ! { ! /* No XLOG record, but still need to flag that XID exists on disk */ ! MyXactMadeTempRelUpdate = true; ! } ! else if (use_wal) { xl_heap_insert xlrec; xl_heap_header xlhdr; *************** *** 1151,1161 **** PageSetLSN(page, recptr); PageSetTLI(page, ThisTimeLineID); } - else - { - /* No XLOG record, but still need to flag that XID exists on disk */ - MyXactMadeTempRelUpdate = true; - } END_CRIT_SECTION(); --- 1168,1173 ---- *************** *** 1183,1189 **** Oid simple_heap_insert(Relation relation, HeapTuple tup) { ! return heap_insert(relation, tup, GetCurrentCommandId()); } /* --- 1195,1201 ---- Oid simple_heap_insert(Relation relation, HeapTuple tup) { ! return heap_insert(relation, tup, GetCurrentCommandId(), true, true); } /* *************** *** 1743,1749 **** { /* Assume there's no chance to put newtup on same page. */ newbuf = RelationGetBufferForTuple(relation, newtup->t_len, ! buffer); } else { --- 1755,1761 ---- { /* Assume there's no chance to put newtup on same page. */ newbuf = RelationGetBufferForTuple(relation, newtup->t_len, ! buffer, true); } else { *************** *** 1760,1766 **** */ LockBuffer(buffer, BUFFER_LOCK_UNLOCK); newbuf = RelationGetBufferForTuple(relation, newtup->t_len, ! buffer); } else { --- 1772,1778 ---- */ LockBuffer(buffer, BUFFER_LOCK_UNLOCK); newbuf = RelationGetBufferForTuple(relation, newtup->t_len, ! buffer, true); } else { *** src/backend/access/heap/hio.c.orig Sat May 7 17:32:23 2005 --- src/backend/access/heap/hio.c Mon Jun 20 13:50:16 2005 *************** *** 79,90 **** * happen if space is freed in that page after heap_update finds there's not * enough there). In that case, the page will be pinned and locked only once. * * ereport(ERROR) is allowed here, so this routine *must* be called * before any (unlogged) changes are made in buffer pool. */ Buffer RelationGetBufferForTuple(Relation relation, Size len, ! Buffer otherBuffer) { Buffer buffer = InvalidBuffer; Page pageHeader; --- 79,104 ---- * happen if space is freed in that page after heap_update finds there's not * enough there). In that case, the page will be pinned and locked only once. * + * If use_fsm is true (the normal case), we use FSM to help us find free + * space. If use_fsm is false, we always append a new empty page to the + * end of the relation if the tuple won't fit on the current target page. + * This can save some cycles when we know the relation is new and doesn't + * contain useful amounts of free space. + * + * The use_fsm = false case is also useful for non-WAL-logged additions to a + * relation, if the caller holds exclusive lock and is careful to invalidate + * relation->rd_targblock before the first insertion --- that ensures that + * all insertions will occur into newly added pages and not be intermixed + * with tuples from other transactions. That way, a crash can't risk losing + * any committed data of other transactions. (See heap_insert's comments + * for additional constraints needed for safe usage of this behavior.) + * * ereport(ERROR) is allowed here, so this routine *must* be called * before any (unlogged) changes are made in buffer pool. */ Buffer RelationGetBufferForTuple(Relation relation, Size len, ! Buffer otherBuffer, bool use_fsm) { Buffer buffer = InvalidBuffer; Page pageHeader; *************** *** 121,131 **** * on each page that proves not to be suitable.) If the FSM has no * record of a page with enough free space, we give up and extend the * relation. */ targetBlock = relation->rd_targblock; ! if (targetBlock == InvalidBlockNumber) { /* * We have no cached target page, so ask the FSM for an initial --- 135,148 ---- * on each page that proves not to be suitable.) If the FSM has no * record of a page with enough free space, we give up and extend the * relation. + * + * When use_fsm is false, we either put the tuple onto the existing + * target page or extend the relation. */ targetBlock = relation->rd_targblock; ! if (targetBlock == InvalidBlockNumber && use_fsm) { /* * We have no cached target page, so ask the FSM for an initial *************** *** 208,213 **** --- 225,234 ---- LockBuffer(otherBuffer, BUFFER_LOCK_UNLOCK); ReleaseBuffer(buffer); } + + /* Without FSM, always fall out of the loop and extend */ + if (!use_fsm) + break; /* * Update FSM as to condition of this page, and ask for another *** src/backend/executor/execMain.c.orig Sun May 22 18:30:19 2005 --- src/backend/executor/execMain.c Mon Jun 20 14:22:16 2005 *************** *** 33,38 **** --- 33,39 ---- #include "postgres.h" #include "access/heapam.h" + #include "access/xlog.h" #include "catalog/heap.h" #include "catalog/namespace.h" #include "commands/tablecmds.h" *************** *** 44,49 **** --- 45,51 ---- #include "optimizer/clauses.h" #include "optimizer/var.h" #include "parser/parsetree.h" + #include "storage/smgr.h" #include "utils/acl.h" #include "utils/guc.h" #include "utils/lsyscache.h" *************** *** 784,789 **** --- 786,805 ---- * And open the constructed table for writing. */ intoRelationDesc = heap_open(intoRelationId, AccessExclusiveLock); + + /* use_wal off requires rd_targblock be initially invalid */ + Assert(intoRelationDesc->rd_targblock == InvalidBlockNumber); + + /* + * We can skip WAL-logging the insertions, unless PITR is in use. + * + * Note that for a non-temp INTO table, this is safe only because + * we know that the catalog changes above will have been WAL-logged, + * and so RecordTransactionCommit will think it needs to WAL-log the + * eventual transaction commit. Else the commit might be lost, even + * though all the data is safely fsync'd ... + */ + estate->es_into_relation_use_wal = XLogArchivingActive(); } estate->es_into_relation_descriptor = intoRelationDesc; *************** *** 979,985 **** --- 995,1016 ---- * close the "into" relation if necessary, again keeping lock */ if (estate->es_into_relation_descriptor != NULL) + { + /* + * If we skipped using WAL, and it's not a temp relation, + * we must force the relation down to disk before it's + * safe to commit the transaction. This requires forcing + * out any dirty buffers and then doing a forced fsync. + */ + if (!estate->es_into_relation_use_wal && + !estate->es_into_relation_descriptor->rd_istemp) + { + FlushRelationBuffers(estate->es_into_relation_descriptor); + smgrimmedsync(estate->es_into_relation_descriptor->rd_smgr); + } + heap_close(estate->es_into_relation_descriptor, NoLock); + } /* * close any relations selected FOR UPDATE/FOR SHARE, again keeping locks *************** *** 1307,1313 **** tuple = ExecCopySlotTuple(slot); heap_insert(estate->es_into_relation_descriptor, tuple, ! estate->es_snapshot->curcid); /* we know there are no indexes to update */ heap_freetuple(tuple); IncrAppended(); --- 1338,1346 ---- tuple = ExecCopySlotTuple(slot); heap_insert(estate->es_into_relation_descriptor, tuple, ! estate->es_snapshot->curcid, ! estate->es_into_relation_use_wal, ! false); /* never any point in using FSM */ /* we know there are no indexes to update */ heap_freetuple(tuple); IncrAppended(); *************** *** 1386,1392 **** * insert the tuple */ newId = heap_insert(resultRelationDesc, tuple, ! estate->es_snapshot->curcid); IncrAppended(); (estate->es_processed)++; --- 1419,1426 ---- * insert the tuple */ newId = heap_insert(resultRelationDesc, tuple, ! estate->es_snapshot->curcid, ! true, true); IncrAppended(); (estate->es_processed)++; *************** *** 2089,2094 **** --- 2123,2129 ---- epqstate->es_result_relation_info = estate->es_result_relation_info; epqstate->es_junkFilter = estate->es_junkFilter; epqstate->es_into_relation_descriptor = estate->es_into_relation_descriptor; + epqstate->es_into_relation_use_wal = estate->es_into_relation_use_wal; epqstate->es_param_list_info = estate->es_param_list_info; if (estate->es_topPlan->nParamExec > 0) epqstate->es_param_exec_vals = (ParamExecData *) *** src/backend/executor/execUtils.c.orig Thu Apr 28 17:47:12 2005 --- src/backend/executor/execUtils.c Mon Jun 20 13:08:30 2005 *************** *** 186,192 **** --- 186,194 ---- estate->es_result_relation_info = NULL; estate->es_junkFilter = NULL; + estate->es_into_relation_descriptor = NULL; + estate->es_into_relation_use_wal = false; estate->es_param_list_info = NULL; estate->es_param_exec_vals = NULL; *** src/backend/storage/smgr/md.c.orig Sun May 29 00:23:05 2005 --- src/backend/storage/smgr/md.c Mon Jun 20 14:26:50 2005 *************** *** 660,665 **** --- 660,668 ---- /* * mdimmedsync() -- Immediately sync a relation to stable storage. + * + * Note that only writes already issued are synced; this routine knows + * nothing of dirty buffers that may exist inside the buffer manager. */ bool mdimmedsync(SMgrRelation reln) *** src/backend/storage/smgr/smgr.c.orig Fri Jun 17 18:32:46 2005 --- src/backend/storage/smgr/smgr.c Mon Jun 20 14:26:50 2005 *************** *** 650,656 **** /* * smgrimmedsync() -- Force the specified relation to stable storage. * ! * Synchronously force all of the specified relation down to disk. * * This is useful for building completely new relations (eg, new * indexes). Instead of incrementally WAL-logging the index build --- 650,657 ---- /* * smgrimmedsync() -- Force the specified relation to stable storage. * ! * Synchronously force all previous writes to the specified relation ! * down to disk. * * This is useful for building completely new relations (eg, new * indexes). Instead of incrementally WAL-logging the index build *************** *** 664,669 **** --- 665,674 ---- * * The preceding writes should specify isTemp = true to avoid * duplicative fsyncs. + * + * Note that you need to do FlushRelationBuffers() first if there is + * any possibility that there are dirty buffers for the relation; + * otherwise the sync is not very meaningful. */ void smgrimmedsync(SMgrRelation reln) *** src/include/access/heapam.h.orig Mon Jun 6 13:01:24 2005 --- src/include/access/heapam.h Mon Jun 20 13:46:30 2005 *************** *** 156,162 **** ItemPointer tid); extern void setLastTid(const ItemPointer tid); ! extern Oid heap_insert(Relation relation, HeapTuple tup, CommandId cid); extern HTSU_Result heap_delete(Relation relation, ItemPointer tid, ItemPointer ctid, CommandId cid, Snapshot crosscheck, bool wait); extern HTSU_Result heap_update(Relation relation, ItemPointer otid, HeapTuple tup, --- 156,163 ---- ItemPointer tid); extern void setLastTid(const ItemPointer tid); ! extern Oid heap_insert(Relation relation, HeapTuple tup, CommandId cid, ! bool use_wal, bool use_fsm); extern HTSU_Result heap_delete(Relation relation, ItemPointer tid, ItemPointer ctid, CommandId cid, Snapshot crosscheck, bool wait); extern HTSU_Result heap_update(Relation relation, ItemPointer otid, HeapTuple tup, *** src/include/access/hio.h.orig Fri Dec 31 17:46:38 2004 --- src/include/access/hio.h Mon Jun 20 13:08:23 2005 *************** *** 19,24 **** extern void RelationPutHeapTuple(Relation relation, Buffer buffer, HeapTuple tuple); extern Buffer RelationGetBufferForTuple(Relation relation, Size len, ! Buffer otherBuffer); #endif /* HIO_H */ --- 19,24 ---- extern void RelationPutHeapTuple(Relation relation, Buffer buffer, HeapTuple tuple); extern Buffer RelationGetBufferForTuple(Relation relation, Size len, ! Buffer otherBuffer, bool use_fsm); #endif /* HIO_H */ *** src/include/nodes/execnodes.h.orig Fri Jun 17 14:54:18 2005 --- src/include/nodes/execnodes.h Mon Jun 20 13:08:12 2005 *************** *** 304,310 **** --- 304,312 ---- ResultRelInfo *es_result_relation_info; /* currently active array * elt */ JunkFilter *es_junkFilter; /* currently active junk filter */ + Relation es_into_relation_descriptor; /* for SELECT INTO */ + bool es_into_relation_use_wal; /* Parameter info: */ ParamListInfo es_param_list_info; /* values of external params */