diff -Ncr --exclude-from=diff-ignore 00orig/doc/src/sgml/advanced.sgml 04pgproc/doc/src/sgml/advanced.sgml *** 00orig/doc/src/sgml/advanced.sgml 2004-04-14 16:45:53.000000000 -0400 --- 04pgproc/doc/src/sgml/advanced.sgml 2004-07-27 10:29:09.000000000 -0400 *************** *** 257,262 **** --- 257,310 ---- you are using. + + + It's possible to control the statements in a transaction in a more + granular fashion through the use of savepoints. Savepoints + allow you to selectively discard parts of the transaction, while + committing the rest. This is done by defining a savepoint with + SAVEPOINT, to which you can later roll back using + ROLLBACK TO. All statements between defining the savepoint + and rolling back to it will have no effect on the final transaction. + + + + After rolling back to a savepoint, it continues to be defined, so you can + roll back to it several times. Conversely, if you are sure you won't need + to roll back to a particular savepoint again, it can be released, so the + system can free some resources. Keep in mind that releasing a savepoint + will automatically release all savepoints that were defined after it. + + + + Remembering the bank database, suppose we debit $100.00 from Alice's + account, and credit Bob's account, only to find later that we wanted to + credit Wally's account. We could do it using savepoints like + + + BEGIN; + UPDATE accounts SET balance = balance - 100.00 + WHERE name = 'Alice'; + SAVEPOINT my_savepoint; + UPDATE accounts SET balance = balance + 100.00 + WHERE name = 'Bob'; + -- oops ... forget that and use Wally's account + ROLLBACK TO my_savepoint; + UPDATE accounts SET balance = balance + 100.00 + WHERE name = 'Wally'; + COMMIT; + + + + + This example is, of course, oversimplified, but there's a lot of control + to be had over a transaction block through the use of savepoints. 
+ Moreover, ROLLBACK TO is the only way to regain control of a + transaction block that was automatically put in aborted state by the + system for some reason, short of rolling it back completely and starting + again. + + diff -Ncr --exclude-from=diff-ignore 00orig/doc/src/sgml/ref/allfiles.sgml 04pgproc/doc/src/sgml/ref/allfiles.sgml *** 00orig/doc/src/sgml/ref/allfiles.sgml 2004-06-26 00:28:45.000000000 -0400 --- 04pgproc/doc/src/sgml/ref/allfiles.sgml 2004-07-27 10:29:09.000000000 -0400 *************** *** 88,96 **** --- 88,99 ---- + + + diff -Ncr --exclude-from=diff-ignore 00orig/doc/src/sgml/ref/begin.sgml 04pgproc/doc/src/sgml/ref/begin.sgml *** 00orig/doc/src/sgml/ref/begin.sgml 2004-01-11 06:24:17.000000000 -0300 --- 04pgproc/doc/src/sgml/ref/begin.sgml 2004-07-27 10:29:09.000000000 -0400 *************** *** 145,150 **** --- 145,151 ---- + diff -Ncr --exclude-from=diff-ignore 00orig/doc/src/sgml/ref/release.sgml 04pgproc/doc/src/sgml/ref/release.sgml *** 00orig/doc/src/sgml/ref/release.sgml 1969-12-31 21:00:00.000000000 -0300 --- 04pgproc/doc/src/sgml/ref/release.sgml 2004-07-27 10:29:09.000000000 -0400 *************** *** 0 **** --- 1,138 ---- + + + + + RELEASE + SQL - Language Statements + + + + RELEASE + destroy a previously defined savepoint + + + + RELEASE + + + + savepoints + releasing + + + + + RELEASE savepoint_name + + + + + Description + + + RELEASE destroys a previously defined savepoint + in the current transaction. + + + + Destroying a savepoint makes it—and all savepoints established after + it was established—unavailable as rollback points, + but it has no other user-visible behavior. It does not undo the + effects of commands executed after the savepoint was established. + To do that, see . + + + + RELEASE also destroys all savepoints that were established + after the named savepoint was established. + + + + Parameters + + + + savepoint_name + + + The name of the savepoint to destroy. 
+ + + + + + + + Notes + + + Specifying a savepoint name that was not previously defined raises + an exception. + + + + It is not possible to release a savepoint when the transaction is in + aborted state. + + + + If multiple savepoints have the same name, only the one that was last + defined is released. + + + + + + Examples + + + To establish and later destroy a savepoint: + + BEGIN; + INSERT INTO table VALUES (3); + SAVEPOINT my_savepoint; + INSERT INTO table VALUES (4); + RELEASE my_savepoint; + COMMIT; + + + + Compatibility + + + RELEASE is fully conforming to the SQL standard. + + + + + See Also + + + + + + + + + + + diff -Ncr --exclude-from=diff-ignore 00orig/doc/src/sgml/ref/rollback.sgml 04pgproc/doc/src/sgml/ref/rollback.sgml *** 00orig/doc/src/sgml/ref/rollback.sgml 2003-11-29 16:51:39.000000000 -0300 --- 04pgproc/doc/src/sgml/ref/rollback.sgml 2004-07-27 10:29:09.000000000 -0400 *************** *** 90,95 **** --- 90,96 ---- + diff -Ncr --exclude-from=diff-ignore 00orig/doc/src/sgml/ref/rollback_to.sgml 04pgproc/doc/src/sgml/ref/rollback_to.sgml *** 00orig/doc/src/sgml/ref/rollback_to.sgml 1969-12-31 21:00:00.000000000 -0300 --- 04pgproc/doc/src/sgml/ref/rollback_to.sgml 2004-07-27 10:29:09.000000000 -0400 *************** *** 0 **** --- 1,158 ---- + + + + + ROLLBACK TO + SQL - Language Statements + + + + ROLLBACK TO + roll back to a savepoint + + + + ROLLBACK TO + + + + savepoints + rolling back + + + + + ROLLBACK TO savepoint_name + + + + + Description + + + Roll back all commands that were executed and destroy all savepoints that + were created after the savepoint was established. The savepoint is + automatically established again. + + + + Parameters + + + + savepoint_name + + + The savepoint to roll back to. + + + + + + + + Notes + + + Use to + destroy a savepoint without discarding the effects of commands executed + after it was established. + + + + Specifying a savepoint name that has not been established causes an + exception to be raised. 
+ + + + Cursors have somewhat non-transactional behavior with respect to + savepoints. Any cursor that is opened inside the savepoint is not closed + when the savepoint is rolled back. If a cursor is affected by a + FETCH command inside a savepoint that is later rolled + back, the cursor position remains at the position that FETCH + left it pointing to (that is, FETCH is not rolled back). + A cursor whose execution causes a transaction to abort is put in a + can't-execute state, so while the transaction can be restored using + ROLLBACK TO, the cursor no longer can be used. + + + + + Examples + + + To undo the effects of the commands executed after my_savepoint + was established, and establish my_savepoint again: + + ROLLBACK TO my_savepoint; + + + + + Cursor positions are not affected by savepoint rollback: + + BEGIN; + + DECLARE foo CURSOR FOR SELECT 1 UNION SELECT 2; + + SAVEPOINT foo; + + FETCH 1 FROM foo; + ?column? + ---------- + 1 + + ROLLBACK TO foo; + + FETCH 1 FROM foo; + ?column? + ---------- + 2 + + COMMIT; + + + + + + + + Compatibility + + + This command is fully SQL standard conforming. + + + + + See Also + + + + + + + + + + + + diff -Ncr --exclude-from=diff-ignore 00orig/doc/src/sgml/ref/savepoint.sgml 04pgproc/doc/src/sgml/ref/savepoint.sgml *** 00orig/doc/src/sgml/ref/savepoint.sgml 1969-12-31 21:00:00.000000000 -0300 --- 04pgproc/doc/src/sgml/ref/savepoint.sgml 2004-07-27 10:29:09.000000000 -0400 *************** *** 0 **** --- 1,153 ---- + + + + + SAVEPOINT + SQL - Language Statements + + + + SAVEPOINT + define a new savepoint within the current transaction + + + + SAVEPOINT + + + + savepoints + defining + + + + + SAVEPOINT savepoint_name + + + + + Description + + + SAVEPOINT establishes a new savepoint within + the current transaction. + + + + + + Parameters + + + + savepoint_name + + + The name to give to the new savepoint. 
+ + + + + + + + Notes + + + A savepoint is a special mark inside a transaction that allows all commands + that are executed after it was established to be rolled back. + Alternatively, a savepoint can be destroyed so that it isn't a possible + rollback destination anymore. In this case, all commands that were executed after + the savepoint was established are preserved. + + + + Use to + rollback to a savepoint. Use to destroy a savepoint, keeping + the effects of commands executed after it was established. + + + + Savepoints can only be established when inside a transaction block. + Issuing SAVEPOINT when not inside a transaction block + will cause an exception to be raised. + + + + There can be multiple savepoints defined within a transaction. + + + + + Examples + + + To establish a savepoint and undo the effects of all commands executed + after it was established, keeping only the first inserted value + in the table: + + BEGIN; + INSERT INTO table VALUES (1); + SAVEPOINT my_savepoint; + INSERT INTO table VALUES (2); + ROLLBACK TO my_savepoint; + COMMIT; + + + + + To establish and later destroy a savepoint, keeping both values in the table: + + BEGIN; + INSERT INTO table VALUES (3); + SAVEPOINT my_savepoint; + INSERT INTO table VALUES (4); + RELEASE my_savepoint; + COMMIT; + + + + Compatibility + + + SQL requires a savepoint to be automatically destroyed when another savepoint + with the same name is established. In PostgreSQL, the old + savepoint is kept, though only the last one will be used when rolling back or + releasing. Other than that, SAVEPOINT is fully SQL conforming. 
+ + + + + See Also + + + + + + + + + + + + diff -Ncr --exclude-from=diff-ignore 00orig/doc/src/sgml/ref/start_transaction.sgml 04pgproc/doc/src/sgml/ref/start_transaction.sgml *** 00orig/doc/src/sgml/ref/start_transaction.sgml 2004-01-11 02:46:58.000000000 -0300 --- 04pgproc/doc/src/sgml/ref/start_transaction.sgml 2004-07-27 10:29:09.000000000 -0400 *************** *** 66,71 **** --- 66,72 ---- + diff -Ncr --exclude-from=diff-ignore 00orig/doc/src/sgml/reference.sgml 04pgproc/doc/src/sgml/reference.sgml *** 00orig/doc/src/sgml/reference.sgml 2004-06-26 00:28:44.000000000 -0400 --- 04pgproc/doc/src/sgml/reference.sgml 2004-07-27 10:29:09.000000000 -0400 *************** *** 120,128 **** --- 120,131 ---- &notify; &prepare; &reindex; + &releaseSavepoint; &reset; &revoke; &rollback; + &rollbackTo; + &savepoint; &select; &selectInto; &set; diff -Ncr --exclude-from=diff-ignore 00orig/src/backend/access/transam/README 04pgproc/src/backend/access/transam/README *** 00orig/src/backend/access/transam/README 1969-12-31 21:00:00.000000000 -0300 --- 04pgproc/src/backend/access/transam/README 2004-07-28 19:18:11.856226058 -0400 *************** *** 0 **** --- 1,224 ---- + The Transaction System + ====================== + + xact.c + ------ + + PostgreSQL's transaction system is a three-layer system, implementing + low-level transactions and subtransactions, on top of which rests the + mainloop's control code, which in turn implements user-visible transactions + and savepoints. + + The middle layer of code is called by postgres.c before and after the + processing of each query, + + StartTransactionCommand + CommitTransactionCommand + AbortCurrentTransaction + + Meanwhile, the user can alter the system's state by issuing the SQL commands + BEGIN, ROLLBACK, SAVEPOINT or RELEASE. 
The traffic cop redirects these calls + to the toplevel routines + + BeginTransactionBlock + EndTransactionBlock + UserAbortTransactionBlock + DefineSavepoint + RollbackToSavepoint + ReleaseSavepoint + RollbackAndReleaseSavepoint + + Depending on the current state of the system, these functions call low-level + functions to activate the real transaction system, + + StartTransaction + CommitTransaction + AbortTransaction + CleanupTransaction + StartSubTransaction + CommitSubTransaction + AbortSubTransaction + CleanupSubTransaction + + Additionally, within a transaction, CommandCounterIncrement is called to + increment the command counter, which allows future commands to "see" the + effects of previous commands within the same transaction. Note that this is + done automatically by CommitTransactionCommand after each query inside a + transaction block, but some utility functions also do it to allow some + operations (usually in the system catalogs) to be seen by future operations in + the same utility command processing (for example, in DefineRelation it is done + after creating the heap so the pg_class row is visible, to be able to lock + it). + + + For example, consider the following sequence of user commands: + + 1) BEGIN + 2) SELECT * FROM foo + 3) INSERT INTO foo VALUES (...) + 4) COMMIT + + In the main processing loop, this results in the following function call + sequence: + + / StartTransactionCommand; + / ProcessUtility; << BEGIN + 1) < BeginTransactionBlock; + \ StartTransaction; + \ CommitTransactionCommand; + + / StartTransactionCommand; + 2) / ProcessQuery; << SELECT * FROM foo + \ CommitTransactionCommand; + \ CommandCounterIncrement; + + / StartTransactionCommand; + 3) / ProcessQuery; << INSERT INTO foo VALUES (...) 
+ \ CommitTransactionCommand; + \ CommandCounterIncrement; + + / StartTransactionCommand; + / ProcessUtility; << COMMIT + 4) < EndTransactionBlock; + \ CommitTransaction; + \ CommitTransactionCommand; + + The point of this example is to demonstrate the need for + StartTransactionCommand and CommitTransactionCommand to be state smart -- they + should do nothing in between the calls to BeginTransactionBlock and + EndTransactionBlock and outside these calls they need to do normal start, + commit or abort processing. + + Furthermore, suppose the "SELECT * FROM foo" caused an abort condition. In + this case AbortCurrentTransaction is called, and the transaction is put in + aborted state. In this state, any user input is ignored except for + transaction-termination statements, or ROLLBACK TO commands. + + Transaction aborts can occur in two ways: + + 1) system dies from some internal cause (syntax error, etc) + 2) user types ROLLBACK + + The reason we have to distinguish them is illustrated by the following two + situations: + + case 1 case 2 + ------ ------ + 1) user types BEGIN 1) user types BEGIN + 2) user does something 2) user does something + 3) user does not like what 3) system aborts for some reason + she sees and types ABORT (syntax error, etc) + + In case 1, we want to abort the transaction and return to the default state. + In case 2, there may be more commands coming our way which are part of the + same transaction block; we have to ignore these commands until we see a COMMIT + or ROLLBACK. + + Internal aborts are handled by AbortCurrentTransaction, while user aborts are + handled by UserAbortTransactionBlock. Both of them rely on AbortTransaction + to do all the real work. 
The only difference is what state we enter after + AbortTransaction does its work: + + * AbortCurrentTransaction leaves us in TBLOCK_ABORT, + * UserAbortTransactionBlock leaves us in TBLOCK_ENDABORT + + Low-level transaction abort handling is divided in two phases: + * AbortTransaction executes as soon as we realize the transaction has + failed. It should release all shared resources (locks etc) so that we do + not delay other backends unnecessarily. + * CleanupTransaction executes when we finally see a user COMMIT + or ROLLBACK command; it cleans things up and gets us out of the transaction + internally. In particular, we mustn't destroy TopTransactionContext until + this point. + + Also, note that when a transaction is committed, we don't close it right away. + Rather it's put in TBLOCK_END state, which means that when + CommitTransactionCommand is called after the query has finished processing, + the transaction has to be closed. The distinction is subtle but important, + because it means that control will leave the xact.c code with the transaction + open, and the main loop will be able to keep processing inside the same + transaction. So, in a sense, transaction commit is also handled in two + phases, the first at EndTransactionBlock and the second at + CommitTransactionCommand. + + The rest of the code in xact.c are routines to support the creation and + finishing of transactions and subtransactions. For example, AtStart_Memory + takes care of initializing the memory subsystem at main transaction start. + + + Subtransaction handling + ----------------------- + + Subtransactions are implemented using a stack of TransactionState structures, + which has a pointer to its parent transaction. When a new subtransaction is + to be opened, PushTransaction is called, which creates a new TransactionState, + with its parent pointing to the current transaction. 
StartSubTransaction is + in charge of initializing the new TransactionState to sane values, and + properly initializing other subsystems. + + When closing a subtransaction, either CommitSubTransaction has to be called + (if the subtransaction is committing), or AbortSubTransaction and + CleanupSubTransaction (if it's aborting). In either case, PopTransaction is + called so the system returns to the parent transaction. + + One important point regarding subtransaction handling is that several may need + to be closed in response to a single user command. That's because savepoints + have names, and we allow committing or rolling back a savepoint by name, which is not + necessarily the one that was last opened. In the case of subtransaction + commit this is not a problem, and we close all the involved subtransactions + right away by calling CommitTransactionToLevel, which in turn calls + CommitSubTransaction and PopTransaction as many times as needed. + + In the case of subtransaction abort (when the user issues ROLLBACK TO + <name>), things are not so easy. We have to keep the subtransactions + open and return control to the main loop. So what RollbackToSavepoint does is + abort the innermost subtransaction and put it in TBLOCK_SUBENDABORT state, and + put the rest in TBLOCK_SUBABORT_PENDING state. Then we return control to the + main loop, which will in turn return control to us by calling + CommitTransactionCommand. At this point we can close all subtransactions that + are marked with the "abort pending" state. + + + + pg_clog and pg_subtrans + ----------------------- + + pg_clog and pg_subtrans are permanent (on-disk) storage of transaction related + information. There is a limited number of pages of each kept in memory, so + in many cases there is no need to actually read from disk. However, if + there's a long running transaction or a backend sitting idle with an open + transaction, it's necessary to be able to read and write this information from + disk. 
They also allow information to be permanent across server restarts. + + pg_clog records the commit status for each transaction. A transaction can be + in progress, committed, aborted, or "sub-committed". This last state means + that it's a subtransaction that's no longer running, but its parent has not + updated its state yet (either it is still running, or the backend crashed + without updating its status). A sub-committed transaction's status will be + updated again to the final value as soon as the parent commits or aborts, or + when the parent is detected to be aborted. + + Savepoints are implemented using subtransactions. A subtransaction is a + transaction inside a transaction; it gets its own TransactionId, but its + commit or abort status is not only dependent on whether it committed itself, + but also whether its parent transaction committed. To implement multiple + savepoints in a transaction we allow unlimited transaction nesting depth, so + any particular subtransaction's commit state is dependent on the commit status + of each and every ancestor transaction. + + The "subtransaction parent" (pg_subtrans) mechanism records, for each + transaction, the TransactionId of its parent transaction. This information is + stored as soon as the subtransaction is created. + + pg_subtrans is used to know whether the transaction in question is still + running --- the main Xid of a transaction is recorded in the PGPROC struct, + but since we allow arbitrary nesting of subtransactions, we can't fit all Xids + in shared memory, so we have to store them on disk. Note, however, that for + each transaction we keep a "cache" of Xids that are known to be part of the + transaction tree, so we can skip looking at pg_subtrans unless we know the + cache has overflowed. See storage/ipc/sinval.c for the gory details. + + slru.c is the supporting mechanism for both pg_clog and pg_subtrans. It + implements the LRU policy for in-memory buffer pages. 
The high-level routines + for pg_clog are implemented in transam.c, while the low-level functions are in + clog.c. pg_subtrans is contained completely in subtrans.c. diff -Ncr --exclude-from=diff-ignore 00orig/src/backend/access/transam/xact.c 04pgproc/src/backend/access/transam/xact.c *** 00orig/src/backend/access/transam/xact.c 2004-07-28 14:36:40.351828862 -0400 --- 04pgproc/src/backend/access/transam/xact.c 2004-07-28 14:24:59.073029982 -0400 *************** *** 10,141 **** * IDENTIFICATION * $PostgreSQL: pgsql-server/src/backend/access/transam/xact.c,v 1.172 2004/07/27 05:10:49 tgl Exp $ * - * NOTES - * Transaction aborts can now occur two ways: - * - * 1) system dies from some internal cause (syntax error, etc..) - * 2) user types ABORT - * - * These two cases used to be treated identically, but now - * we need to distinguish them. Why? consider the following - * two situations: - * - * case 1 case 2 - * ------ ------ - * 1) user types BEGIN 1) user types BEGIN - * 2) user does something 2) user does something - * 3) user does not like what 3) system aborts for some reason - * she sees and types ABORT - * - * In case 1, we want to abort the transaction and return to the - * default state. In case 2, there may be more commands coming - * our way which are part of the same transaction block and we have - * to ignore these commands until we see a COMMIT transaction or - * ROLLBACK. - * - * Internal aborts are now handled by AbortTransactionBlock(), just as - * they always have been, and user aborts are now handled by - * UserAbortTransactionBlock(). Both of them rely on AbortTransaction() - * to do all the real work. 
The only difference is what state we - * enter after AbortTransaction() does its work: - * - * * AbortTransactionBlock() leaves us in TBLOCK_ABORT and - * * UserAbortTransactionBlock() leaves us in TBLOCK_ENDABORT - * - * Low-level transaction abort handling is divided into two phases: - * * AbortTransaction() executes as soon as we realize the transaction - * has failed. It should release all shared resources (locks etc) - * so that we do not delay other backends unnecessarily. - * * CleanupTransaction() executes when we finally see a user COMMIT - * or ROLLBACK command; it cleans things up and gets us out of - * the transaction internally. In particular, we mustn't destroy - * TopTransactionContext until this point. - * - * NOTES - * The essential aspects of the transaction system are: - * - * o transaction id generation - * o transaction log updating - * o memory cleanup - * o cache invalidation - * o lock cleanup - * - * Hence, the functional division of the transaction code is - * based on which of the above things need to be done during - * a start/commit/abort transaction. For instance, the - * routine AtCommit_Memory() takes care of all the memory - * cleanup stuff done at commit time. - * - * The code is layered as follows: - * - * StartTransaction - * CommitTransaction - * AbortTransaction - * CleanupTransaction - * - * are provided to do the lower level work like recording - * the transaction status in the log and doing memory cleanup. - * above these routines are another set of functions: - * - * StartTransactionCommand - * CommitTransactionCommand - * AbortCurrentTransaction - * - * These are the routines used in the postgres main processing - * loop. They are sensitive to the current transaction block state - * and make calls to the lower level routines appropriately. 
- * - * Support for transaction blocks is provided via the functions: - * - * BeginTransactionBlock - * CommitTransactionBlock - * AbortTransactionBlock - * - * These are invoked only in response to a user "BEGIN WORK", "COMMIT", - * or "ROLLBACK" command. The tricky part about these functions - * is that they are called within the postgres main loop, in between - * the StartTransactionCommand() and CommitTransactionCommand(). - * - * For example, consider the following sequence of user commands: - * - * 1) begin - * 2) select * from foo - * 3) insert into foo (bar = baz) - * 4) commit - * - * in the main processing loop, this results in the following - * transaction sequence: - * - * / StartTransactionCommand(); - * 1) / ProcessUtility(); << begin - * \ BeginTransactionBlock(); - * \ CommitTransactionCommand(); - * - * / StartTransactionCommand(); - * 2) < ProcessQuery(); << select * from foo - * \ CommitTransactionCommand(); - * - * / StartTransactionCommand(); - * 3) < ProcessQuery(); << insert into foo (bar = baz) - * \ CommitTransactionCommand(); - * - * / StartTransactionCommand(); - * 4) / ProcessUtility(); << commit - * \ CommitTransactionBlock(); - * \ CommitTransactionCommand(); - * - * The point of this example is to demonstrate the need for - * StartTransactionCommand() and CommitTransactionCommand() to - * be state smart -- they should do nothing in between the calls - * to BeginTransactionBlock() and EndTransactionBlock() and - * outside these calls they need to do normal start/commit - * processing. - * - * Furthermore, suppose the "select * from foo" caused an abort - * condition. We would then want to abort the transaction and - * ignore all subsequent commands up to the "commit". 
- * -cim 3/23/90 - * *------------------------------------------------------------------------- */ --- 10,15 ---- *************** *** 1020,1025 **** --- 894,901 ---- TransactionIdAbortTree(nchildren, children); TransactionIdAbort(xid); + XidCacheClean(); + END_CRIT_SECTION(); } *************** *** 1159,1164 **** --- 1035,1042 ---- TransactionIdAbortTree(nchildren, children); TransactionIdAbort(xid); + XidCacheRemoveRunningXids(nchildren, children, xid); + END_CRIT_SECTION(); } *************** *** 1390,1395 **** --- 1268,1278 ---- MyProc->xid = InvalidTransactionId; MyProc->xmin = InvalidTransactionId; LWLockRelease(SInvalLock); + + /* + * Clean up the Xid cache. + */ + XidCacheClean(); } /* *************** *** 1589,1595 **** * State should still be TRANS_ABORT from AbortTransaction(). */ if (s->state != TRANS_ABORT) ! elog(FATAL, "CleanupTransaction and not in abort state"); /* * do abort cleanup processing --- 1472,1479 ---- * State should still be TRANS_ABORT from AbortTransaction(). */ if (s->state != TRANS_ABORT) ! elog(FATAL, "CleanupTransaction while in %s state", ! TransStateAsString(s->state)); /* * do abort cleanup processing *************** *** 2897,2902 **** --- 2781,2793 ---- XactLockTableInsert(s->transactionIdData); /* + * Ideally, we would only cache Xids of subtransactions that write tuples + * in permanent storage. We have no clean way of knowing that, however + * (much less in advance ...) + */ + XidCacheAddRunningXid(s->transactionIdData); + + /* * Finish setup of other transaction state fields. 
*/ s->currentUser = GetUserId(); diff -Ncr --exclude-from=diff-ignore 00orig/src/backend/storage/ipc/sinval.c 04pgproc/src/backend/storage/ipc/sinval.c *** 00orig/src/backend/storage/ipc/sinval.c 2004-06-30 23:50:07.000000000 -0400 --- 04pgproc/src/backend/storage/ipc/sinval.c 2004-07-28 19:44:01.587585261 -0400 *************** *** 27,32 **** --- 27,53 ---- #include "utils/tqual.h" #include "miscadmin.h" + #ifdef XIDCACHE_DEBUG + static void + DisplayXidCache(int code, Datum arg); + + /* counters for XidCache measurement */ + static int xc_by_recent_xmin = 0; + static int xc_by_main_xid = 0; + static int xc_by_child_xid = 0; + static int xc_slow_answer = 0; + #define xc_by_recent_xmin_inc xc_by_recent_xmin++ + #define xc_by_main_xid_inc xc_by_main_xid++ + #define xc_by_child_xid_inc xc_by_child_xid++ + #define xc_slow_answer_inc xc_slow_answer++ + + #else /* XIDCACHE_DEBUG */ + + #define xc_by_recent_xmin_inc + #define xc_by_main_xid_inc + #define xc_by_child_xid_inc + #define xc_slow_answer_inc + #endif /* XIDCACHE_DEBUG */ /* * Because backends sitting idle will not be reading sinval events, we *************** *** 80,85 **** --- 101,110 ---- ereport(FATAL, (errcode(ERRCODE_TOO_MANY_CONNECTIONS), errmsg("sorry, too many clients already"))); + + #ifdef XIDCACHE_DEBUG + on_proc_exit(DisplayXidCache, (Datum) NULL); + #endif /* XIDCACHE_DEBUG */ } /* *************** *** 444,451 **** * * SInvalLock has to be held while we do 1 and 2. If we save all the Xids * while doing 1, we can release the SInvalLock while we do 3. This buys back ! * some concurrency (we can't retrieve the main Xids from PGPROC again anyway, ! * see GetNewTransactionId) */ bool TransactionIdIsInProgress(TransactionId xid) --- 469,476 ---- * * SInvalLock has to be held while we do 1 and 2. If we save all the Xids * while doing 1, we can release the SInvalLock while we do 3. This buys back ! * some concurrency (we can't retrieve the main Xids from PGPROC again anyway; ! * see GetNewTransactionId). 
*/ bool TransactionIdIsInProgress(TransactionId xid) *************** *** 453,465 **** bool result = false; SISeg *segP = shmInvalBuffer; ProcState *stateP = segP->procState; ! int i; ! int nxids = 0; TransactionId *xids; ! xids = (TransactionId *)palloc(sizeof(TransactionId) * segP->maxBackends); LWLockAcquire(SInvalLock, LW_SHARED); for (i = 0; i < segP->lastBackend; i++) { --- 478,502 ---- bool result = false; SISeg *segP = shmInvalBuffer; ProcState *stateP = segP->procState; ! int i, ! j; TransactionId *xids; + bool locked; + bool overflowed = false; + + /* + * Don't bother checking a very old transaction. + */ + if (TransactionIdPrecedes(xid, RecentGlobalXmin)) + { + xc_by_recent_xmin_inc; + return false; + } ! xids = (TransactionId *) palloc(sizeof(TransactionId) * segP->maxBackends); LWLockAcquire(SInvalLock, LW_SHARED); + locked = true; for (i = 0; i < segP->lastBackend; i++) { *************** *** 473,545 **** TransactionId pxid = proc->xid; /* ! * check the main Xid (step 1 above) */ if (TransactionIdEquals(pxid, xid)) { result = true; ! break; } ! /* ! * save the main Xid for step 3. ! */ ! xids[nxids++] = pxid; ! #ifdef NOT_USED ! FIXME -- waiting to save the Xids in PGPROC ... /* ! * check the saved Xids array (step 2) */ ! for (j = 0; j < PGPROC_MAX_SAVED_XIDS; j++) { ! pxid = proc->savedxids[j]; ! if (!TransactionIdIsValid(pxids)) ! break; if (TransactionIdEquals(pxid, xid)) { result = true; ! break; } } - #endif - - if (result) - break; } } LWLockRelease(SInvalLock); /* * Step 3: have to check pg_subtrans. Use the saved Xids. * ! * XXX Could save the cached Xids too for further improvement. */ ! if (!result) { ! /* this is a potentially expensive call. */ ! xid = SubTransGetTopmostTransaction(xid); ! ! Assert(TransactionIdIsValid(xid)); ! /* ! * We don't care if it aborted, because if it did, we won't find ! * it in the array. ! */ ! for (i = 0; i < nxids; i++) { ! if (TransactionIdEquals(xids[i], xid)) ! { ! result = true; ! break; ! 
} } } pfree(xids); return result; --- 510,629 ---- TransactionId pxid = proc->xid; /* ! * Step 1: check the main Xid */ if (TransactionIdEquals(pxid, xid)) { + xc_by_main_xid_inc; result = true; ! goto result_known; } ! /* save the main Xid for step 3. */ ! xids[i] = pxid; ! if (proc->cache.overflow) ! overflowed = true; /* ! * Step 2: check the cached Xids arrays */ ! for (j = 0; j < PGPROC_MAX_CACHED_SUBXIDS; j++) { ! pxid = proc->cache.xids[j]; ! if (!TransactionIdIsValid(pxid)) ! continue; if (TransactionIdEquals(pxid, xid)) { + xc_by_child_xid_inc; result = true; ! goto result_known; } } } } LWLockRelease(SInvalLock); + locked = false; + + /* + * If none of the caches overflowed, we know the Xid is + * not running without looking at pg_subtrans. + */ + if (!overflowed) + goto result_known; /* * Step 3: have to check pg_subtrans. Use the saved Xids. + */ + xc_slow_answer_inc; + + /* + * At this point, we know it's either a subtransaction or + * it's not running. If it's a subtransaction, we have to + * check whether it's part of a running subtransaction tree + * or it was aborted. So we have to look at pg_clog, but + * since we already checked the PGPROC array we don't have to + * worry about a race condition. + */ + if (TransactionIdDidAbort(xid)) + { + result = false; + goto result_known; + } + + /* + * It isn't aborted, so check whether the transaction tree it + * belongs to is still running (or, more precisely, whether it + * was running when this routine started -- note that we just + * released SInvalLock.) + */ + xid = SubTransGetTopmostTransaction(xid); + Assert(TransactionIdIsValid(xid)); + + for (i = 0; i < segP->maxBackends; i++) + { + if (TransactionIdEquals(xids[i], xid)) + { + result = true; + break; + } + } + + /* + * pg_subtrans says it's running in the i-nd backend. Check if it's + * still true. * ! * This is strictly not needed, but 1) an eternity has passed since ! * the main Xid was read (we took a peek at both pg_clog and pg_subtrans ! 
* in the meantime), and 2) it's a cheap test. */ ! if (result) { ! SHMEM_OFFSET pOffset; ! LWLockAcquire(SInvalLock, LW_SHARED); ! locked = true; ! pOffset = stateP[i].procStruct; ! if (pOffset != INVALID_OFFSET) { ! PGPROC *proc = (PGPROC *) MAKE_PTR(pOffset); ! ! /* Fetch xid just once - see GetNewTransactionId */ ! TransactionId pxid = proc->xid; ! ! if (!TransactionIdEquals(pxid, xid)) ! result = false; } } + result_known: ; + + if (locked) + LWLockRelease(SInvalLock); + pfree(xids); return result; *************** *** 794,799 **** --- 878,885 ---- snapshot->curcid = GetCurrentCommandId(); + DisplayXidCache(0, 0); + return snapshot; } *************** *** 928,930 **** --- 1014,1128 ---- return count; } + + /* + * XidCacheAddRunningXid + * + * Add a TransactionId to the list of known-running transactions. + * If there is no space in the cache, mark overflow and return. + */ + void + XidCacheAddRunningXid(TransactionId xid) + { + LWLockAcquire(SInvalLock, LW_SHARED); + + if (MyProc->cache.nxids >= PGPROC_MAX_CACHED_SUBXIDS) + { + MyProc->cache.overflow = true; + LWLockRelease(SInvalLock); + return; + } + + TransactionIdStore(xid, &(MyProc->cache.xids[MyProc->cache.nxids])); + MyProc->cache.nxids ++; + LWLockRelease(SInvalLock); + } + + #define XidCacheRemove(i) \ + do { \ + TransactionIdStore(InvalidTransactionId, &(MyProc->cache.xids[i])); \ + MyProc->cache.nxids --; \ + } while (0) + + /* + * XidCacheRemoveRunningXids + * + * Remove a bunch of TransactionIds from the list of known-running + * transactions.
+ */ + void + XidCacheRemoveRunningXids(int nxids, TransactionId *xids, TransactionId xid) + { + int i, j; + + Assert(!TransactionIdEquals(xid, InvalidTransactionId)); + + LWLockAcquire(SInvalLock, LW_SHARED); + + for (i = 0; i < nxids; i++) + { + for (j = 0; j < PGPROC_MAX_CACHED_SUBXIDS; j++) + { + if (TransactionIdEquals(MyProc->cache.xids[j], xids[i])) + { + XidCacheRemove(j); + break; + } + } + } + + for (j = 0; j < PGPROC_MAX_CACHED_SUBXIDS; j++) + { + if (TransactionIdEquals(MyProc->cache.xids[j], xid)) + { + XidCacheRemove(j); + break; + } + } + + LWLockRelease(SInvalLock); + } + + /* + * XidCacheClean + * + * Fast cache cleanup at transaction end. + */ + void + XidCacheClean(void) + { + LWLockAcquire(SInvalLock, LW_SHARED); + + MyProc->cache.overflow = false; + MyProc->cache.nxids = 0; + MemSet(MyProc->cache.xids, '\0', + PGPROC_MAX_CACHED_SUBXIDS * sizeof(TransactionId)); + + LWLockRelease(SInvalLock); + } + + #ifdef XIDCACHE_DEBUG + static void + DisplayXidCache(int code, Datum arg) + { + int i; + if (MyProc == NULL) + { + fprintf(stderr,"XidCache: xmin: %d, mainxid: %d, childxid: %d, slow: %d\n", + xc_by_recent_xmin, + xc_by_main_xid, + xc_by_child_xid, + xc_slow_answer); + } + else + { + fprintf(stderr, "(%s) children:\t", + MyProc->cache.overflow ? "overf" : "no overf"); + for (i = 0; i < PGPROC_MAX_CACHED_SUBXIDS; i++) + { + fprintf(stderr, "%u ", MyProc->cache.xids[i]); /* TransactionId is unsigned: print with %u */ + } + fprintf(stderr, "\n"); + } + } + #endif /* XIDCACHE_DEBUG */ diff -Ncr --exclude-from=diff-ignore 00orig/src/backend/tcop/utility.c 04pgproc/src/backend/tcop/utility.c *** 00orig/src/backend/tcop/utility.c 2004-07-26 21:59:36.000000000 -0400 --- 04pgproc/src/backend/tcop/utility.c 2004-07-27 10:29:30.000000000 -0400 *************** *** 326,333 **** { /* * START TRANSACTION, as defined by SQL99: ! * Identical to BEGIN, except that it takes a few ! * additional options. Same code for both.
*/ case TRANS_STMT_BEGIN: case TRANS_STMT_START: --- 326,332 ---- { /* * START TRANSACTION, as defined by SQL99: ! * Identical to BEGIN. Same code for both. */ case TRANS_STMT_BEGIN: case TRANS_STMT_START: diff -Ncr --exclude-from=diff-ignore 00orig/src/include/access/htup.h 04pgproc/src/include/access/htup.h *** 00orig/src/include/access/htup.h 2004-07-17 18:10:20.000000000 -0400 --- 04pgproc/src/include/access/htup.h 2004-07-27 10:52:44.000000000 -0400 *************** *** 68,101 **** * object ID (if HEAP_HASOID is set in t_infomask) * user data fields * ! * We store five "virtual" fields Xmin, Cmin, Xmax, Cmax, and Xvac ! * in just three physical fields. Xmin is always really stored, but ! * Cmin and Xmax share a field, as do Cmax and Xvac. This works because ! * we know that there are only a limited number of states that a tuple can ! * be in, and that Cmin and Cmax are only interesting for the lifetime of ! * the inserting and deleting transactions respectively. We have the ! * following possible states of a tuple: ! * ! * XMIN CMIN XMAX CMAX XVAC ! * ! * NEW (never deleted, not moved by vacuum): ! * valid valid invalid invalid invalid ! * ! * DELETED BY CREATING XACT: ! * valid valid = XMIN valid invalid ! * ! * DELETED BY OTHER XACT: ! * valid unneeded valid valid invalid ! * ! * MOVED BY VACUUM FULL: ! * valid unneeded maybe-valid unneeded valid ! * ! * This assumes that VACUUM FULL never tries to move a tuple whose Cmin or ! * Cmax is still interesting (ie, insert-in-progress or delete-in-progress). ! * ! * This table shows that if we use an infomask bit to handle the case ! * XMAX=XMIN specially, we never need to store Cmin and Xmax at the same ! * time. Nor do we need to store Cmax and Xvac at the same time. * * Following the fixed header fields, the nulls bitmap is stored (beginning * at t_bits). The bitmap is *not* stored if t_infomask shows that there --- 68,84 ---- * object ID (if HEAP_HASOID is set in t_infomask) * user data fields * ! 
* We store five "virtual" fields Xmin, Cmin, Xmax, Cmax, and Xvac in four ! * physical fields. Xmin, Cmin and Xmax are always really stored, but ! * Cmax and Xvac share a field. This works because we know that there are ! * only a limited number of states that a tuple can be in, and that Cmax ! * is only interesting for the lifetime of the deleting transaction. ! * This assumes that VACUUM FULL never tries to move a ! * tuple whose Cmax is still interesting (ie, delete-in-progress). ! * ! * Note that in 7.3 and 7.4 a similar idea was applied to Xmax and Cmin. ! * However, with the advent of subtransactions, a tuple may need both Xmax ! * and Cmin simultaneously, so this is no longer possible. * * Following the fixed header fields, the nulls bitmap is stored (beginning * at t_bits). The bitmap is *not* stored if t_infomask shows that there *************** *** 424,430 **** #define XLOG_HEAP_MOVE 0x30 #define XLOG_HEAP_CLEAN 0x40 #define XLOG_HEAP_NEWPAGE 0x50 ! /* opcodes 0x60, 0x70 still free */ #define XLOG_HEAP_OPMASK 0x70 /* * When we insert 1st item on new page in INSERT/UPDATE --- 407,413 ---- #define XLOG_HEAP_MOVE 0x30 #define XLOG_HEAP_CLEAN 0x40 #define XLOG_HEAP_NEWPAGE 0x50 ! /* opcode 0x60 still free */ #define XLOG_HEAP_OPMASK 0x70 /* * When we insert 1st item on new page in INSERT/UPDATE diff -Ncr --exclude-from=diff-ignore 00orig/src/include/storage/proc.h 04pgproc/src/include/storage/proc.h *** 00orig/src/include/storage/proc.h 2004-07-26 21:59:44.000000000 -0400 --- 04pgproc/src/include/storage/proc.h 2004-07-28 14:41:03.827806803 -0400 *************** *** 19,24 **** --- 19,44 ---- #include "storage/lock.h" #include "storage/pg_sema.h" + /* + * XXX This number is made up ... + */ + #define PGPROC_MAX_CACHED_SUBXIDS 32 + + /* + * Each backend keeps track of (some of) its subtransactions' + * TransactionIds in the PGPROC struct. + * + * We also keep track of whether the cache overflowed.
If it + * hasn't overflowed, we can assume that an Xid that's not present + * in the cache is not a running transaction. Else we have to look + * at pg_subtrans. + */ + struct XidCache { + /* cache of this backend's running subtransaction Xids */ + int nxids; /* count of Xids currently stored in xids[] */ + TransactionId xids[PGPROC_MAX_CACHED_SUBXIDS]; + bool overflow; /* set when an Xid could not be cached for lack of space */ + }; /* * Each backend has a PGPROC struct in shared memory. There is also a list of *************** *** 39,44 **** --- 59,66 ---- TransactionId xid; /* transaction currently being executed by * this proc */ + struct XidCache cache; /* cached subtransaction Xids */ + TransactionId xmin; /* minimal running XID as it was when we * were starting our xact: vacuum must not * remove tuples deleted by xid >= xmin ! */ diff -Ncr --exclude-from=diff-ignore 00orig/src/include/storage/sinval.h 04pgproc/src/include/storage/sinval.h *** 00orig/src/include/storage/sinval.h 2004-06-03 15:59:03.000000000 -0400 --- 04pgproc/src/include/storage/sinval.h 2004-07-27 23:30:29.000000000 -0400 *************** *** 115,118 **** --- 115,123 ---- extern void EnableCatchupInterrupt(void); extern bool DisableCatchupInterrupt(void); + /* Xid cache updaters */ + extern void XidCacheAddRunningXid(TransactionId xid); + extern void XidCacheRemoveRunningXids(int nxids, TransactionId *xids, TransactionId xid); + extern void XidCacheClean(void); + #endif /* SINVAL_H */