diff -Ncr --exclude-from=diff-ignore 00orig/doc/src/sgml/advanced.sgml 04pgproc/doc/src/sgml/advanced.sgml
*** 00orig/doc/src/sgml/advanced.sgml	2004-04-14 16:45:53.000000000 -0400
--- 04pgproc/doc/src/sgml/advanced.sgml	2004-07-27 10:29:09.000000000 -0400
***************
*** 257,262 ****
--- 257,310 ----
       you are using.
      </para>
     </note>
+ 
+    <para>
+     It's possible to control the statements in a transaction in a more
+     granular fashion through the use of <firstterm>savepoints</>.  Savepoints
+     allow you to selectively discard parts of the transaction, while
+     committing the rest.  This is done by defining a savepoint with
+     <command>SAVEPOINT</>, to which you can later roll back using
+     <command>ROLLBACK TO</>.  All statements between defining the savepoint
+     and rolling back to it will have no effect on the final transaction.
+    </para> 
+ 
+    <para>
+     After rolling back to a savepoint, it continues to be defined, so you can
+     roll back to it several times.  Conversely, if you are sure you won't need
+     to roll back to a particular savepoint again, it can be released, so the
+     system can free some resources.  Keep in mind that releasing a savepoint
+     will automatically release all savepoints that were defined after it.
+    </para> 
+ 
+    <para>
+     Remembering the bank database, suppose we debit $100.00 from Alice's
+     account, and credit Bob's account, only to find later that we wanted to
+     credit Wally's account.  We could do it using savepoints like
+ 
+ <programlisting>
+ BEGIN;
+ UPDATE accounts SET balance = balance - 100.00
+     WHERE name = 'Alice';
+ SAVEPOINT my_savepoint;
+ UPDATE accounts SET balance = balance + 100.00
+     WHERE name = 'Bob';
+ -- oops ... forget that and use Wally's account
+ ROLLBACK TO my_savepoint;
+ UPDATE accounts SET balance = balance + 100.00
+     WHERE name = 'Wally';
+ COMMIT;
+ </programlisting>
+    </para>
+ 
+    <para>
+     This example is, of course, oversimplified, but there's a lot of control
+     to be had over a transaction block through the use of savepoints.
+     Moreover, <command>ROLLBACK TO</> is the only way to regain control of a
+     transaction block that was automatically put into aborted state by the
+     system for some reason, short of rolling it back completely and starting
+     again.
+    </para>
+ 
    </sect1>
  
  
diff -Ncr --exclude-from=diff-ignore 00orig/doc/src/sgml/ref/allfiles.sgml 04pgproc/doc/src/sgml/ref/allfiles.sgml
*** 00orig/doc/src/sgml/ref/allfiles.sgml	2004-06-26 00:28:45.000000000 -0400
--- 04pgproc/doc/src/sgml/ref/allfiles.sgml	2004-07-27 10:29:09.000000000 -0400
***************
*** 88,96 ****
--- 88,99 ----
  <!entity notify             system "notify.sgml">
  <!entity prepare            system "prepare.sgml">
  <!entity reindex            system "reindex.sgml">
+ <!entity releaseSavepoint   system "release.sgml">
  <!entity reset              system "reset.sgml">
  <!entity revoke             system "revoke.sgml">
  <!entity rollback           system "rollback.sgml">
+ <!entity rollbackTo         system "rollback_to.sgml">
+ <!entity savepoint          system "savepoint.sgml">
  <!entity select             system "select.sgml">
  <!entity selectInto         system "select_into.sgml">
  <!entity set                system "set.sgml">
diff -Ncr --exclude-from=diff-ignore 00orig/doc/src/sgml/ref/begin.sgml 04pgproc/doc/src/sgml/ref/begin.sgml
*** 00orig/doc/src/sgml/ref/begin.sgml	2004-01-11 06:24:17.000000000 -0300
--- 04pgproc/doc/src/sgml/ref/begin.sgml	2004-07-27 10:29:09.000000000 -0400
***************
*** 145,150 ****
--- 145,151 ----
     <member><xref linkend="sql-commit" endterm="sql-commit-title"></member>
     <member><xref linkend="sql-rollback" endterm="sql-rollback-title"></member>
     <member><xref linkend="sql-start-transaction" endterm="sql-start-transaction-title"></member>
+    <member><xref linkend="sql-savepoint" endterm="sql-savepoint-title"></member>
    </simplelist>
   </refsect1>
  </refentry>
diff -Ncr --exclude-from=diff-ignore 00orig/doc/src/sgml/ref/release.sgml 04pgproc/doc/src/sgml/ref/release.sgml
*** 00orig/doc/src/sgml/ref/release.sgml	1969-12-31 21:00:00.000000000 -0300
--- 04pgproc/doc/src/sgml/ref/release.sgml	2004-07-27 10:29:09.000000000 -0400
***************
*** 0 ****
--- 1,138 ----
+ <!--
+ $PostgreSQL$
+ PostgreSQL documentation
+ -->
+ 
+ <refentry id="SQL-RELEASE">
+  <refmeta>
+   <refentrytitle id="SQL-RELEASE-TITLE">RELEASE</refentrytitle>
+   <refmiscinfo>SQL - Language Statements</refmiscinfo>
+  </refmeta>
+ 
+  <refnamediv>
+   <refname>RELEASE</refname>
+   <refpurpose>destroy a previously defined savepoint</refpurpose>
+  </refnamediv>
+ 
+  <indexterm zone="sql-release">
+   <primary>RELEASE</primary>
+  </indexterm>
+ 
+  <indexterm zone="sql-release">
+   <primary>savepoints</primary>
+   <secondary>releasing</secondary>
+  </indexterm>
+ 
+  <refsynopsisdiv>
+ <synopsis>
+ RELEASE <replaceable>savepoint_name</replaceable>
+ </synopsis>
+  </refsynopsisdiv>
+   
+  <refsect1>
+   <title>Description</title>
+ 
+   <para>
+    <command>RELEASE</command> destroys a previously defined savepoint
+    in the current transaction.
+   </para>
+ 
+   <para>
+    Destroying a savepoint makes it&mdash;and all savepoints established after
+    it was established&mdash;unavailable as rollback points,
+    but it has no other user-visible behavior.  It does not undo the
+    effects of commands executed after the savepoint was established.
+    To do that, see <xref linkend="sql-rollback-to"
+    endterm="sql-rollback-to-title">.
+   </para>
+ 
+   <para>
+    <command>RELEASE</command> also destroys all savepoints that were established
+    after the named savepoint was established.
+   </para>
+  </refsect1>
+  <refsect1>
+   <title>Parameters</title>
+ 
+   <variablelist>
+    <varlistentry>
+     <term><replaceable>savepoint_name</replaceable></term>
+     <listitem>
+      <para>
+       The name of the savepoint to destroy.
+      </para>
+     </listitem>
+    </varlistentry>
+   </variablelist>
+  </refsect1>
+ 
+  <refsect1>
+   <title>Notes</title>
+ 
+   <para>
+    Specifying a savepoint name that was not previously defined raises
+    an exception.
+   </para>
+ 
+   <para>
+    It is not possible to release a savepoint when the transaction is in
+    aborted state.
+   </para>
+ 
+   <para>
+    If multiple savepoints have the same name, only the one that was last
+    defined is released.
+   </para>
+ 
+  </refsect1>
+ 
+  <refsect1>
+   <title>Examples</title>
+ 
+   <para>
+    To establish and later destroy a savepoint:
+ <programlisting>
+ BEGIN;
+     INSERT INTO table1 VALUES (3);
+     SAVEPOINT my_savepoint;
+     INSERT INTO table1 VALUES (4);
+     RELEASE my_savepoint;
+ COMMIT;
+ </programlisting></para>
+  </refsect1>
+  <refsect1>
+   <title>Compatibility</title>
+   
+   <para>
+    RELEASE is fully conforming to the SQL standard.
+   </para>
+  </refsect1>
+ 
+  <refsect1>
+   <title>See Also</title>
+ 
+   <simplelist type="inline">
+    <member><xref linkend="sql-begin" endterm="sql-begin-title"></member>
+    <member><xref linkend="sql-rollback" endterm="sql-rollback-title"></member>
+    <member><xref linkend="sql-savepoint" endterm="sql-savepoint-title"></member>
+    <member><xref linkend="sql-commit" endterm="sql-commit-title"></member>
+   </simplelist>
+  </refsect1>
+ </refentry>
+ 
+ <!-- Keep this comment at the end of the file
+ Local variables:
+ mode: sgml
+ sgml-omittag:nil
+ sgml-shorttag:t
+ sgml-minimize-attributes:nil
+ sgml-always-quote-attributes:t
+ sgml-indent-step:1
+ sgml-indent-data:t
+ sgml-parent-document:nil
+ sgml-default-dtd-file:"../reference.ced"
+ sgml-exposed-tags:nil
+ sgml-local-catalogs:"/usr/lib/sgml/catalog"
+ sgml-local-ecat-files:nil
+ End:
+ -->
diff -Ncr --exclude-from=diff-ignore 00orig/doc/src/sgml/ref/rollback.sgml 04pgproc/doc/src/sgml/ref/rollback.sgml
*** 00orig/doc/src/sgml/ref/rollback.sgml	2003-11-29 16:51:39.000000000 -0300
--- 04pgproc/doc/src/sgml/ref/rollback.sgml	2004-07-27 10:29:09.000000000 -0400
***************
*** 90,95 ****
--- 90,96 ----
    <simplelist type="inline">
     <member><xref linkend="sql-begin" endterm="sql-begin-title"></member>
     <member><xref linkend="sql-commit" endterm="sql-commit-title"></member>
+    <member><xref linkend="sql-rollback-to" endterm="sql-rollback-to-title"></member>
    </simplelist>
   </refsect1>
  </refentry>
diff -Ncr --exclude-from=diff-ignore 00orig/doc/src/sgml/ref/rollback_to.sgml 04pgproc/doc/src/sgml/ref/rollback_to.sgml
*** 00orig/doc/src/sgml/ref/rollback_to.sgml	1969-12-31 21:00:00.000000000 -0300
--- 04pgproc/doc/src/sgml/ref/rollback_to.sgml	2004-07-27 10:29:09.000000000 -0400
***************
*** 0 ****
--- 1,158 ----
+ <!--
+ $PostgreSQL$
+ PostgreSQL documentation
+ -->
+ 
+ <refentry id="SQL-ROLLBACK-TO">
+  <refmeta>
+   <refentrytitle id="SQL-ROLLBACK-TO-TITLE">ROLLBACK TO</refentrytitle>
+   <refmiscinfo>SQL - Language Statements</refmiscinfo>
+  </refmeta>
+ 
+  <refnamediv>
+   <refname>ROLLBACK TO</refname>
+   <refpurpose>roll back to a savepoint</refpurpose>
+  </refnamediv>
+ 
+  <indexterm zone="sql-rollback-to">
+   <primary>ROLLBACK TO</primary>
+  </indexterm>
+ 
+  <indexterm zone="sql-rollback-to">
+   <primary>savepoints</primary>
+   <secondary>rolling back</secondary>
+  </indexterm>
+ 
+  <refsynopsisdiv>
+ <synopsis>
+ ROLLBACK TO <replaceable>savepoint_name</replaceable>
+ </synopsis>
+  </refsynopsisdiv>
+ 
+  <refsect1>
+   <title>Description</title>
+ 
+   <para>
+    Roll back all commands that were executed and destroy all savepoints that
+    were created after the savepoint was established.  The savepoint is
+    automatically established again.
+   </para>
+  </refsect1>
+  <refsect1>
+   <title>Parameters</title>
+ 
+   <variablelist>
+    <varlistentry>
+     <term><replaceable class="PARAMETER">savepoint_name</></term>
+     <listitem>
+      <para>
+       The savepoint to roll back to.
+      </para>
+     </listitem>
+    </varlistentry>
+   </variablelist>
+  </refsect1>
+ 
+  <refsect1>
+   <title>Notes</title>
+ 
+   <para>
+    Use <xref linkend="SQL-RELEASE" endterm="SQL-RELEASE-TITLE"> to
+    destroy a savepoint without discarding the effects of commands executed
+    after it was established.
+   </para>
+ 
+   <para>
+    Specifying a savepoint name that has not been established causes an
+    exception to be raised.
+   </para>
+ 
+   <para>
+    Cursors have somewhat non-transactional behavior with respect to
+    savepoints.  Any cursor that is opened inside the savepoint is not closed
+    when the savepoint is rolled back.  If a cursor is affected by a
+    <command>FETCH</> command inside a savepoint that is later rolled
+    back, the cursor position remains at the position that <command>FETCH</>
+    left it pointing to (that is, <command>FETCH</> is not rolled back).
+    A cursor whose execution causes a transaction to abort is put in a
+    can't-execute state, so while the transaction can be restored using
+    <command>ROLLBACK TO</>, the cursor no longer can be used.
+   </para>
+  </refsect1>
+ 
+  <refsect1>
+   <title>Examples</title>
+ 
+   <para>
+    To undo the effects of the commands executed after <literal>my_savepoint</literal>
+    was established, and establish <literal>my_savepoint</> again:
+ <programlisting>
+ ROLLBACK TO my_savepoint;
+ </programlisting>
+   </para>
+ 
+   <para>
+    Cursor positions are not affected by savepoint rollback:
+ <programlisting>
+ BEGIN;
+ 
+ DECLARE foo CURSOR FOR SELECT 1 UNION SELECT 2;
+ 
+ SAVEPOINT foo;
+ 
+ FETCH 1 FROM foo;
+  ?column? 
+ ----------
+         1
+ 
+ ROLLBACK TO foo;
+ 
+ FETCH 1 FROM foo;
+  ?column? 
+ ----------
+         2
+ 
+ COMMIT;
+ </programlisting>
+    </para>
+ 
+ 
+  </refsect1>
+ 
+  <refsect1>
+   <title>Compatibility</title>
+ 
+   <para>
+    This command is fully SQL standard conforming.
+   </para>
+  </refsect1>
+ 
+  <refsect1>
+   <title>See Also</title>
+ 
+   <simplelist type="inline">
+    <member><xref linkend="sql-begin" endterm="sql-begin-title"></member>
+    <member><xref linkend="sql-commit" endterm="sql-commit-title"></member>
+    <member><xref linkend="sql-savepoint" endterm="sql-savepoint-title"></member>
+    <member><xref linkend="sql-release" endterm="sql-release-title"></member>
+    <member><xref linkend="sql-rollback" endterm="sql-rollback-title"></member>
+   </simplelist>
+  </refsect1>
+ </refentry>
+ 
+ <!-- Keep this comment at the end of the file
+ Local variables:
+ mode: sgml
+ sgml-omittag:nil
+ sgml-shorttag:t
+ sgml-minimize-attributes:nil
+ sgml-always-quote-attributes:t
+ sgml-indent-step:1
+ sgml-indent-data:t
+ sgml-parent-document:nil
+ sgml-default-dtd-file:"../reference.ced"
+ sgml-exposed-tags:nil
+ sgml-local-catalogs:"/usr/lib/sgml/catalog"
+ sgml-local-ecat-files:nil
+ End:
+ -->
diff -Ncr --exclude-from=diff-ignore 00orig/doc/src/sgml/ref/savepoint.sgml 04pgproc/doc/src/sgml/ref/savepoint.sgml
*** 00orig/doc/src/sgml/ref/savepoint.sgml	1969-12-31 21:00:00.000000000 -0300
--- 04pgproc/doc/src/sgml/ref/savepoint.sgml	2004-07-27 10:29:09.000000000 -0400
***************
*** 0 ****
--- 1,153 ----
+ <!--
+ $PostgreSQL$
+ PostgreSQL documentation
+ -->
+ 
+ <refentry id="SQL-SAVEPOINT">
+  <refmeta>
+   <refentrytitle id="SQL-SAVEPOINT-TITLE">SAVEPOINT</refentrytitle>
+   <refmiscinfo>SQL - Language Statements</refmiscinfo>
+  </refmeta>
+ 
+  <refnamediv>
+   <refname>SAVEPOINT</refname>
+   <refpurpose>define a new savepoint within the current transaction</refpurpose>
+  </refnamediv>
+ 
+  <indexterm zone="sql-savepoint">
+   <primary>SAVEPOINT</primary>
+  </indexterm>
+ 
+  <indexterm zone="sql-savepoint">
+   <primary>savepoints</primary>
+   <secondary>defining</secondary>
+  </indexterm>
+ 
+  <refsynopsisdiv>
+ <synopsis>
+ SAVEPOINT <replaceable>savepoint_name</replaceable>
+ </synopsis>
+  </refsynopsisdiv>
+   
+  <refsect1>
+   <title>Description</title>
+ 
+   <para>
+    <command>SAVEPOINT</command> establishes a new savepoint within
+    the current transaction.
+   </para>
+ 
+  </refsect1>
+   
+  <refsect1>
+   <title>Parameters</title>
+ 
+   <variablelist>
+    <varlistentry>
+     <term><replaceable>savepoint_name</replaceable></term>
+     <listitem>
+      <para>
+       The name to give to the new savepoint.
+      </para>
+     </listitem>
+    </varlistentry>
+   </variablelist>
+  </refsect1>
+ 
+  <refsect1>
+   <title>Notes</title>
+ 
+   <para>
+    A savepoint is a special mark inside a transaction that allows all commands
+    that are executed after it was established to be rolled back.
+    Alternatively, a savepoint can be destroyed so that it isn't a possible
+    rollback destination anymore.  In this case, all commands that were executed after
+    the savepoint was established are preserved.
+   </para>
+ 
+   <para>
+    Use <xref linkend="SQL-ROLLBACK-TO" endterm="SQL-ROLLBACK-TO-TITLE"> to
+    roll back to a savepoint.  Use <xref linkend="SQL-RELEASE"
+    endterm="SQL-RELEASE-TITLE"> to destroy a savepoint, keeping
+    the effects of commands executed after it was established.
+   </para>
+ 
+   <para>
+    Savepoints can only be established when inside a transaction block.
+    Issuing <command>SAVEPOINT</> when not inside a transaction block
+    will cause an exception to be raised.
+   </para>
+ 
+   <para>
+    There can be multiple savepoints defined within a transaction.
+   </para>
+  </refsect1>
+ 
+  <refsect1>
+   <title>Examples</title>
+ 
+   <para>
+    To establish a savepoint and undo the effects of all commands executed
+    after it was established, keeping only the first inserted value
+    in the table:
+ <programlisting>
+ BEGIN;
+     INSERT INTO table1 VALUES (1);
+     SAVEPOINT my_savepoint;
+     INSERT INTO table1 VALUES (2);
+     ROLLBACK TO my_savepoint;
+ COMMIT;
+ </programlisting>
+   </para>
+ 
+   <para>
+    To establish and later destroy a savepoint, keeping both values in the table:
+ <programlisting>
+ BEGIN;
+     INSERT INTO table1 VALUES (3);
+     SAVEPOINT my_savepoint;
+     INSERT INTO table1 VALUES (4);
+     RELEASE my_savepoint;
+ COMMIT;
+ </programlisting></para>
+  </refsect1>
+  <refsect1>
+   <title>Compatibility</title>
+   
+   <para>
+    SQL requires a savepoint to be automatically destroyed when another savepoint
+    with the same name is established.  In <productname>PostgreSQL</>, the old
+    savepoint is kept, though only the last one will be used when rolling back or
+    releasing.  Other than that, <command>SAVEPOINT</command> is fully SQL conforming.
+   </para>
+  </refsect1>
+ 
+  <refsect1>
+   <title>See Also</title>
+ 
+   <simplelist type="inline">
+    <member><xref linkend="sql-begin" endterm="sql-begin-title"></member>
+    <member><xref linkend="sql-rollback" endterm="sql-rollback-title"></member>
+    <member><xref linkend="sql-rollback-to" endterm="sql-rollback-to-title"></member>
+    <member><xref linkend="sql-release" endterm="sql-release-title"></member>
+    <member><xref linkend="sql-commit" endterm="sql-commit-title"></member>
+   </simplelist>
+  </refsect1>
+ </refentry>
+ 
+ <!-- Keep this comment at the end of the file
+ Local variables:
+ mode: sgml
+ sgml-omittag:nil
+ sgml-shorttag:t
+ sgml-minimize-attributes:nil
+ sgml-always-quote-attributes:t
+ sgml-indent-step:1
+ sgml-indent-data:t
+ sgml-parent-document:nil
+ sgml-default-dtd-file:"../reference.ced"
+ sgml-exposed-tags:nil
+ sgml-local-catalogs:"/usr/lib/sgml/catalog"
+ sgml-local-ecat-files:nil
+ End:
+ -->
diff -Ncr --exclude-from=diff-ignore 00orig/doc/src/sgml/ref/start_transaction.sgml 04pgproc/doc/src/sgml/ref/start_transaction.sgml
*** 00orig/doc/src/sgml/ref/start_transaction.sgml	2004-01-11 02:46:58.000000000 -0300
--- 04pgproc/doc/src/sgml/ref/start_transaction.sgml	2004-07-27 10:29:09.000000000 -0400
***************
*** 66,71 ****
--- 66,72 ----
     <member><xref linkend="sql-commit" endterm="sql-commit-title"></member>
     <member><xref linkend="sql-rollback" endterm="sql-rollback-title"></member>
     <member><xref linkend="sql-set-transaction" endterm="sql-set-transaction-title"></member>
+    <member><xref linkend="sql-savepoint" endterm="sql-savepoint-title"></member>
    </simplelist>
   </refsect1>
  </refentry>
diff -Ncr --exclude-from=diff-ignore 00orig/doc/src/sgml/reference.sgml 04pgproc/doc/src/sgml/reference.sgml
*** 00orig/doc/src/sgml/reference.sgml	2004-06-26 00:28:44.000000000 -0400
--- 04pgproc/doc/src/sgml/reference.sgml	2004-07-27 10:29:09.000000000 -0400
***************
*** 120,128 ****
--- 120,131 ----
     &notify;
     &prepare;
     &reindex;
+    &releaseSavepoint;
     &reset;
     &revoke;
     &rollback;
+    &rollbackTo;
+    &savepoint;
     &select;
     &selectInto;
     &set;
diff -Ncr --exclude-from=diff-ignore 00orig/src/backend/access/transam/README 04pgproc/src/backend/access/transam/README
*** 00orig/src/backend/access/transam/README	1969-12-31 21:00:00.000000000 -0300
--- 04pgproc/src/backend/access/transam/README	2004-07-28 19:18:11.856226058 -0400
***************
*** 0 ****
--- 1,224 ----
+ The Transaction System
+ ======================
+ 
+ xact.c
+ ------
+ 
+ PostgreSQL's transaction system is a three-layer system, implementing
+ low-level transactions and subtransactions, on top of which rests the
+ mainloop's control code, which in turn implements user-visible transactions
+ and savepoints.
+ 
+ The middle layer of code is called by postgres.c before and after the
+ processing of each query,
+ 
+ 		StartTransactionCommand
+ 		CommitTransactionCommand
+ 		AbortCurrentTransaction
+ 
+ Meanwhile, the user can alter the system's state by issuing the SQL commands
+ BEGIN, COMMIT, ROLLBACK, SAVEPOINT, ROLLBACK TO or RELEASE.  The traffic cop
+ redirects these calls to the toplevel routines
+ 
+ 		BeginTransactionBlock
+ 		EndTransactionBlock
+ 		UserAbortTransactionBlock
+ 		DefineSavepoint
+ 		RollbackToSavepoint
+ 		ReleaseSavepoint
+ 		RollbackAndReleaseSavepoint
+ 
+ Depending on the current state of the system, these functions call low-level
+ functions to activate the real transaction system,
+ 
+ 		StartTransaction
+ 		CommitTransaction
+ 		AbortTransaction
+ 		CleanupTransaction
+ 		StartSubTransaction
+ 		CommitSubTransaction
+ 		AbortSubTransaction
+ 		CleanupSubTransaction
+ 
+ Additionally, within a transaction, CommandCounterIncrement is called to
+ increment the command counter, which allows future commands to "see" the
+ effects of previous commands within the same transaction.  Note that this is
+ done automatically by CommitTransactionCommand after each query inside a
+ transaction block, but some utility functions also do it to allow some
+ operations (usually in the system catalogs) to be seen by future operations in
+ the same utility command processing (for example, in DefineRelation it is done
+ after creating the heap so the pg_class row is visible, to be able to lock
+ it).
+ 
+ 
+ For example, consider the following sequence of user commands:
+ 
+ 1)		BEGIN
+ 2)		SELECT * FROM foo
+ 3)		INSERT INTO foo VALUES (...)
+ 4)		COMMIT
+ 
+ In the main processing loop, this results in the following function call
+ sequence:
+ 
+ 	 /	StartTransactionCommand;
+ 	/	ProcessUtility;				<< BEGIN
+ 1) <		BeginTransactionBlock;
+ 	\			StartTransaction;
+ 	 \	CommitTransactionCommand;
+ 
+ 	/	StartTransactionCommand;
+ 2) /	ProcessQuery;					<< SELECT * FROM foo
+    \	CommitTransactionCommand;
+ 	\		CommandCounterIncrement;
+ 
+ 	/	StartTransactionCommand;
+ 3) /	ProcessQuery;					<< INSERT INTO foo VALUES (...)
+    \	CommitTransactionCommand;
+ 	\		CommandCounterIncrement;
+ 
+ 	 /	StartTransactionCommand;
+ 	/	ProcessUtility;				<< COMMIT
+ 4) <		EndTransactionBlock;
+ 	\			CommitTransaction;
+ 	 \	CommitTransactionCommand;
+ 
+ The point of this example is to demonstrate the need for
+ StartTransactionCommand and CommitTransactionCommand to be state smart -- they
+ should do nothing in between the calls to BeginTransactionBlock and
+ EndTransactionBlock and outside these calls they need to do normal start,
+ commit or abort processing.
+ 
+ Furthermore, suppose the "SELECT * FROM foo" caused an abort condition.  In
+ this case AbortCurrentTransaction is called, and the transaction is put in
+ aborted state.  In this state, any user input is ignored except for
+ transaction-termination statements, or ROLLBACK TO <savepoint> commands.
+ 
+ Transaction aborts can occur in two ways:
+ 
+ 1)	system dies from some internal cause  (syntax error, etc)
+ 2)	user types ROLLBACK
+ 
+ The reason we have to distinguish them is illustrated by the following two
+ situations:
+ 
+ 		case 1							case 2
+ 		------							------
+ 1) user types BEGIN				1) user types BEGIN
+ 2) user does something			2) user does something
+ 3) user does not like what		3) system aborts for some reason
+    she sees and types ABORT		   (syntax error, etc)
+ 
+ In case 1, we want to abort the transaction and return to the default state.
+ In case 2, there may be more commands coming our way which are part of the
+ same transaction block; we have to ignore these commands until we see a COMMIT
+ or ROLLBACK.
+ 
+ Internal aborts are handled by AbortCurrentTransaction, while user aborts are
+ handled by UserAbortTransactionBlock.  Both of them rely on AbortTransaction
+ to do all the real work.  The only difference is what state we enter after
+ AbortTransaction does its work:
+ 
+ * AbortCurrentTransaction leaves us in TBLOCK_ABORT,
+ * UserAbortTransactionBlock leaves us in TBLOCK_ENDABORT
+ 
+ Low-level transaction abort handling is divided in two phases:
+ * AbortTransaction executes as soon as we realize the transaction has
+   failed.  It should release all shared resources (locks etc) so that we do
+   not delay other backends unnecessarily.
+ * CleanupTransaction executes when we finally see a user COMMIT
+   or ROLLBACK command; it cleans things up and gets us out of the transaction
+   internally.  In particular, we mustn't destroy TopTransactionContext until
+   this point.
+ 
+ Also, note that when a transaction is committed, we don't close it right away.
+ Rather it's put in TBLOCK_END state, which means that when
+ CommitTransactionCommand is called after the query has finished processing,
+ the transaction has to be closed.  The distinction is subtle but important,
+ because it means that control will leave the xact.c code with the transaction
+ open, and the main loop will be able to keep processing inside the same
+ transaction.  So, in a sense, transaction commit is also handled in two
+ phases, the first at EndTransactionBlock and the second at
+ CommitTransactionCommand.
+ 
+ The rest of the code in xact.c consists of routines to support the creation
+ and finishing of transactions and subtransactions.  For example, AtStart_Memory
+ takes care of initializing the memory subsystem at main transaction start.
+ 
+ 
+ Subtransaction handling
+ -----------------------
+ 
+ Subtransactions are implemented using a stack of TransactionState structures,
+ each of which has a pointer to its parent transaction.  When a new
+ subtransaction is to be opened, PushTransaction is called, which creates a new
+ TransactionState, with its parent pointing to the current transaction.
+ StartSubTransaction is in charge of initializing the new TransactionState to
+ sane values, and properly initializing other subsystems.
+ 
+ When closing a subtransaction, either CommitSubTransaction has to be called
+ (if the subtransaction is committing), or AbortSubTransaction and
+ CleanupSubTransaction (if it's aborting).  In either case, PopTransaction is
+ called so the system returns to the parent transaction.
+ 
+ One important point regarding subtransaction handling is that several may need
+ to be closed in response to a single user command.  That's because savepoints
+ have names, and we allow committing or rolling back a savepoint by name, which
+ is not necessarily the one that was last opened.  In the case of subtransaction
+ commit this is not a problem, and we close all the involved subtransactions
+ right away by calling CommitTransactionToLevel, which in turn calls
+ CommitSubTransaction and PopTransaction as many times as needed.
+ 
+ In the case of subtransaction abort (when the user issues ROLLBACK TO
+ <savepoint>), things are not so easy.  We have to keep the subtransactions
+ open and return control to the main loop.  So what RollbackToSavepoint does is
+ abort the innermost subtransaction and put it in TBLOCK_SUBENDABORT state, and
+ put the rest in TBLOCK_SUBABORT_PENDING state.  Then we return control to the
+ main loop, which will in turn return control to us by calling
+ CommitTransactionCommand.  At this point we can close all subtransactions that
+ are marked with the "abort pending" state.
+ 
+ 
+ 
+ pg_clog and pg_subtrans
+ -----------------------
+ 
+ pg_clog and pg_subtrans are permanent (on-disk) storage of transaction related
+ information.  There is a limited number of pages of each kept in memory, so
+ in many cases there is no need to actually read from disk.  However, if
+ there's a long running transaction or a backend sitting idle with an open
+ transaction, it's necessary to be able to read and write this information from
+ disk.  They also allow information to be permanent across server restarts.
+ 
+ pg_clog records the commit status for each transaction.  A transaction can be
+ in progress, committed, aborted, or "sub-committed".  This last state means
+ that it's a subtransaction that's no longer running, but its parent has not
+ updated its state yet (either it is still running, or the backend crashed
+ without updating its status).  A sub-committed transaction's status will be
+ updated again to the final value as soon as the parent commits or aborts, or
+ when the parent is detected to be aborted.
+ 
+ Savepoints are implemented using subtransactions.  A subtransaction is a
+ transaction inside a transaction; it gets its own TransactionId, but its
+ commit or abort status is not only dependent on whether it committed itself,
+ but also whether its parent transaction committed.  To implement multiple
+ savepoints in a transaction we allow unlimited transaction nesting depth, so
+ any particular subtransaction's commit state is dependent on the commit status
+ of each and every ancestor transaction.
+ 
+ The "subtransaction parent" (pg_subtrans) mechanism records, for each
+ transaction, the TransactionId of its parent transaction.  This information is
+ stored as soon as the subtransaction is created.
+ 
+ pg_subtrans is used to know whether the transaction in question is still
+ running --- the main Xid of a transaction is recorded in the PGPROC struct,
+ but since we allow arbitrary nesting of subtransactions, we can't fit all Xids
+ in shared memory, so we have to store them on disk.  Note, however, that for
+ each transaction we keep a "cache" of Xids that are known to be part of the
+ transaction tree, so we can skip looking at pg_subtrans unless we know the
+ cache has been overflowed.  See storage/ipc/sinval.c for the gory details.
+ 
+ slru.c is the supporting mechanism for both pg_clog and pg_subtrans.  It
+ implements the LRU policy for in-memory buffer pages.  The high-level routines
+ for pg_clog are implemented in transam.c, while the low-level functions are in
+ clog.c.  pg_subtrans is contained completely in subtrans.c.
diff -Ncr --exclude-from=diff-ignore 00orig/src/backend/access/transam/xact.c 04pgproc/src/backend/access/transam/xact.c
*** 00orig/src/backend/access/transam/xact.c	2004-07-28 14:36:40.351828862 -0400
--- 04pgproc/src/backend/access/transam/xact.c	2004-07-28 14:24:59.073029982 -0400
***************
*** 10,141 ****
   * IDENTIFICATION
   *	  $PostgreSQL: pgsql-server/src/backend/access/transam/xact.c,v 1.172 2004/07/27 05:10:49 tgl Exp $
   *
-  * NOTES
-  *		Transaction aborts can now occur two ways:
-  *
-  *		1)	system dies from some internal cause  (syntax error, etc..)
-  *		2)	user types ABORT
-  *
-  *		These two cases used to be treated identically, but now
-  *		we need to distinguish them.  Why?	consider the following
-  *		two situations:
-  *
-  *				case 1							case 2
-  *				------							------
-  *		1) user types BEGIN				1) user types BEGIN
-  *		2) user does something			2) user does something
-  *		3) user does not like what		3) system aborts for some reason
-  *		   she sees and types ABORT
-  *
-  *		In case 1, we want to abort the transaction and return to the
-  *		default state.	In case 2, there may be more commands coming
-  *		our way which are part of the same transaction block and we have
-  *		to ignore these commands until we see a COMMIT transaction or
-  *		ROLLBACK.
-  *
-  *		Internal aborts are now handled by AbortTransactionBlock(), just as
-  *		they always have been, and user aborts are now handled by
-  *		UserAbortTransactionBlock().  Both of them rely on AbortTransaction()
-  *		to do all the real work.  The only difference is what state we
-  *		enter after AbortTransaction() does its work:
-  *
-  *		* AbortTransactionBlock() leaves us in TBLOCK_ABORT and
-  *		* UserAbortTransactionBlock() leaves us in TBLOCK_ENDABORT
-  *
-  *		Low-level transaction abort handling is divided into two phases:
-  *		* AbortTransaction() executes as soon as we realize the transaction
-  *		  has failed.  It should release all shared resources (locks etc)
-  *		  so that we do not delay other backends unnecessarily.
-  *		* CleanupTransaction() executes when we finally see a user COMMIT
-  *		  or ROLLBACK command; it cleans things up and gets us out of
-  *		  the transaction internally.  In particular, we mustn't destroy
-  *		  TopTransactionContext until this point.
-  *
-  *	 NOTES
-  *		The essential aspects of the transaction system are:
-  *
-  *				o  transaction id generation
-  *				o  transaction log updating
-  *				o  memory cleanup
-  *				o  cache invalidation
-  *				o  lock cleanup
-  *
-  *		Hence, the functional division of the transaction code is
-  *		based on which of the above things need to be done during
-  *		a start/commit/abort transaction.  For instance, the
-  *		routine AtCommit_Memory() takes care of all the memory
-  *		cleanup stuff done at commit time.
-  *
-  *		The code is layered as follows:
-  *
-  *				StartTransaction
-  *				CommitTransaction
-  *				AbortTransaction
-  *				CleanupTransaction
-  *
-  *		are provided to do the lower level work like recording
-  *		the transaction status in the log and doing memory cleanup.
-  *		above these routines are another set of functions:
-  *
-  *				StartTransactionCommand
-  *				CommitTransactionCommand
-  *				AbortCurrentTransaction
-  *
-  *		These are the routines used in the postgres main processing
-  *		loop.  They are sensitive to the current transaction block state
-  *		and make calls to the lower level routines appropriately.
-  *
-  *		Support for transaction blocks is provided via the functions:
-  *
-  *				BeginTransactionBlock
-  *				CommitTransactionBlock
-  *				AbortTransactionBlock
-  *
-  *		These are invoked only in response to a user "BEGIN WORK", "COMMIT",
-  *		or "ROLLBACK" command.	The tricky part about these functions
-  *		is that they are called within the postgres main loop, in between
-  *		the StartTransactionCommand() and CommitTransactionCommand().
-  *
-  *		For example, consider the following sequence of user commands:
-  *
-  *		1)		begin
-  *		2)		select * from foo
-  *		3)		insert into foo (bar = baz)
-  *		4)		commit
-  *
-  *		in the main processing loop, this results in the following
-  *		transaction sequence:
-  *
-  *			/	StartTransactionCommand();
-  *		1) /	ProcessUtility();				<< begin
-  *		   \		BeginTransactionBlock();
-  *			\	CommitTransactionCommand();
-  *
-  *			/	StartTransactionCommand();
-  *		2) <	ProcessQuery();					<< select * from foo
-  *			\	CommitTransactionCommand();
-  *
-  *			/	StartTransactionCommand();
-  *		3) <	ProcessQuery();					<< insert into foo (bar = baz)
-  *			\	CommitTransactionCommand();
-  *
-  *			/	StartTransactionCommand();
-  *		4) /	ProcessUtility();				<< commit
-  *		   \		CommitTransactionBlock();
-  *			\	CommitTransactionCommand();
-  *
-  *		The point of this example is to demonstrate the need for
-  *		StartTransactionCommand() and CommitTransactionCommand() to
-  *		be state smart -- they should do nothing in between the calls
-  *		to BeginTransactionBlock() and EndTransactionBlock() and
-  *		outside these calls they need to do normal start/commit
-  *		processing.
-  *
-  *		Furthermore, suppose the "select * from foo" caused an abort
-  *		condition.	We would then want to abort the transaction and
-  *		ignore all subsequent commands up to the "commit".
-  *		-cim 3/23/90
-  *
   *-------------------------------------------------------------------------
   */
  
--- 10,15 ----
***************
*** 1020,1025 ****
--- 894,901 ----
  		TransactionIdAbortTree(nchildren, children);
  		TransactionIdAbort(xid);
  
+ 		XidCacheClean();
+ 
  		END_CRIT_SECTION();
  	}
  
***************
*** 1159,1164 ****
--- 1035,1042 ----
  		TransactionIdAbortTree(nchildren, children);
  		TransactionIdAbort(xid);
  
+ 		XidCacheRemoveRunningXids(nchildren, children, xid);
+ 
  		END_CRIT_SECTION();
  	}
  
***************
*** 1390,1395 ****
--- 1268,1278 ----
  		MyProc->xid = InvalidTransactionId;
  		MyProc->xmin = InvalidTransactionId;
  		LWLockRelease(SInvalLock);
+ 
+ 		/*
+ 		 * Clean up the Xid cache.
+ 		 */
+ 		XidCacheClean();
  	}
  
  	/*
***************
*** 1589,1595 ****
  	 * State should still be TRANS_ABORT from AbortTransaction().
  	 */
  	if (s->state != TRANS_ABORT)
! 		elog(FATAL, "CleanupTransaction and not in abort state");
  
  	/*
  	 * do abort cleanup processing
--- 1472,1479 ----
  	 * State should still be TRANS_ABORT from AbortTransaction().
  	 */
  	if (s->state != TRANS_ABORT)
! 		elog(FATAL, "CleanupTransaction while in %s state",
! 			 TransStateAsString(s->state));
  
  	/*
  	 * do abort cleanup processing
***************
*** 2897,2902 ****
--- 2781,2793 ----
  	XactLockTableInsert(s->transactionIdData);
  
  	/*
+ 	 * Ideally, we would only cache Xids of subtransactions that write tuples
+ 	 * in permanent storage.  We have no clean way of knowing that, however
+ 	 * (much less in advance ...)
+ 	 */
+ 	XidCacheAddRunningXid(s->transactionIdData);
+ 
+ 	/*
  	 * Finish setup of other transaction state fields.
  	 */
  	s->currentUser = GetUserId();
diff -Ncr --exclude-from=diff-ignore 00orig/src/backend/storage/ipc/sinval.c 04pgproc/src/backend/storage/ipc/sinval.c
*** 00orig/src/backend/storage/ipc/sinval.c	2004-06-30 23:50:07.000000000 -0400
--- 04pgproc/src/backend/storage/ipc/sinval.c	2004-07-28 19:44:01.587585261 -0400
***************
*** 27,32 ****
--- 27,53 ----
  #include "utils/tqual.h"
  #include "miscadmin.h"
  
+ #ifdef XIDCACHE_DEBUG
+ static void
+ DisplayXidCache(int code, Datum arg);
+ 
+ /* counters for XidCache measurement: how TransactionIdIsInProgress answered */
+ static int xc_by_recent_xmin = 0;	/* xid preceded RecentGlobalXmin */
+ static int xc_by_main_xid = 0;		/* xid matched a backend's main Xid */
+ static int xc_by_child_xid = 0;		/* xid matched a cached child Xid */
+ static int xc_slow_answer = 0;		/* had to go on to the pg_subtrans step */
+ #define xc_by_recent_xmin_inc	xc_by_recent_xmin++
+ #define xc_by_main_xid_inc		xc_by_main_xid++
+ #define xc_by_child_xid_inc		xc_by_child_xid++
+ #define xc_slow_answer_inc		xc_slow_answer++
+ 
+ #else /* XIDCACHE_DEBUG */
+ 
+ #define xc_by_recent_xmin_inc	/* expands to nothing without XIDCACHE_DEBUG */
+ #define xc_by_main_xid_inc
+ #define xc_by_child_xid_inc
+ #define xc_slow_answer_inc
+ #endif /* XIDCACHE_DEBUG */
  
  /*
   * Because backends sitting idle will not be reading sinval events, we
***************
*** 80,85 ****
--- 101,110 ----
  		ereport(FATAL,
  				(errcode(ERRCODE_TOO_MANY_CONNECTIONS),
  				 errmsg("sorry, too many clients already")));
+ 
+ #ifdef XIDCACHE_DEBUG
+ 	on_proc_exit(DisplayXidCache, (Datum) NULL);
+ #endif /* XIDCACHE_DEBUG */
  }
  
  /*
***************
*** 444,451 ****
   *
   * SInvalLock has to be held while we do 1 and 2.  If we save all the Xids
   * while doing 1, we can release the SInvalLock while we do 3.  This buys back
!  * some concurrency (we can't retrieve the main Xids from PGPROC again anyway,
!  * see GetNewTransactionId)
   */
  bool
  TransactionIdIsInProgress(TransactionId xid)
--- 469,476 ----
   *
   * SInvalLock has to be held while we do 1 and 2.  If we save all the Xids
   * while doing 1, we can release the SInvalLock while we do 3.  This buys back
!  * some concurrency (we can't retrieve the main Xids from PGPROC again anyway;
!  * see GetNewTransactionId).
   */
  bool
  TransactionIdIsInProgress(TransactionId xid)
***************
*** 453,465 ****
  	bool			result = false;
  	SISeg		   *segP = shmInvalBuffer;
  	ProcState	   *stateP = segP->procState;
! 	int				i;
! 	int				nxids = 0;
  	TransactionId  *xids;
  
! 	xids = (TransactionId *)palloc(sizeof(TransactionId) * segP->maxBackends);
  
  	LWLockAcquire(SInvalLock, LW_SHARED);
  
  	for (i = 0; i < segP->lastBackend; i++)
  	{
--- 478,502 ----
  	bool			result = false;
  	SISeg		   *segP = shmInvalBuffer;
  	ProcState	   *stateP = segP->procState;
! 	int				i,
! 					j;
  	TransactionId  *xids;
+ 	bool			locked;				/* do we currently hold SInvalLock? */
+ 	bool			overflowed = false;	/* did any backend's cache overflow? */
+ 
+ 	/* Don't bother checking a very old transaction. */
+ 	if (TransactionIdPrecedes(xid, RecentGlobalXmin))
+ 	{
+ 		xc_by_recent_xmin_inc;
+ 		return false;
+ 	}
  
! 	/* Zero-filled: step 3 scans every slot, even ones never stored into */
! 	xids = (TransactionId *)
! 		palloc0(sizeof(TransactionId) * segP->maxBackends);
  
  	LWLockAcquire(SInvalLock, LW_SHARED);
+ 	locked = true;
  
  	for (i = 0; i < segP->lastBackend; i++)
  	{
***************
*** 473,545 ****
  			TransactionId pxid = proc->xid;
  
  			/*
! 			 * check the main Xid (step 1 above)
  			 */
  			if (TransactionIdEquals(pxid, xid))
  			{
  				result = true;
! 				break;
  			}
  
! 			/*
! 			 * save the main Xid for step 3.
! 			 */
! 			xids[nxids++] = pxid;
  
! #ifdef NOT_USED
! 			FIXME -- waiting to save the Xids in PGPROC ...
  
  			/*
! 			 * check the saved Xids array (step 2)
  			 */
! 			for (j = 0; j < PGPROC_MAX_SAVED_XIDS; j++)
  			{
! 				pxid = proc->savedxids[j];
  
! 				if (!TransactionIdIsValid(pxids))
! 					break;
  
  				if (TransactionIdEquals(pxid, xid))
  				{
  					result = true;
! 					break;
  				}
  			}
- #endif
- 
- 			if (result)
- 				break;
  		}
  	}
  
  	LWLockRelease(SInvalLock);
  
  	/*
  	 * Step 3: have to check pg_subtrans.  Use the saved Xids.
  	 *
! 	 * XXX Could save the cached Xids too for further improvement.
  	 */
! 	if (!result)
  	{
! 		/* this is a potentially expensive call. */
! 		xid = SubTransGetTopmostTransaction(xid);
! 		
! 		Assert(TransactionIdIsValid(xid));
  
! 		/*
! 		 * We don't care if it aborted, because if it did, we won't find
! 		 * it in the array.
! 		 */
! 		for (i = 0; i < nxids; i++)
  		{
! 			if (TransactionIdEquals(xids[i], xid))
! 			{
! 				result = true;
! 				break;
! 			}
  		}
  	}
  
  	pfree(xids);
  
  	return result;
--- 510,629 ----
  			TransactionId pxid = proc->xid;
  
  			/*
! 			 * Step 1: check the main Xid
  			 */
  			if (TransactionIdEquals(pxid, xid))
  			{
+ 				xc_by_main_xid_inc;
  				result = true;
! 				goto result_known;
  			}
  
! 			/* save the main Xid for step 3. */
! 			xids[i] = pxid;
  
! 			if (proc->cache.overflow)
! 				overflowed = true;
  
  			/*
! 			 * Step 2: check the cached Xids arrays
  			 */
! 			for (j = 0; j < PGPROC_MAX_CACHED_SUBXIDS; j++)
  			{
! 				pxid = proc->cache.xids[j];
  
! 				if (!TransactionIdIsValid(pxid))
! 					continue;
  
  				if (TransactionIdEquals(pxid, xid))
  				{
+ 					xc_by_child_xid_inc;
  					result = true;
! 					goto result_known;
  				}
  			}
  		}
  	}
  
  	LWLockRelease(SInvalLock);
+ 	locked = false;
+ 
+ 	/*
+ 	 * If none of the caches overflowed, we know the Xid is
+ 	 * not running without looking at pg_subtrans.
+ 	 */
+ 	if (!overflowed)
+ 		goto result_known;
  
  	/*
  	 * Step 3: have to check pg_subtrans.  Use the saved Xids.
+ 	 */
+ 	xc_slow_answer_inc;
+ 
+ 	/*
+ 	 * At this point, we know it's either a subtransaction or
+ 	 * it's not running.  If it's a subtransaction, we have to
+ 	 * check whether it's part of a running subtransaction tree
+ 	 * or it was aborted.  So we have to look at pg_clog, but
+ 	 * since we already checked the PGPROC array we don't have to
+ 	 * worry about a race condition.
+ 	 */
+ 	if (TransactionIdDidAbort(xid))
+ 	{
+ 		result = false;
+ 		goto result_known;
+ 	}
+ 
+ 	/*
+ 	 * It isn't aborted, so check whether the transaction tree it
+ 	 * belongs to is still running (or, more precisely, whether it
+ 	 * was running when this routine started -- note that we just
+ 	 * released SInvalLock.)
+ 	 */
+ 	xid = SubTransGetTopmostTransaction(xid);
+ 	Assert(TransactionIdIsValid(xid));
+ 
+ 	for (i = 0; i < segP->maxBackends; i++)
+ 	{
+ 		if (TransactionIdEquals(xids[i], xid))
+ 		{
+ 			result = true;
+ 			break;
+ 		}
+ 	}
+ 
+ 	/*
+ 	 * pg_subtrans says it's running in the i-th backend.  Check if it's
+ 	 * still true.
  	 *
! 	 * This is not strictly needed, but 1) an eternity has passed since
! 	 * the main Xid was read (we took a peek at both pg_clog and pg_subtrans
! 	 * in the meantime), and 2) it's a cheap test.
  	 */
! 	if (result)
  	{
! 		SHMEM_OFFSET pOffset;
  
! 		LWLockAcquire(SInvalLock, LW_SHARED);
! 		locked = true;
! 		pOffset = stateP[i].procStruct;
! 		result = false;			/* stays false if the slot is empty by now */
! 		if (pOffset != INVALID_OFFSET)
  		{
! 			PGPROC	   *proc = (PGPROC *) MAKE_PTR(pOffset);
! 
! 			/* Fetch xid just once - see GetNewTransactionId */
! 			TransactionId pxid = proc->xid;
! 
! 			result = TransactionIdEquals(pxid, xid);
  		}
  	}
  
+ result_known: ;
+ 
+ 	if (locked)
+ 		LWLockRelease(SInvalLock);
+ 
  	pfree(xids);
  
  	return result;
***************
*** 794,799 ****
--- 878,888 ----
  
  	snapshot->curcid = GetCurrentCommandId();
  
+ #ifdef XIDCACHE_DEBUG
+ 	/* debugging aid; DisplayXidCache is only compiled under this symbol */
+ 	DisplayXidCache(0, 0);
+ #endif /* XIDCACHE_DEBUG */
+ 
  	return snapshot;
  }
  
***************
*** 928,930 ****
--- 1014,1128 ----
  
  	return count;
  }
+ 
+ /*
+  * XidCacheAddRunningXid
+  *
+  * Store xid in the next slot of this backend's cache of known-running Xids.
+  * If all PGPROC_MAX_CACHED_SUBXIDS slots are in use, only set the overflow flag.
+  */
+ void
+ XidCacheAddRunningXid(TransactionId xid)
+ {
+ 	/* Exclusive mode: other backends read this cache under a shared lock */
+ 	LWLockAcquire(SInvalLock, LW_EXCLUSIVE);
+ 
+ 	if (MyProc->cache.nxids < PGPROC_MAX_CACHED_SUBXIDS)
+ 	{
+ 		TransactionIdStore(xid, &(MyProc->cache.xids[MyProc->cache.nxids]));
+ 		MyProc->cache.nxids++;
+ 	}
+ 	else
+ 		MyProc->cache.overflow = true;	/* no room: xid is not recorded */
+ 
+ 	LWLockRelease(SInvalLock);
+ }
+ 
+ #define XidCacheRemove(i)	/* empty slot i of our cache, shrink the count */ \
+ 	do { \
+ 		TransactionIdStore(InvalidTransactionId, &(MyProc->cache.xids[(i)])); \
+ 		MyProc->cache.nxids--; \
+ 	} while (0)
+ 
+ /*
+  * XidCacheRemoveRunningXids
+  *
+  * Remove xid and the nxids entries of xids[] from this backend's cache of
+  * known-running transactions.  Xids not found in the cache are skipped.
+  */
+ void
+ XidCacheRemoveRunningXids(int nxids, TransactionId *xids, TransactionId xid)
+ {
+ 	int		i, j;
+ 
+ 	Assert(!TransactionIdEquals(xid, InvalidTransactionId));
+ 
+ 	/* Exclusive mode: other backends read this cache under a shared lock */
+ 	LWLockAcquire(SInvalLock, LW_EXCLUSIVE);
+ 	for (i = 0; i < nxids; i++)
+ 	{
+ 		for (j = 0; j < PGPROC_MAX_CACHED_SUBXIDS; j++)
+ 		{
+ 			if (TransactionIdEquals(MyProc->cache.xids[j], xids[i]))
+ 			{
+ 				XidCacheRemove(j);
+ 				break;
+ 			}
+ 		}
+ 	}
+ 	/* ... and the same search for xid itself */
+ 	for (j = 0; j < PGPROC_MAX_CACHED_SUBXIDS; j++)
+ 	{
+ 		if (TransactionIdEquals(MyProc->cache.xids[j], xid))
+ 		{
+ 			XidCacheRemove(j);
+ 			break;
+ 		}
+ 	}
+ 
+ 	LWLockRelease(SInvalLock);
+ }
+ 
+ /*
+  * XidCacheClean
+  *
+  * Fast cache cleanup at transaction end: empty all slots, reset count and flag.
+  */
+ void
+ XidCacheClean(void)
+ {
+ 	/* Exclusive mode: other backends read this cache under a shared lock */
+ 	LWLockAcquire(SInvalLock, LW_EXCLUSIVE);
+ 
+ 	MyProc->cache.overflow = false;
+ 	MyProc->cache.nxids = 0;
+ 	MemSet(MyProc->cache.xids, 0, sizeof(MyProc->cache.xids));
+ 
+ 	LWLockRelease(SInvalLock);
+ }
+ 
+ #ifdef XIDCACHE_DEBUG
+ /* Debug aid: print the lookup counters if MyProc is NULL, else our cache */
+ static void
+ DisplayXidCache(int code, Datum arg)
+ {
+ 	int i;
+ 
+ 	if (MyProc == NULL)
+ 	{
+ 		fprintf(stderr,"XidCache: xmin: %d, mainxid: %d, childxid: %d, slow: %d\n",
+ 				xc_by_recent_xmin,
+ 				xc_by_main_xid,
+ 				xc_by_child_xid,
+ 				xc_slow_answer);
+ 	}
+ 	else
+ 	{
+ 		fprintf(stderr, "(%s) children:\t",
+ 				MyProc->cache.overflow ? "overf" : "no overf");
+ 		for (i = 0; i < PGPROC_MAX_CACHED_SUBXIDS; i++)
+ 			fprintf(stderr, "%u ", MyProc->cache.xids[i]);	/* Xids are unsigned */
+ 		fprintf(stderr, "\n");
+ 	}
+ }
+ #endif /* XIDCACHE_DEBUG */
diff -Ncr --exclude-from=diff-ignore 00orig/src/backend/tcop/utility.c 04pgproc/src/backend/tcop/utility.c
*** 00orig/src/backend/tcop/utility.c	2004-07-26 21:59:36.000000000 -0400
--- 04pgproc/src/backend/tcop/utility.c	2004-07-27 10:29:30.000000000 -0400
***************
*** 326,333 ****
  				{
  					/*
  					 * START TRANSACTION, as defined by SQL99:
! 					 * Identical to BEGIN, except that it takes a few
! 					 * additional options.  Same code for both.
  					 */
  					case TRANS_STMT_BEGIN:
  					case TRANS_STMT_START:
--- 326,332 ----
  				{
  					/*
  					 * START TRANSACTION, as defined by SQL99:
! 					 * Identical to BEGIN.  Same code for both.
  					 */
  					case TRANS_STMT_BEGIN:
  					case TRANS_STMT_START:
diff -Ncr --exclude-from=diff-ignore 00orig/src/include/access/htup.h 04pgproc/src/include/access/htup.h
*** 00orig/src/include/access/htup.h	2004-07-17 18:10:20.000000000 -0400
--- 04pgproc/src/include/access/htup.h	2004-07-27 10:52:44.000000000 -0400
***************
*** 68,101 ****
   *			object ID (if HEAP_HASOID is set in t_infomask)
   *			user data fields
   *
!  * We store five "virtual" fields Xmin, Cmin, Xmax, Cmax, and Xvac
!  * in just three physical fields.  Xmin is always really stored, but
!  * Cmin and Xmax share a field, as do Cmax and Xvac.  This works because
!  * we know that there are only a limited number of states that a tuple can
!  * be in, and that Cmin and Cmax are only interesting for the lifetime of
!  * the inserting and deleting transactions respectively.  We have the
!  * following possible states of a tuple:
!  *
!  *		XMIN		CMIN		XMAX		CMAX		XVAC
!  *
!  * NEW (never deleted, not moved by vacuum):
!  *		valid		valid		invalid		invalid		invalid
!  *
!  * DELETED BY CREATING XACT:
!  *		valid		valid		= XMIN		valid		invalid
!  *
!  * DELETED BY OTHER XACT:
!  *		valid		unneeded	valid		valid		invalid
!  *
!  * MOVED BY VACUUM FULL:
!  *		valid		unneeded	maybe-valid unneeded	valid
!  *
!  * This assumes that VACUUM FULL never tries to move a tuple whose Cmin or
!  * Cmax is still interesting (ie, insert-in-progress or delete-in-progress).
!  *
!  * This table shows that if we use an infomask bit to handle the case
!  * XMAX=XMIN specially, we never need to store Cmin and Xmax at the same
!  * time.  Nor do we need to store Cmax and Xvac at the same time.
   *
   * Following the fixed header fields, the nulls bitmap is stored (beginning
   * at t_bits).	The bitmap is *not* stored if t_infomask shows that there
--- 68,84 ----
   *			object ID (if HEAP_HASOID is set in t_infomask)
   *			user data fields
   *
!  * We store five "virtual" fields Xmin, Cmin, Xmax, Cmax, and Xvac in four
!  * physical fields.  Xmin, Cmin and Xmax are always really stored, but
!  * Cmax and Xvac share a field.  This works because we know that there are
!  * only a limited number of states that a tuple can be in, and that Cmax
!  * is only interesting for the lifetime of the deleting transaction.
!  * This assumes that VACUUM FULL never tries to move a tuple whose Cmax
!  * is still interesting (ie, delete-in-progress).
!  *
!  * Note that in 7.3 and 7.4 a similar idea was applied to Xmax and Cmin.
!  * However, with the advent of subtransactions, a tuple may need both Xmax
!  * and Cmin simultaneously, so this is no longer possible.
   *
   * Following the fixed header fields, the nulls bitmap is stored (beginning
   * at t_bits).	The bitmap is *not* stored if t_infomask shows that there
***************
*** 424,430 ****
  #define XLOG_HEAP_MOVE		0x30
  #define XLOG_HEAP_CLEAN		0x40
  #define XLOG_HEAP_NEWPAGE	0x50
! /* opcodes 0x60, 0x70 still free */
  #define XLOG_HEAP_OPMASK	0x70
  /*
   * When we insert 1st item on new page in INSERT/UPDATE
--- 407,413 ----
  #define XLOG_HEAP_MOVE		0x30
  #define XLOG_HEAP_CLEAN		0x40
  #define XLOG_HEAP_NEWPAGE	0x50
! /* opcode 0x60 still free */
  #define XLOG_HEAP_OPMASK	0x70
  /*
   * When we insert 1st item on new page in INSERT/UPDATE
diff -Ncr --exclude-from=diff-ignore 00orig/src/include/storage/proc.h 04pgproc/src/include/storage/proc.h
*** 00orig/src/include/storage/proc.h	2004-07-26 21:59:44.000000000 -0400
--- 04pgproc/src/include/storage/proc.h	2004-07-28 14:41:03.827806803 -0400
***************
*** 19,24 ****
--- 19,44 ----
  #include "storage/lock.h"
  #include "storage/pg_sema.h"
  
+ /*
+  * XXX This number is made up ...
+  */
+ #define PGPROC_MAX_CACHED_SUBXIDS 32
+ 
+ /*
+  * Each backend keeps track of (some of) its subtransactions'
+  * TransactionIds in the PGPROC struct.
+  *
+  * We also keep track of whether the cache overflowed.  If it
+  * hasn't overflowed, we can assume that an Xid that's not present
+  * in the cache is not a running subtransaction of this backend.
+  * Otherwise we have to look at pg_subtrans.
+  */
+ struct XidCache {
+ 	/* running Xids cache */
+ 	int				nxids;		/* count of Xids currently stored in xids[] */
+ 	TransactionId	xids[PGPROC_MAX_CACHED_SUBXIDS];
+ 	bool			overflow;	/* set when an Xid could not be stored */
+ };
  
  /*
   * Each backend has a PGPROC struct in shared memory.  There is also a list of
***************
*** 39,44 ****
--- 59,66 ----
  	TransactionId xid;			/* transaction currently being executed by
  								 * this proc */
  
+ 	struct XidCache	cache;			/* Xid cache */
+ 
  	TransactionId xmin;			/* minimal running XID as it was when we
  								 * were starting our xact: vacuum must not
  								 * remove tuples deleted by xid >= xmin ! */
diff -Ncr --exclude-from=diff-ignore 00orig/src/include/storage/sinval.h 04pgproc/src/include/storage/sinval.h
*** 00orig/src/include/storage/sinval.h	2004-06-03 15:59:03.000000000 -0400
--- 04pgproc/src/include/storage/sinval.h	2004-07-27 23:30:29.000000000 -0400
***************
*** 115,118 ****
--- 115,123 ----
  extern void EnableCatchupInterrupt(void);
  extern bool DisableCatchupInterrupt(void);
  
+ /* Xid cache updaters */
+ extern void XidCacheAddRunningXid(TransactionId xid);
+ extern void XidCacheRemoveRunningXids(int nxids, TransactionId *xids, TransactionId xid);
+ extern void XidCacheClean(void);
+ 
  #endif   /* SINVAL_H */