Re: transaction block: server closed the connection unexpectedly

From: Tom Lane <tgl(at)sss(dot)pgh(dot)pa(dot)us>
To: Alvaro Herrera <alvherre(at)dcc(dot)uchile(dot)cl>
Cc: Koju Iijima <koju(at)fast(dot)fujitsu(dot)com(dot)au>, pgsql-bugs(at)postgresql(dot)org
Subject: Re: transaction block: server closed the connection unexpectedly
Date: 2004-09-06 18:00:24
Message-ID: 17356.1094493624@sss.pgh.pa.us
Views: Raw Message | Whole Thread | Download mbox | Resend email
Thread:
Lists: pgsql-bugs

Alvaro Herrera <alvherre(at)dcc(dot)uchile(dot)cl> writes:
> The problem is that there's a pin on an index page when the smgr tries
> to drop its buffers. This patch corrects this problem, by having
> resowner cleanup before smgr.

Good diagnosis, not very good fix. You didn't do anything about
adjusting the comments to match the code, and you missed the identical
bug occurring during subtransaction abort.

One of the most powerful programming techniques I know is, once you've
identified a bug, to ask yourself "where else might I (or others) have
made this same error?". That would have led you to the subtransaction
case, even if we hadn't already agreed that the order of operations
during xact/subxact commit/abort should be kept the same as much as
possible.

Patch as-applied is attached.

regards, tom lane

*** src/backend/access/transam/xact.c.orig Mon Aug 30 15:00:03 2004
--- src/backend/access/transam/xact.c Mon Sep 6 13:52:42 2004
***************
*** 1333,1341 ****
* backend-wide state.
*/

- smgrDoPendingDeletes(true);
- /* smgrcommit already done */
-
CallXactCallbacks(XACT_EVENT_COMMIT, InvalidTransactionId);

ResourceOwnerRelease(TopTransactionResourceOwner,
--- 1333,1338 ----
***************
*** 1352,1357 ****
--- 1349,1362 ----
*/
AtEOXact_Inval(true);

+ /*
+ * Likewise, dropping of files deleted during the transaction is best done
+ * after releasing relcache and buffer pins. (This is not strictly
+ * necessary during commit, since such pins should have been released
+ * already, but this ordering is definitely critical during abort.)
+ */
+ smgrDoPendingDeletes(true);
+
ResourceOwnerRelease(TopTransactionResourceOwner,
RESOURCE_RELEASE_LOCKS,
true, true);
***************
*** 1363,1368 ****
--- 1368,1374 ----
AtEOXact_SPI(true);
AtEOXact_on_commit_actions(true, s->transactionIdData);
AtEOXact_Namespace(true);
+ /* smgrcommit already done */
AtEOXact_Files();
pgstat_count_xact_commit();

***************
*** 1481,1495 ****
* ordering.
*/

- smgrDoPendingDeletes(false);
- smgrabort();
-
CallXactCallbacks(XACT_EVENT_ABORT, InvalidTransactionId);

ResourceOwnerRelease(TopTransactionResourceOwner,
RESOURCE_RELEASE_BEFORE_LOCKS,
false, true);
AtEOXact_Inval(false);
ResourceOwnerRelease(TopTransactionResourceOwner,
RESOURCE_RELEASE_LOCKS,
false, true);
--- 1487,1499 ----
* ordering.
*/

CallXactCallbacks(XACT_EVENT_ABORT, InvalidTransactionId);

ResourceOwnerRelease(TopTransactionResourceOwner,
RESOURCE_RELEASE_BEFORE_LOCKS,
false, true);
AtEOXact_Inval(false);
+ smgrDoPendingDeletes(false);
ResourceOwnerRelease(TopTransactionResourceOwner,
RESOURCE_RELEASE_LOCKS,
false, true);
***************
*** 1501,1506 ****
--- 1505,1511 ----
AtEOXact_SPI(false);
AtEOXact_on_commit_actions(false, s->transactionIdData);
AtEOXact_Namespace(false);
+ smgrabort();
AtEOXact_Files();
pgstat_count_xact_rollback();

***************
*** 3014,3020 ****
AtSubCommit_Notify();
AtEOSubXact_UpdatePasswordFile(true, s->transactionIdData,
s->parent->transactionIdData);
- AtSubCommit_smgr();

CallXactCallbacks(XACT_EVENT_COMMIT_SUB, s->parent->transactionIdData);

--- 3019,3024 ----
***************
*** 3024,3029 ****
--- 3028,3034 ----
AtEOSubXact_RelationCache(true, s->transactionIdData,
s->parent->transactionIdData);
AtEOSubXact_Inval(true);
+ AtSubCommit_smgr();
ResourceOwnerRelease(s->curTransactionOwner,
RESOURCE_RELEASE_LOCKS,
true, false);
***************
*** 3109,3116 ****
RecordSubTransactionAbort();

/* Post-abort cleanup */
- AtSubAbort_smgr();
-
CallXactCallbacks(XACT_EVENT_ABORT_SUB, s->parent->transactionIdData);

ResourceOwnerRelease(s->curTransactionOwner,
--- 3114,3119 ----
***************
*** 3119,3124 ****
--- 3122,3128 ----
AtEOSubXact_RelationCache(false, s->transactionIdData,
s->parent->transactionIdData);
AtEOSubXact_Inval(false);
+ AtSubAbort_smgr();
ResourceOwnerRelease(s->curTransactionOwner,
RESOURCE_RELEASE_LOCKS,
false, false);
*** src/backend/storage/smgr/smgr.c.orig Sun Aug 29 22:57:38 2004
--- src/backend/storage/smgr/smgr.c Mon Sep 6 13:39:42 2004
***************
*** 784,790 ****
}

/*
! * smgrabort() -- Abort changes made during the current transaction.
*/
void
smgrabort(void)
--- 784,790 ----
}

/*
! * smgrabort() -- Clean up after transaction abort.
*/
void
smgrabort(void)
*** src/test/regress/expected/transactions.out.orig Thu Aug 12 14:57:49 2004
--- src/test/regress/expected/transactions.out Mon Sep 6 13:46:34 2004
***************
*** 374,379 ****
--- 374,394 ----
FETCH 10 FROM c;
ERROR: portal "c" cannot be run
COMMIT;
+ -- test case for problems with dropping an open relation during abort
+ BEGIN;
+ savepoint x;
+ CREATE TABLE koju (a INT UNIQUE);
+ NOTICE: CREATE TABLE / UNIQUE will create implicit index "koju_a_key" for table "koju"
+ INSERT INTO koju VALUES (1);
+ INSERT INTO koju VALUES (1);
+ ERROR: duplicate key violates unique constraint "koju_a_key"
+ rollback to x;
+ CREATE TABLE koju (a INT UNIQUE);
+ NOTICE: CREATE TABLE / UNIQUE will create implicit index "koju_a_key" for table "koju"
+ INSERT INTO koju VALUES (1);
+ INSERT INTO koju VALUES (1);
+ ERROR: duplicate key violates unique constraint "koju_a_key"
+ ROLLBACK;
DROP TABLE foo;
DROP TABLE baz;
DROP TABLE barbaz;
*** src/test/regress/sql/transactions.sql.orig Thu Aug 12 14:25:44 2004
--- src/test/regress/sql/transactions.sql Mon Sep 6 13:44:08 2004
***************
*** 231,236 ****
--- 231,249 ----
FETCH 10 FROM c;
COMMIT;

+ -- test case for problems with dropping an open relation during abort
+ BEGIN;
+ savepoint x;
+ CREATE TABLE koju (a INT UNIQUE);
+ INSERT INTO koju VALUES (1);
+ INSERT INTO koju VALUES (1);
+ rollback to x;
+
+ CREATE TABLE koju (a INT UNIQUE);
+ INSERT INTO koju VALUES (1);
+ INSERT INTO koju VALUES (1);
+ ROLLBACK;
+
DROP TABLE foo;
DROP TABLE baz;
DROP TABLE barbaz;

In response to

Browse pgsql-bugs by date

  From Date Subject
Next Message Tom Lane 2004-09-06 18:12:47 Re: error in simple sql function breaks connection
Previous Message Tom Lane 2004-09-06 16:29:10 Re: BUG #1239: Stale postmaster.pid prevents server start