*** a/src/backend/catalog/index.c --- b/src/backend/catalog/index.c *************** *** 2133,2139 **** IndexCheckExclusion(Relation heapRelation, * * After completing validate_index(), we wait until all transactions that * were alive at the time of the reference snapshot are gone; this is ! * necessary to be sure there are none left with a serializable snapshot * older than the reference (and hence possibly able to see tuples we did * not index). Then we mark the index "indisvalid" and commit. Subsequent * transactions will be able to use it for queries. --- 2133,2139 ---- * * After completing validate_index(), we wait until all transactions that * were alive at the time of the reference snapshot are gone; this is ! * necessary to be sure there are none left with a transaction-based snapshot * older than the reference (and hence possibly able to see tuples we did * not index). Then we mark the index "indisvalid" and commit. Subsequent * transactions will be able to use it for queries. *** a/src/backend/commands/trigger.c --- b/src/backend/commands/trigger.c *************** *** 2319,2325 **** ltrmark:; case HeapTupleUpdated: ReleaseBuffer(buffer); ! if (IsXactIsoLevelSerializable) ereport(ERROR, (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), errmsg("could not serialize access due to concurrent update"))); --- 2319,2325 ---- case HeapTupleUpdated: ReleaseBuffer(buffer); ! if (IsXactIsoLevelXactSnapshotBased) ereport(ERROR, (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), errmsg("could not serialize access due to concurrent update"))); *** a/src/backend/executor/execMain.c --- b/src/backend/executor/execMain.c *************** *** 1538,1544 **** EvalPlanQualFetch(EState *estate, Relation relation, int lockmode, case HeapTupleUpdated: ReleaseBuffer(buffer); ! if (IsXactIsoLevelSerializable) ereport(ERROR, (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), errmsg("could not serialize access due to concurrent update"))); --- 1538,1544 ---- case HeapTupleUpdated: ReleaseBuffer(buffer); ! if (IsXactIsoLevelXactSnapshotBased) ereport(ERROR, (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), errmsg("could not serialize access due to concurrent update"))); *** a/src/backend/executor/nodeBitmapHeapscan.c --- b/src/backend/executor/nodeBitmapHeapscan.c *************** *** 42,47 **** --- 42,48 ---- #include "executor/nodeBitmapHeapscan.h" #include "pgstat.h" #include "storage/bufmgr.h" + #include "storage/predicate.h" #include "utils/memutils.h" #include "utils/snapmgr.h" #include "utils/tqual.h" *************** *** 114,119 **** BitmapHeapNext(BitmapHeapScanState *node) --- 115,123 ---- #endif /* USE_PREFETCH */ } + /* TODO SSI: Lock at tuple level subject to granularity promotion. */ + PredicateLockRelation(node->ss.ss_currentRelation); + for (;;) { Page dp; *** a/src/backend/executor/nodeIndexscan.c --- b/src/backend/executor/nodeIndexscan.c *************** *** 30,35 **** --- 30,36 ---- #include "executor/execdebug.h" #include "executor/nodeIndexscan.h" #include "optimizer/clauses.h" + #include "storage/predicate.h" #include "utils/array.h" #include "utils/lsyscache.h" #include "utils/memutils.h" *************** *** 72,77 **** IndexNext(IndexScanState *node) --- 73,81 ---- econtext = node->ss.ps.ps_ExprContext; slot = node->ss.ss_ScanTupleSlot; + /* TODO SSI: Lock at tuple level subject to granularity promotion. */ + PredicateLockRelation(node->ss.ss_currentRelation); + /* * ok, now that we have what we need, fetch the next tuple. */ *** a/src/backend/executor/nodeLockRows.c --- b/src/backend/executor/nodeLockRows.c *************** *** 130,136 **** lnext: break; case HeapTupleUpdated: ! if (IsXactIsoLevelSerializable) ereport(ERROR, (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), errmsg("could not serialize access due to concurrent update"))); --- 130,136 ---- break; case HeapTupleUpdated: ! if (IsXactIsoLevelXactSnapshotBased) ereport(ERROR, (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), errmsg("could not serialize access due to concurrent update"))); *** a/src/backend/executor/nodeModifyTable.c --- b/src/backend/executor/nodeModifyTable.c *************** *** 326,332 **** ldelete:; break; case HeapTupleUpdated: ! if (IsXactIsoLevelSerializable) ereport(ERROR, (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), errmsg("could not serialize access due to concurrent update"))); --- 326,332 ---- break; case HeapTupleUpdated: ! if (IsXactIsoLevelXactSnapshotBased) ereport(ERROR, (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), errmsg("could not serialize access due to concurrent update"))); *************** *** 514,520 **** lreplace:; break; case HeapTupleUpdated: ! if (IsXactIsoLevelSerializable) ereport(ERROR, (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), errmsg("could not serialize access due to concurrent update"))); --- 514,520 ---- break; case HeapTupleUpdated: ! if (IsXactIsoLevelXactSnapshotBased) ereport(ERROR, (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), errmsg("could not serialize access due to concurrent update"))); *** a/src/backend/executor/nodeSeqscan.c --- b/src/backend/executor/nodeSeqscan.c *************** *** 28,33 **** --- 28,34 ---- #include "access/relscan.h" #include "executor/execdebug.h" #include "executor/nodeSeqscan.h" + #include "storage/predicate.h" static void InitScanRelation(SeqScanState *node, EState *estate); static TupleTableSlot *SeqNext(SeqScanState *node); *************** *** 106,116 **** SeqRecheck(SeqScanState *node, TupleTableSlot *slot) --- 107,122 ---- * tuple. * We call the ExecScan() routine and pass it the appropriate * access method functions. + * For serializable transactions, we first lock the entire relation. + * TODO SSI: Would it make sense to optimize cases where the plan + * includes a LIMIT, such that individual tuples are + * locked instead, subject to granularity promotion? * ---------------------------------------------------------------- */ TupleTableSlot * ExecSeqScan(SeqScanState *node) { + PredicateLockRelation(node->ss_currentRelation); return ExecScan((ScanState *) node, (ExecScanAccessMtd) SeqNext, (ExecScanRecheckMtd) SeqRecheck); *** a/src/backend/executor/nodeTidscan.c --- b/src/backend/executor/nodeTidscan.c *************** *** 31,36 **** --- 31,37 ---- #include "executor/nodeTidscan.h" #include "optimizer/clauses.h" #include "storage/bufmgr.h" + #include "storage/predicate.h" #include "utils/array.h" *************** *** 308,313 **** TidNext(TidScanState *node) --- 309,317 ---- node->tss_TidPtr++; } + /* TODO SSI: Lock at tuple level subject to granularity promotion. */ + PredicateLockRelation(heapRelation); + while (node->tss_TidPtr >= 0 && node->tss_TidPtr < numTids) { tuple->t_self = tidList[node->tss_TidPtr]; *** a/src/backend/storage/lmgr/Makefile --- b/src/backend/storage/lmgr/Makefile *************** *** 12,18 **** subdir = src/backend/storage/lmgr top_builddir = ../../../.. include $(top_builddir)/src/Makefile.global ! OBJS = lmgr.o lock.o proc.o deadlock.o lwlock.o spin.o s_lock.o include $(top_srcdir)/src/backend/common.mk --- 12,18 ---- top_builddir = ../../../.. include $(top_builddir)/src/Makefile.global ! OBJS = lmgr.o lock.o proc.o deadlock.o lwlock.o spin.o s_lock.o predicate.o include $(top_srcdir)/src/backend/common.mk *** /dev/null --- b/src/backend/storage/lmgr/predicate.c *************** *** 0 **** --- 1,43 ---- + /*------------------------------------------------------------------------- + * + * predicate.c + * POSTGRES predicate locking + * to support full serializable transaction isolation + * + * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * $PostgreSQL$ + * + *------------------------------------------------------------------------- + */ + /* + * INTERFACE ROUTINES + * + * PredicateLockRelation(Relation relation) + */ + + #include "postgres.h" + + #include "access/xact.h" + #include "storage/predicate.h" + #include "storage/lmgr.h" + + + /* ---------------------------------------------------------------- + * Gets a predicate lock at the relation level. + * Skip if not in full serializable transaction isolation level. + * Skip if this is a temporary table or toast table.. + * Skip this if a write lock exists for the relation; otherwise, + * clear any finer-grained predicate locks this session has on the relation. + * TODO SSI: Some of the above. Using SIREAD locks. + * ---------------------------------------------------------------- + */ + void + PredicateLockRelation(Relation relation) + { + if (IsXactIsoLevelFullySerializable && !(relation->rd_istemp)) + LockRelation(relation, ShareLock); + } *** a/src/backend/utils/adt/ri_triggers.c --- b/src/backend/utils/adt/ri_triggers.c *************** *** 3314,3320 **** ri_PerformCheck(RI_QueryKey *qkey, SPIPlanPtr qplan, /* * In READ COMMITTED mode, we just need to use an up-to-date regular * snapshot, and we will see all rows that could be interesting. But in ! * SERIALIZABLE mode, we can't change the transaction snapshot. If the * caller passes detectNewRows == false then it's okay to do the query * with the transaction snapshot; otherwise we use a current snapshot, and * tell the executor to error out if it finds any rows under the current --- 3314,3320 ---- /* * In READ COMMITTED mode, we just need to use an up-to-date regular * snapshot, and we will see all rows that could be interesting. But in ! * xact-snapshot-based modes, we can't change the transaction snapshot. If the * caller passes detectNewRows == false then it's okay to do the query * with the transaction snapshot; otherwise we use a current snapshot, and * tell the executor to error out if it finds any rows under the current *************** *** 3322,3328 **** ri_PerformCheck(RI_QueryKey *qkey, SPIPlanPtr qplan, * that SPI_execute_snapshot will register the snapshots, so we don't need * to bother here. */ ! if (IsXactIsoLevelSerializable && detectNewRows) { CommandCounterIncrement(); /* be sure all my own work is visible */ test_snapshot = GetLatestSnapshot(); --- 3322,3328 ---- * that SPI_execute_snapshot will register the snapshots, so we don't need * to bother here. */ ! if (IsXactIsoLevelXactSnapshotBased && detectNewRows) { CommandCounterIncrement(); /* be sure all my own work is visible */ test_snapshot = GetLatestSnapshot(); *** a/src/backend/utils/time/snapmgr.c --- b/src/backend/utils/time/snapmgr.c *************** *** 37,44 **** /* ! * CurrentSnapshot points to the only snapshot taken in a serializable ! * transaction, and to the latest one taken in a read-committed transaction. * SecondarySnapshot is a snapshot that's always up-to-date as of the current * instant, even on a serializable transaction. It should only be used for * special-purpose code (say, RI checking.) --- 37,44 ---- /* ! * CurrentSnapshot points to the only snapshot taken in a xact-snapshot-based ! * transaction; otherwise to the latest one taken. * SecondarySnapshot is a snapshot that's always up-to-date as of the current * instant, even on a serializable transaction. It should only be used for * special-purpose code (say, RI checking.) *************** *** 97,107 **** static int RegisteredSnapshots = 0; bool FirstSnapshotSet = false; /* ! * Remembers whether this transaction registered a serializable snapshot at * start. We cannot trust FirstSnapshotSet in combination with ! * IsXactIsoLevelSerializable, because GUC may be reset before us. */ ! static bool registered_serializable = false; static Snapshot CopySnapshot(Snapshot snapshot); --- 97,107 ---- bool FirstSnapshotSet = false; /* ! * Remembers whether this transaction registered a transaction-based snapshot at * start. We cannot trust FirstSnapshotSet in combination with ! * IsXactIsoLevelXactSnapshotBased, because GUC may be reset before us. */ ! static bool registered_xact_snapshot = false; static Snapshot CopySnapshot(Snapshot snapshot); *************** *** 130,150 **** GetTransactionSnapshot(void) FirstSnapshotSet = true; /* ! * In serializable mode, the first snapshot must live until end of * xact regardless of what the caller does with it, so we must * register it internally here and unregister it at end of xact. */ ! if (IsXactIsoLevelSerializable) { CurrentSnapshot = RegisterSnapshotOnOwner(CurrentSnapshot, TopTransactionResourceOwner); ! registered_serializable = true; } return CurrentSnapshot; } ! if (IsXactIsoLevelSerializable) return CurrentSnapshot; CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData); --- 130,150 ---- FirstSnapshotSet = true; /* ! * In xact-snapshot-based isolation levels, the first snapshot must live until end of * xact regardless of what the caller does with it, so we must * register it internally here and unregister it at end of xact. */ ! if (IsXactIsoLevelXactSnapshotBased) { CurrentSnapshot = RegisterSnapshotOnOwner(CurrentSnapshot, TopTransactionResourceOwner); ! registered_xact_snapshot = true; } return CurrentSnapshot; } ! if (IsXactIsoLevelXactSnapshotBased) return CurrentSnapshot; CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData); *************** *** 155,161 **** GetTransactionSnapshot(void) /* * GetLatestSnapshot * Get a snapshot that is up-to-date as of the current instant, ! * even if we are executing in SERIALIZABLE mode. */ Snapshot GetLatestSnapshot(void) --- 155,161 ---- /* * GetLatestSnapshot * Get a snapshot that is up-to-date as of the current instant, ! * even if we are executing in xact-snapshot-based mode. */ Snapshot GetLatestSnapshot(void) *************** *** 515,527 **** void AtEarlyCommit_Snapshot(void) { /* ! * On a serializable transaction we must unregister our private refcount ! * to the serializable snapshot. */ ! if (registered_serializable) UnregisterSnapshotFromOwner(CurrentSnapshot, TopTransactionResourceOwner); ! registered_serializable = false; } --- 515,527 ---- AtEarlyCommit_Snapshot(void) { /* ! * On a xact-snapshot-based transaction we must unregister our private refcount ! * to the xact snapshot. */ ! if (registered_xact_snapshot) UnregisterSnapshotFromOwner(CurrentSnapshot, TopTransactionResourceOwner); ! registered_xact_snapshot = false; } *************** *** 557,561 **** AtEOXact_Snapshot(bool isCommit) SecondarySnapshot = NULL; FirstSnapshotSet = false; ! registered_serializable = false; } --- 557,561 ---- SecondarySnapshot = NULL; FirstSnapshotSet = false; ! registered_xact_snapshot = false; } *** a/src/include/access/xact.h --- b/src/include/access/xact.h *************** *** 32,41 **** extern int DefaultXactIsoLevel; extern int XactIsoLevel; /* ! * We only implement two isolation levels internally. This macro should ! * be used to check which one is selected. */ ! #define IsXactIsoLevelSerializable (XactIsoLevel >= XACT_REPEATABLE_READ) /* Xact read-only state */ extern bool DefaultXactReadOnly; --- 32,45 ---- extern int XactIsoLevel; /* ! * We implement three isolation levels internally. ! * The two stronger ones use one snapshot per database transaction; ! * the others use one snapshot per statement. ! * Serializable uses predicate locks. ! * These macros should be used to check which isolation level is selected. */ ! #define IsXactIsoLevelXactSnapshotBased (XactIsoLevel >= XACT_REPEATABLE_READ) ! #define IsXactIsoLevelFullySerializable (XactIsoLevel == XACT_SERIALIZABLE) /* Xact read-only state */ extern bool DefaultXactReadOnly; *** /dev/null --- b/src/include/storage/predicate.h *************** *** 0 **** --- 1,22 ---- + /*------------------------------------------------------------------------- + * + * predicate.h + * POSTGRES predicate locking definitions. + * + * + * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * $PostgreSQL$ + * + *------------------------------------------------------------------------- + */ + #ifndef PREDICATE_H + #define PREDICATE_H + + #include "storage/lock.h" + #include "utils/relcache.h" + + extern void PredicateLockRelation(Relation relation); + + #endif /* PREDICATE_H */