*** a/src/backend/catalog/heap.c --- b/src/backend/catalog/heap.c *************** *** 63,68 **** --- 63,69 ---- #include "parser/parse_relation.h" #include "storage/bufmgr.h" #include "storage/freespace.h" + #include "storage/predicate.h" #include "storage/smgr.h" #include "utils/acl.h" #include "utils/builtins.h" *************** *** 1658,1663 **** heap_drop_with_catalog(Oid relid) --- 1659,1672 ---- CheckTableNotInUse(rel, "DROP TABLE"); /* + * This effectively deletes all rows in the table, and may be done in a + * serializable transaction. In that case we must record a rw-conflict in + * to this transaction from each transaction holding a predicate lock on + * the table. + */ + CheckTableForSerializableConflictIn(rel); + + /* * Delete pg_foreign_table tuple first. */ if (rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE) *** a/src/backend/catalog/index.c --- b/src/backend/catalog/index.c *************** *** 54,59 **** --- 54,60 ---- #include "parser/parser.h" #include "storage/bufmgr.h" #include "storage/lmgr.h" + #include "storage/predicate.h" #include "storage/procarray.h" #include "storage/smgr.h" #include "utils/builtins.h" *************** *** 1312,1317 **** index_drop(Oid indexId) --- 1313,1324 ---- CheckTableNotInUse(userIndexRelation, "DROP INDEX"); /* + * All predicate locks on the index are about to be made invalid. + * Promote them to relation locks on the heap. + */ + TransferPredicateLocksToHeapRelation(userIndexRelation); + + /* * Schedule physical removal of the files */ RelationDropStorage(userIndexRelation); *************** *** 2799,2804 **** reindex_index(Oid indexId, bool skip_constraint_checks) --- 2806,2817 ---- */ CheckTableNotInUse(iRel, "REINDEX INDEX"); + /* + * All predicate locks on the index are about to be made invalid. + * Promote them to relation locks on the heap. + */ + TransferPredicateLocksToHeapRelation(iRel); + PG_TRY(); { /* Suppress use of the target index while rebuilding it */ *** a/src/backend/commands/cluster.c --- b/src/backend/commands/cluster.c *************** *** 39,44 **** --- 39,45 ---- #include "optimizer/planner.h" #include "storage/bufmgr.h" #include "storage/lmgr.h" + #include "storage/predicate.h" #include "storage/procarray.h" #include "storage/smgr.h" #include "utils/acl.h" *************** *** 385,390 **** cluster_rel(Oid tableOid, Oid indexOid, bool recheck, bool verbose, --- 386,397 ---- if (OidIsValid(indexOid)) check_index_is_clusterable(OldHeap, indexOid, recheck, AccessExclusiveLock); + /* + * All predicate locks on the table and its indexes are about to be made + * invalid. Promote them to relation locks on the heap. + */ + TransferPredicateLocksToHeapRelation(OldHeap); + /* rebuild_relation does all the dirty work */ rebuild_relation(OldHeap, indexOid, freeze_min_age, freeze_table_age, verbose); *** a/src/backend/commands/tablecmds.c --- b/src/backend/commands/tablecmds.c *************** *** 70,75 **** --- 70,76 ---- #include "storage/bufmgr.h" #include "storage/lmgr.h" #include "storage/lock.h" + #include "storage/predicate.h" #include "storage/smgr.h" #include "utils/acl.h" #include "utils/builtins.h" *************** *** 1040,1045 **** ExecuteTruncate(TruncateStmt *stmt) --- 1041,1054 ---- Oid toast_relid; /* + * This effectively deletes all rows in the table, and may be done + * in a serializable transaction. In that case we must record a + * rw-conflict in to this transaction from each transaction + * holding a predicate lock on the table. + */ + CheckTableForSerializableConflictIn(rel); + + /* * Need the full transaction-safe pushups. * * Create a new empty storage file for the relation, and assign it *** a/src/backend/storage/lmgr/predicate.c --- b/src/backend/storage/lmgr/predicate.c *************** *** 155,160 **** --- 155,161 ---- * BlockNumber newblkno); * PredicateLockPageCombine(Relation relation, BlockNumber oldblkno, * BlockNumber newblkno); + * TransferPredicateLocksToHeapRelation(const Relation relation) * ReleasePredicateLocks(bool isCommit) * * conflict detection (may also trigger rollback) *************** *** 162,167 **** --- 163,169 ---- * HeapTupleData *tup, Buffer buffer) * CheckForSerializableConflictIn(Relation relation, HeapTupleData *tup, * Buffer buffer) + * CheckTableForSerializableConflictIn(const Relation relation) * * final rollback checking * PreCommit_CheckForSerializationFailure(void) *************** *** 257,266 **** #define SxactIsMarkedForDeath(sxact) (((sxact)->flags & SXACT_FLAG_MARKED_FOR_DEATH) != 0) /* ! * When a public interface method is called for a split on an index relation, ! * this is the test to see if we should do a quick return. */ ! #define SkipSplitTracking(relation) \ (((relation)->rd_id < FirstBootstrapObjectId) \ || RelationUsesLocalBuffers(relation)) --- 259,269 ---- #define SxactIsMarkedForDeath(sxact) (((sxact)->flags & SXACT_FLAG_MARKED_FOR_DEATH) != 0) /* ! * When a public interface method is called which needs to manipulate locks on ! * a particular relation regardless of the lock holder, do a quick check to ! * see if this relation can be skipped. */ ! #define SkipPredicateLocksForRelation(relation) \ (((relation)->rd_id < FirstBootstrapObjectId) \ || RelationUsesLocalBuffers(relation)) *************** *** 273,279 **** ((!IsolationIsSerializable()) \ || ((MySerializableXact == InvalidSerializableXact)) \ || ReleasePredicateLocksIfROSafe() \ ! || SkipSplitTracking(relation)) /* --- 276,282 ---- ((!IsolationIsSerializable()) \ || ((MySerializableXact == InvalidSerializableXact)) \ || ReleasePredicateLocksIfROSafe() \ ! || SkipPredicateLocksForRelation(relation)) /* *************** *** 434,439 **** static bool TransferPredicateLocksToNewTarget(const PREDICATELOCKTARGETTAG oldta --- 437,443 ---- const PREDICATELOCKTARGETTAG newtargettag, bool removeOld); static void PredicateLockAcquire(const PREDICATELOCKTARGETTAG *targettag); + static void DropAllPredicateLocksFromTableImpl(const Relation relation, bool transfer); static void SetNewSxactGlobalXmin(void); static bool ReleasePredicateLocksIfROSafe(void); static void ClearOldPredicateLocks(void); *************** *** 2543,2548 **** exit: --- 2547,2781 ---- return !outOfShmem; } + /* + * Drop all predicate locks of any granularity from the specified relation, + * which can be a heap relation or an index relation. Optionally acquire a + * relation lock on the heap for any transactions with any lock(s) on the + * specified relation. + * + * This requires grabbing a lot of LW locks and scanning the entire lock + * target table for matches. That makes this more expensive than most + * predicate lock management functions, but it will only be called for DDL + * type commands and there are fast returns when no serializable transactions + * are active or the relation is temporary. + * + * We are not using the TransferPredicateLocksToNewTarget function because + * it acquires its own locks on the partitions of the two targets involved, + * and we'll already be holding all partition locks. + * + * We can't throw an error from here, because the call could be from a + * transaction which is not serializable. + * + * NOTE: This is currently only called with transfer set to true, but that may + * change. If we decide to clean up the locks from a table on commit of a + * transaction which executed DROP TABLE, the false condition will be useful. + */ + static void + DropAllPredicateLocksFromTableImpl(const Relation relation, bool transfer) + { + HASH_SEQ_STATUS seqstat; + PREDICATELOCKTARGET *oldtarget; + PREDICATELOCKTARGET *heaptarget; + PREDICATELOCKTARGETTAG heaptargettag; + PREDICATELOCKTAG newpredlocktag; + Oid dbId; + Oid relId; + Oid heapId; + int i; + bool isIndex; + bool found; + uint32 reservedtargettaghash; + uint32 heaptargettaghash; + + /* + * Bail out quickly if there are no serializable transactions running. + * It's safe to check this without taking locks because the caller is + * holding an ACCESS EXCLUSIVE lock on the relation. No new locks which + * would matter here can be acquired while that is held. + */ + if (!TransactionIdIsValid(PredXact->SxactGlobalXmin)) + return; + + if (SkipPredicateLocksForRelation(relation)) + return; + + dbId = relation->rd_node.dbNode; + relId = relation->rd_id; + if (relation->rd_index == NULL) + { + isIndex = false; + heapId = relId; + } + else + { + isIndex = true; + heapId = relation->rd_index->indrelid; + } + Assert(heapId != InvalidOid); + Assert(transfer || !isIndex); /* index OID only makes sense with + * transfer */ + + SET_PREDICATELOCKTARGETTAG_RELATION(heaptargettag, dbId, heapId); + heaptargettaghash = PredicateLockTargetTagHashCode(&heaptargettag); + heaptarget = NULL; /* Retrieve first time needed, then keep. */ + + LWLockAcquire(SerializablePredicateLockListLock, LW_EXCLUSIVE); + for (i = 0; i < NUM_PREDICATELOCK_PARTITIONS; i++) + LWLockAcquire(FirstPredicateLockMgrLock + i, LW_EXCLUSIVE); + LWLockAcquire(SerializableXactHashLock, LW_EXCLUSIVE); + + /* + * Remove the reserved entry to give us scratch space, so we know we'll be + * able to create the new lock target. + */ + reservedtargettaghash = 0; /* quiet compiler warning */ + if (transfer) + { + reservedtargettaghash = PredicateLockTargetTagHashCode(&ReservedTargetTag); + hash_search_with_hash_value(PredicateLockTargetHash, + &ReservedTargetTag, + reservedtargettaghash, + HASH_REMOVE, &found); + Assert(found); + } + + /* Scan through target map */ + hash_seq_init(&seqstat, PredicateLockTargetHash); + + while ((oldtarget = (PREDICATELOCKTARGET *) hash_seq_search(&seqstat))) + { + PREDICATELOCK *oldpredlock; + + /* + * Check whether this is a target which needs attention. + */ + if (GET_PREDICATELOCKTARGETTAG_RELATION(oldtarget->tag) != relId) + continue; /* wrong relation id */ + if (GET_PREDICATELOCKTARGETTAG_DB(oldtarget->tag) != dbId) + continue; /* wrong database id */ + if (transfer && !isIndex + && GET_PREDICATELOCKTARGETTAG_TYPE(oldtarget->tag) == PREDLOCKTAG_RELATION) + continue; /* already the right lock */ + + /* + * If we made it here, we have work to do. We make sure the heap + * relation lock exists, then we walk the list of predicate locks for + * the old target we found, moving all locks to the heap relation lock + * -- unless they already hold that. + */ + + /* + * First make sure we have the heap relation target. We only need to + * do this once. + */ + if (transfer && heaptarget == NULL) + { + heaptarget = hash_search_with_hash_value(PredicateLockTargetHash, + &heaptargettag, + heaptargettaghash, + HASH_ENTER, &found); + Assert(heaptarget != NULL); + if (!found) + SHMQueueInit(&heaptarget->predicateLocks); + newpredlocktag.myTarget = heaptarget; + } + + /* + * Loop through moving locks from this target to the relation target. + */ + oldpredlock = (PREDICATELOCK *) + SHMQueueNext(&(oldtarget->predicateLocks), + &(oldtarget->predicateLocks), + offsetof(PREDICATELOCK, targetLink)); + while (oldpredlock) + { + PREDICATELOCK *nextpredlock; + PREDICATELOCK *newpredlock; + SerCommitSeqNo oldCommitSeqNo = oldpredlock->commitSeqNo; + + nextpredlock = (PREDICATELOCK *) + SHMQueueNext(&(oldtarget->predicateLocks), + &(oldpredlock->targetLink), + offsetof(PREDICATELOCK, targetLink)); + newpredlocktag.myXact = oldpredlock->tag.myXact; + + /* + * It's OK to remove the old lock first because of the ACCESS + * EXCLUSIVE lock on the heap relation when this is called. It is + * desirable to do so because it avoids any chance of running out + * of lock structure entries for the table. + */ + SHMQueueDelete(&(oldpredlock->xactLink)); + /* No need for retail delete from oldtarget list. */ + hash_search(PredicateLockHash, + &oldpredlock->tag, + HASH_REMOVE, &found); + Assert(found); + + if (transfer) + { + newpredlock = (PREDICATELOCK *) + hash_search_with_hash_value + (PredicateLockHash, + &newpredlocktag, + PredicateLockHashCodeFromTargetHashCode(&newpredlocktag, + heaptargettaghash), + HASH_ENTER_NULL, &found); + Assert(newpredlock != NULL); + if (!found) + { + SHMQueueInsertBefore(&(heaptarget->predicateLocks), + &(newpredlock->targetLink)); + SHMQueueInsertBefore(&(newpredlocktag.myXact->predicateLocks), + &(newpredlock->xactLink)); + newpredlock->commitSeqNo = oldCommitSeqNo; + } + else + { + if (newpredlock->commitSeqNo < oldCommitSeqNo) + newpredlock->commitSeqNo = oldCommitSeqNo; + } + + Assert(newpredlock->commitSeqNo != 0); + Assert((newpredlock->commitSeqNo == InvalidSerCommitSeqNo) + || (newpredlock->tag.myXact == OldCommittedSxact)); + } + + oldpredlock = nextpredlock; + } + + hash_search(PredicateLockTargetHash, &oldtarget->tag, HASH_REMOVE, &found); + Assert(found); + } + + if (transfer) + { + /* Put the reserved entry back */ + hash_search_with_hash_value(PredicateLockTargetHash, + &ReservedTargetTag, + reservedtargettaghash, + HASH_ENTER, &found); + Assert(!found); + } + + /* Release locks in reverse order */ + LWLockRelease(SerializableXactHashLock); + for (i = NUM_PREDICATELOCK_PARTITIONS - 1; i >= 0; i--) + LWLockRelease(FirstPredicateLockMgrLock + i); + LWLockRelease(SerializablePredicateLockListLock); + } + + /* + * TransferPredicateLocksToHeapRelation + * For all transactions, transfer all predicate locks for the given + * relation to a single relation lock on the heap. + */ + void + TransferPredicateLocksToHeapRelation(const Relation relation) + { + DropAllPredicateLocksFromTableImpl(relation, true); + } + /* * PredicateLockPageSplit *************** *** 2581,2587 **** PredicateLockPageSplit(const Relation relation, const BlockNumber oldblkno, if (!TransactionIdIsValid(PredXact->SxactGlobalXmin)) return; ! if (SkipSplitTracking(relation)) return; Assert(oldblkno != newblkno); --- 2814,2820 ---- if (!TransactionIdIsValid(PredXact->SxactGlobalXmin)) return; ! if (SkipPredicateLocksForRelation(relation)) return; Assert(oldblkno != newblkno); *************** *** 3792,3797 **** CheckForSerializableConflictIn(const Relation relation, const HeapTuple tuple, --- 4025,4129 ---- } /* + * CheckTableForSerializableConflictIn + * The entire table is going through a DDL-style logical mass delete + * (like TRUNCATE TABLE or DROP TABLE). While these operations do not + * operate entirely within the bounds of snapshot isolation, they can + * occur inside of a serialziable transaction, and will logically occur + * after any reads which saw rows which were destroyed by these + * operations, so we do what we can to serialize properly under SSI. + * + * The relation passed in must be a heap relation for a table. Any predicate + * lock of any granularity on the heap will cause a rw-conflict in to this + * transaction. Predicate locks on indexes do not matter because they only + * exist to guard against conflicting inserts into the index, and this is a + * mass *delete*. + * + * This should be done before altering the predicate locks because the + * transaction could be rolled back because of a conflict, in which case the + * lock changes are not needed. + */ + void + CheckTableForSerializableConflictIn(const Relation relation) + { + HASH_SEQ_STATUS seqstat; + PREDICATELOCKTARGET *target; + Oid dbId; + Oid heapId; + int i; + + /* + * Bail out quickly if there are no serializable transactions running. + * It's safe to check this without taking locks because the caller is + * holding an ACCESS EXCLUSIVE lock on the relation. No new locks which + * would matter here can be acquired while that is held. + */ + if (!TransactionIdIsValid(PredXact->SxactGlobalXmin)) + return; + + if (SkipSerialization(relation)) + return; + + Assert(relation->rd_index == NULL); /* not an index relation */ + + dbId = relation->rd_node.dbNode; + heapId = relation->rd_id; + + LWLockAcquire(SerializablePredicateLockListLock, LW_EXCLUSIVE); + for (i = 0; i < NUM_PREDICATELOCK_PARTITIONS; i++) + LWLockAcquire(FirstPredicateLockMgrLock + i, LW_SHARED); + LWLockAcquire(SerializableXactHashLock, LW_SHARED); + + /* Scan through target list */ + hash_seq_init(&seqstat, PredicateLockTargetHash); + + while ((target = (PREDICATELOCKTARGET *) hash_seq_search(&seqstat))) + { + PREDICATELOCK *predlock; + + /* + * Check whether this is a target which needs attention. + */ + if (GET_PREDICATELOCKTARGETTAG_RELATION(target->tag) != heapId) + continue; /* wrong relation id */ + if (GET_PREDICATELOCKTARGETTAG_DB(target->tag) != dbId) + continue; /* wrong database id */ + + /* + * Loop through locks for this target and flag conflicts. + */ + predlock = (PREDICATELOCK *) + SHMQueueNext(&(target->predicateLocks), + &(target->predicateLocks), + offsetof(PREDICATELOCK, targetLink)); + while (predlock) + { + PREDICATELOCK *nextpredlock; + + nextpredlock = (PREDICATELOCK *) + SHMQueueNext(&(target->predicateLocks), + &(predlock->targetLink), + offsetof(PREDICATELOCK, targetLink)); + + if (predlock->tag.myXact != MySerializableXact + && !RWConflictExists(predlock->tag.myXact, + (SERIALIZABLEXACT *) MySerializableXact)) + FlagRWConflict(predlock->tag.myXact, + (SERIALIZABLEXACT *) MySerializableXact); + + predlock = nextpredlock; + } + } + + /* Release locks in reverse order */ + LWLockRelease(SerializableXactHashLock); + for (i = NUM_PREDICATELOCK_PARTITIONS - 1; i >= 0; i--) + LWLockRelease(FirstPredicateLockMgrLock + i); + LWLockRelease(SerializablePredicateLockListLock); + } + + + /* * Flag a rw-dependency between two serializable transactions. * * The caller is responsible for ensuring that we have a LW lock on *** a/src/include/storage/predicate.h --- b/src/include/storage/predicate.h *************** *** 49,59 **** extern void PredicateLockPage(const Relation relation, const BlockNumber blkno); --- 49,61 ---- extern void PredicateLockTuple(const Relation relation, const HeapTuple tuple); extern void PredicateLockPageSplit(const Relation relation, const BlockNumber oldblkno, const BlockNumber newblkno); extern void PredicateLockPageCombine(const Relation relation, const BlockNumber oldblkno, const BlockNumber newblkno); + extern void TransferPredicateLocksToHeapRelation(const Relation relation); extern void ReleasePredicateLocks(const bool isCommit); /* conflict detection (may also trigger rollback) */ extern void CheckForSerializableConflictOut(const bool valid, const Relation relation, const HeapTuple tuple, const Buffer buffer); extern void CheckForSerializableConflictIn(const Relation relation, const HeapTuple tuple, const Buffer buffer); + extern void CheckTableForSerializableConflictIn(const Relation relation); /* final rollback checking */ extern void PreCommit_CheckForSerializationFailure(void);