diff --git a/src/backend/executor/execReplication.c b/src/backend/executor/execReplication.c index 61d51939b55..1bf8fac7bea 100644 --- a/src/backend/executor/execReplication.c +++ b/src/backend/executor/execReplication.c @@ -470,16 +470,17 @@ BuildConflictIndexInfo(ResultRelInfo *resultRelInfo, Oid conflictindex) } /* - * If the tuple is identified as dead and was deleted by a transaction with a - * more recent commit timestamp, update the transaction ID, deletion time, and - * origin information associated with this tuple. + * If the tuple is recently dead and was deleted by a transaction with a newer + * commit timestamp than previously recorded, update the associated transaction + * ID, commit time, and origin. This helps ensure that conflict detection uses + * the most recent and relevant deletion metadata. */ static void -update_recent_dead_tuple_info(TupleTableSlot *scanslot, - TransactionId oldestxmin, - TransactionId *delete_xid, - TimestampTz *delete_time, - RepOriginId *delete_origin) +update_most_recent_deletion_info(TupleTableSlot *scanslot, + TransactionId oldestxmin, + TransactionId *delete_xid, + TimestampTz *delete_time, + RepOriginId *delete_origin) { BufferHeapTupleTableSlot *hslot; HeapTuple tuple; @@ -532,37 +533,31 @@ update_recent_dead_tuple_info(TupleTableSlot *scanslot, * returns the transaction ID, origin, and commit timestamp of the transaction * that deleted this tuple. * - * 'oldestxmin' serves as a cutoff transaction ID. Tuples deleted by transaction - * IDs greater than or equal to 'oldestxmin' are considered recently dead. + * 'oldestxmin' acts as a cutoff transaction ID. Tuples deleted by transactions + * with IDs >= 'oldestxmin' are considered recently dead and are eligible for + * conflict detection. * - * We scan all matching dead tuples in the relation to find the most recently - * deleted one, rather than stopping at the first match. This is because only - * the latest deletion information is relevant for resolving conflicts. + * Instead of stopping at the first match, we scan all matching dead tuples to + * identify most recent deletion. This is crucial because only the latest + * deletion is relevant for resolving conflicts. * - * For example, consider two dead tuples on the subscriber, which can occur when - * a row is deleted, re-inserted, and deleted again: + * For example, consider a scenario on the subscriber where a row is deleted, + * re-inserted, and then deleted again only on the subscriber: * * - (pk, 1) - deleted at 9:00, * - (pk, 1) - deleted at 9:02, * - * With a remote update (pk, 1) -> (pk, 2) timestamped at 9:01. + * Now, a remote update arrives: (pk, 1) -> (pk, 2), timestamped at 9:01. * - * If the first deleted tuple scanned is the older one (9:00), returning only - * this outdated tuple may lead users to wrongly apply the remote update using a - * last-update-win strategy. The appropriate action is to skip the remote - * update, recognizing the more recent deletion at 9:02. See comments atop - * worker.c for details. + * If we mistakenly return the older deletion (9:00), the system may wrongly + * apply the remote update using a last-update-wins strategy. Instead, we must + * recognize the more recent deletion at 9:02 and skip the update. See + * comments atop worker.c for details. Note, as of now, conflict resolution + * is not implemented. Consequently, the system may incorrectly report the + * older tuple as the conflicted one, leading to misleading results. * - * The commit timestamp of the transaction that deleted the tuple is used to - * determine whether the tuple is the most recently deleted one. - * - * This function performs a full table scan instead of using indexes, and it - * should be used only when the index scans could miss deleted tuples, such as - * when an index has been re-indexed or re-created using CONCURRENTLY option - * during change applications. While this approach may be slow on large tables, - * it is considered acceptable because it is only used in rare conflict cases - * where the target row for an update cannot be found and no usable index is - * found. + * The commit timestamp of the deleting transaction is used to determine which + * tuple was deleted most recently. */ bool FindRecentlyDeletedTupleInfoSeq(Relation rel, TupleTableSlot *searchslot, @@ -584,13 +579,13 @@ FindRecentlyDeletedTupleInfoSeq(Relation rel, TupleTableSlot *searchslot, *delete_time = 0; /* - * When the relation has an replica identity key or primary key that is - * not usable (see IsIndexUsableForFindingDeletedTuple), necessitating a - * full table scan, it is unnecessary to match the full tuple value. This - * is because the remote tuple may not contain all column values and using - * these index are sufficient to locate the target tuple (see - * logicalrep_rel_mark_updatable). So, we only compare indexed column - * values using the bitmap, which we pass to tuples_equal(). + * If the relation has a replica identity key or a primary key that is + * unusable for locating deleted tuples (see + * IsIndexUsableForFindingDeletedTuple), a full table scan becomes + * necessary. In such cases, comparing the entire tuple is not required, + * since the remote tuple might not include all column values. Instead, the + * indexed columns alone are suffcient to identify the target tuple (see + * logicalrep_rel_mark_updatable). */ indexbitmap = RelationGetIndexAttrBitmap(rel, INDEX_ATTR_BITMAP_IDENTITY_KEY); @@ -606,7 +601,7 @@ FindRecentlyDeletedTupleInfoSeq(Relation rel, TupleTableSlot *searchslot, * Start a heap scan using SnapshotAny to identify dead tuples that are * not visible under a standard MVCC snapshot. Tuples from transactions * not yet committed or those just committed prior to the scan are - * excluded in update_recent_dead_tuple_info(). + * excluded in update_most_recent_deletion_info(). */ scan = table_beginscan(rel, SnapshotAny, 0, NULL); scanslot = table_slot_create(rel, NULL); @@ -619,8 +614,8 @@ FindRecentlyDeletedTupleInfoSeq(Relation rel, TupleTableSlot *searchslot, if (!tuples_equal(scanslot, searchslot, eq, indexbitmap)) continue; - update_recent_dead_tuple_info(scanslot, oldestxmin, delete_xid, - delete_time, delete_origin); + update_most_recent_deletion_info(scanslot, oldestxmin, delete_xid, + delete_time, delete_origin); } table_endscan(scan); @@ -670,7 +665,7 @@ FindRecentlyDeletedTupleInfoByIndex(Relation rel, Oid idxoid, * Start an index scan using SnapshotAny to identify dead tuples that are * not visible under a standard MVCC snapshot. Tuples from transactions * not yet committed or those just committed prior to the scan are - * excluded in update_recent_dead_tuple_info(). + * excluded in update_most_recent_deletion_info(). */ scan = index_beginscan(rel, idxrel, SnapshotAny, NULL, skey_attoff, 0); @@ -692,8 +687,8 @@ FindRecentlyDeletedTupleInfoByIndex(Relation rel, Oid idxoid, continue; } - update_recent_dead_tuple_info(scanslot, oldestxmin, delete_xid, - delete_time, delete_origin); + update_most_recent_deletion_info(scanslot, oldestxmin, delete_xid, + delete_time, delete_origin); } index_endscan(scan); diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c index f1e56d84bb3..45ec3ad593d 100644 --- a/src/backend/replication/logical/worker.c +++ b/src/backend/replication/logical/worker.c @@ -2921,6 +2921,10 @@ apply_handle_update_internal(ApplyExecutionData *edata, ConflictType type; TupleTableSlot *newslot = localslot; + /* + * Detecting whether the tuple was recently deleted or never existed is + * crucial to avoid misleading the user during confict handling. + */ if (MySubscription->retaindeadtuples && FindDeletedTupleInLocalRel(localrel, localindexoid, remoteslot, &conflicttuple.xmin, @@ -3163,17 +3167,17 @@ FindReplTupleInLocalRel(ApplyExecutionData *edata, Relation localrel, * Determine whether the index can reliably locate the deleted tuple in the * local relation. * - * An index may exclude deleted tuples if it was re-indexed or re-created using - * the CONCURRENTLY option during change application. Therefore, an index is - * considered usable only if the oldest_nonremovable_xid is greater than the - * index tuple's xmin. This ensures that any tuples deleted prior to the index - * creation or re-indexing are not relevant for conflict detection in the - * current apply worker. + * An index may exclude deleted tuples if it was re-indexed or re-created + * during change application. Therefore, an index is considered usable only + * if the oldest_nonremovable_xid is greater than the index tuple's xmin. + * This ensures that any tuples deleted prior to the index creation or + * re-indexing are not relevant for conflict detection in the current apply + * worker. * - * Note that this might also exclude indexes that are updated due to other - * operations or without the CONCURRENTLY option. However, this is generally - * acceptable, as both the DDL commands that modify indexes and the need to scan - * dead tuples for the update_deleted are relatively rare occurrences. + * Note that indexes may also be excluded if they were modified by other DDL + * operations, such as ALTER INDEX. However, this is acceptable, as the + * likelihood of such DDL changes coinciding with the need to scan dead + * tuples for the update_deleted is low. */ static bool IsIndexUsableForFindingDeletedTuple(Oid localindexoid) @@ -3200,14 +3204,14 @@ IsIndexUsableForFindingDeletedTuple(Oid localindexoid) } /* - * Try to find a deleted tuple in the local relation that matching the values of - * the tuple received from the publication side (in 'remoteslot'). The function - * uses either replica identity index, primary key, index or if needed, - * sequential scan. + * Attempts to locate a deleted tuple in the local relation that matches the + * values of the tuple received from the publication side (in 'remoteslot'). + * The search is performed using either the replica identity index, primary + * key, other available index, or a sequential scan if necessary. * - * Return true if found the deleted tuple. The transaction ID, commit timestamp, - * and origin of the transaction for the deletion, if found, are - * stored in '*delete_xid', '*delete_origin', and '*delete_time' respectively. + * Returns true if the deleted tuple is found. If found, the transaction ID, + * origin, and commit timestamp of the deletion are stored in '*delete_xid', + * '*delete_origin', and '*delete_time' respectively. */ static bool FindDeletedTupleInLocalRel(Relation localrel, @@ -3218,13 +3222,12 @@ FindDeletedTupleInLocalRel(Relation localrel, TransactionId oldestxmin; /* - * Rather than using the conflict detection slot.xmin or invoking - * GetOldestNonRemovableTransactionId(), we directly use the - * oldest_nonremovable_xid maintained by this apply worker to identify - * recently deleted dead tuples for conflict detection. The - * oldest_nonremovable_xid is expected to be greater than or equal to - * other values and is more straightforward to minimize the range of dead - * tuples of interest for the current apply worker. + * Instead of relying on slot.xmin or invoking + * GetOldestNonRemovableTransactionId() for conflict detection, we use the + * oldest_nonremovable_xid maintained by this apply worker. This value will + * be greater than or equal to other thresholds and provides a more direct + * and efficient way to identify recently deleted dead tuples relevant to + * the current apply worker. */ oldestxmin = MyLogicalRepWorker->oldest_nonremovable_xid; @@ -3367,6 +3370,11 @@ apply_handle_tuple_routing(ApplyExecutionData *edata, ConflictType type; TupleTableSlot *newslot = localslot; + /* + * Detecting whether the tuple was recently deleted or + * never existed is crucial to avoid misleading the user + * during confict handling. + */ if (MySubscription->retaindeadtuples && FindDeletedTupleInLocalRel(partrel, part_entry->localindexoid, diff --git a/src/include/replication/worker_internal.h b/src/include/replication/worker_internal.h index 42f4fed38ff..7c0204dd6f4 100644 --- a/src/include/replication/worker_internal.h +++ b/src/include/replication/worker_internal.h @@ -87,9 +87,9 @@ typedef struct LogicalRepWorker bool parallel_apply; /* - * The changes made by this and later transactions shouldn't be removed. - * This allows the detection of update_deleted conflicts when applying - * changes in this logical replication worker. + * Changes made by this transaction and subsequent ones must be preserved. + * This ensures that update_deleted conflicts can be accurately detected + * during the apply phase of logical replication by this worker. * * The logical replication launcher manages an internal replication slot * named "pg_conflict_detection". It asynchronously collects this ID to diff --git a/src/test/subscription/t/035_conflicts.pl b/src/test/subscription/t/035_conflicts.pl index 0a4968f359d..36aeb14c563 100644 --- a/src/test/subscription/t/035_conflicts.pl +++ b/src/test/subscription/t/035_conflicts.pl @@ -273,7 +273,7 @@ $node_A->psql('postgres', ############################################################################### # Check that dead tuples on node A cannot be cleaned by VACUUM until the # concurrent transactions on Node B have been applied and flushed on Node A. -# And check that an update_deleted conflict is detected when updating a row +# Also, check that an update_deleted conflict is detected when updating a row # that was deleted by a different origin. ############################################################################### @@ -352,7 +352,8 @@ ok( $stderr =~ 'the deleted column is removed'); ############################################################################### -# Check that dead tuples can be found through a full table sequential scan +# Ensure that the deleted tuple needed to detect an update_deleted conflict is +# accessible via a sequential table scan. ############################################################################### # Drop the primary key from tab on node A and set REPLICA IDENTITY to FULL to