From 878988979952d0483d9b91626187537b1bf4f044 Mon Sep 17 00:00:00 2001 From: alterego655 <824662526@qq.com> Date: Wed, 17 Jun 2026 14:43:14 +0800 Subject: [PATCH v3] Avoid stale slot access after dropping obsolete synced slots drop_local_obsolete_slots() kept using local_slot after calling ReplicationSlotDropAcquired(). Once the drop completes, the slot array entry can be reused by another backend, so later reads of local_slot->data could refer to a different slot. Copy the slot name and database OID before dropping the slot, and use those saved values for unlocking and logging after the drop. While at it, emit the "dropped replication slot" log message only when the slot was actually dropped; previously it was logged even when the slot turned out not to be a synced slot and the drop was skipped. Author: Xuneng Zhou Reviewed-by: Zhijie Hou Reviewed-by: Amit Kapila Reviewed-by: Fujii Masao --- src/backend/replication/logical/slotsync.c | 33 +++++++++++++++------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/src/backend/replication/logical/slotsync.c b/src/backend/replication/logical/slotsync.c index 96107c9475d..a22d0515d48 100644 --- a/src/backend/replication/logical/slotsync.c +++ b/src/backend/replication/logical/slotsync.c @@ -541,6 +541,7 @@ drop_local_obsolete_slots(List *remote_slot_list) /* Drop the local slot if it is not required to be retained. */ if (!local_sync_slot_required(local_slot, remote_slot_list)) { + Oid slot_database = local_slot->data.database; bool synced_slot; /* @@ -548,8 +549,8 @@ drop_local_obsolete_slots(List *remote_slot_list) * ReplicationSlotsDropDBSlots(), trying to drop the same slot * during a drop-database operation. */ - LockSharedObject(DatabaseRelationId, local_slot->data.database, - 0, AccessShareLock); + LockSharedObject(DatabaseRelationId, slot_database, 0, + AccessShareLock); /* * In the small window between getting the slot to drop and @@ -559,6 +560,16 @@ drop_local_obsolete_slots(List *remote_slot_list) * the same shared memory as that of 'local_slot'. Thus check if * local_slot is still the synced one before performing the actual * drop. 
+ * + * We cannot close this window by holding + * ReplicationSlotControlLock while taking the database lock, + * because the database-drop path holds the database lock and then + * scans replication slots. Therefore, local_slot may already + * refer to a reused slot-array entry here, and fields such as + * name or database OID could already be stale. That could cause + * an incorrect cleanup decision for this cycle or briefly lock an + * unrelated database. We accept that risk because this race is + * rare and non-fatal. */ SpinLockAcquire(&local_slot->mutex); synced_slot = local_slot->in_use && local_slot->data.synced; @@ -566,23 +577,25 @@ drop_local_obsolete_slots(List *remote_slot_list) if (synced_slot) { + NameData slot_name = local_slot->data.name; + /* * Now acquire and drop the slot. Note we purposely don't * request logical decoding to be disabled here: since this is * a standby, which derives its logical decoding state from * the primary, it would be wrong to do so. */ - ReplicationSlotAcquire(NameStr(local_slot->data.name), true, false); + ReplicationSlotAcquire(NameStr(slot_name), true, false); ReplicationSlotDropAcquired(false); - } - UnlockSharedObject(DatabaseRelationId, local_slot->data.database, - 0, AccessShareLock); + ereport(LOG, + errmsg("dropped replication slot \"%s\" of database with OID %u", + NameStr(slot_name), + slot_database)); + } - ereport(LOG, - errmsg("dropped replication slot \"%s\" of database with OID %u", - NameStr(local_slot->data.name), - local_slot->data.database)); + UnlockSharedObject(DatabaseRelationId, slot_database, 0, + AccessShareLock); } } } -- 2.51.0