diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c index b496b11b5a9..77e022dfb0a 100644 --- a/src/backend/replication/slot.c +++ b/src/backend/replication/slot.c @@ -1580,16 +1580,19 @@ ReplicationSlotReserveWal(void) Assert(!XLogRecPtrIsValid(slot->last_saved_restart_lsn)); /* + * The replication slot mechanism is used to prevent the removal of + * required WAL. + * * Acquire an exclusive lock to prevent the checkpoint process from - * concurrently calculating the minimum slot LSN (see - * CheckPointReplicationSlots), ensuring that the reserved WAL cannot be - * removed during a checkpoint. + * concurrently computing the minimum slot LSN (see + * CheckPointReplicationSlots). This ensures that the WAL reserved for + * replication cannot be removed during a checkpoint. * * The mechanism is reliable because if WAL reservation occurs first, the * checkpoint must wait for the restart_lsn update before determining the - * minimum non-removable LSN. On the other hand, if the checkpoint occurs - * first, subsequent WAL reservations must choose positions beyond or - * equal to the redo pointer of checkpoint. + * minimum non-removable LSN. On the other hand, if the checkpoint happens + * first, subsequent WAL reservations will select positions at or beyond + * the redo pointer of that checkpoint. */ LWLockAcquire(ReplicationSlotAllocationLock, LW_EXCLUSIVE); @@ -1603,10 +1606,9 @@ ReplicationSlotReserveWal(void) * record. * * None of this is needed (or indeed helpful) for physical slots as - * they'll start replay at the last logged checkpoint anyway. Instead - * return the location of the last redo LSN. While that slightly increases - * the chance that we have to retry, it's where a base backup has to start - * replay at. + * they'll start replay at the last logged checkpoint anyway. Instead, + * return the location of the last redo LSN, which is where a base backup + * has to start replay.
*/ if (SlotIsPhysical(slot)) restart_lsn = GetRedoRecPtr(); @@ -1622,6 +1624,7 @@ ReplicationSlotReserveWal(void) /* prevent WAL removal as fast as possible */ ReplicationSlotsComputeRequiredLSN(); + /* Verify that a concurrent checkpoint has not removed the required WAL. */ XLByteToSeg(slot->data.restart_lsn, segno, wal_segment_size); if (XLogGetLastRemovedSegno() >= segno) elog(ERROR, "WAL required by replication slot %s has been removed concurrently", @@ -2139,8 +2142,8 @@ CheckPointReplicationSlots(bool is_shutdown) * Additionally, acquiring the Allocation lock is necessary to serialize * the slot flush process with concurrent slot WAL reservation. This * ensures that the WAL position being reserved is either flushed to disk - * or beyond or equal to the redo pointer (See ReplicationSlotReserveWal - * for details). + * or is at or beyond the redo pointer of the current checkpoint (see + * ReplicationSlotReserveWal for details). */ LWLockAcquire(ReplicationSlotAllocationLock, LW_SHARED);