From 04a22ae21878fb6c1412f570107b2e71917c9140 Mon Sep 17 00:00:00 2001
From: Joao Foltran
Date: Thu, 19 Mar 2026 12:30:06 -0300
Subject: [PATCH v3 1/5] Add auto-revalidation infrastructure for physical
 replication slots

Physical replication slots that are invalidated (e.g., due to WAL
removal or idle timeout) currently cannot be reacquired, requiring
manual slot recreation. This patch adds the infrastructure for
automatic revalidation of physical slots after a standby reconnects
and confirms WAL receipt.

A new per-slot persistent field 'auto_revalidate' (default: false)
controls whether a physical slot is eligible for revalidation. When
enabled, the slot can be acquired despite being invalidated, and the
invalidation is cleared atomically (under spinlock) with the
restart_lsn update upon receiving the first flush ACK from the standby.

Only RS_INVAL_WAL_REMOVED and RS_INVAL_IDLE_TIMEOUT are revalidatable
via an explicit allowlist, so future invalidation reasons are not
automatically eligible.

This patch adds the field and revalidation logic but does not yet
provide a way to set auto_revalidate=true; that will be added in a
subsequent patch.

Bump SLOT_VERSION from 5 to 6 for the new persistent field.
---
 src/backend/replication/slot.c      |  2 +-
 src/backend/replication/walsender.c | 52 ++++++++++++++++++++++++++++-
 src/include/replication/slot.h      | 24 +++++++++++++
 3 files changed, 76 insertions(+), 2 deletions(-)

diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c
index d7fb9f5a67f..7d00cf71481 100644
--- a/src/backend/replication/slot.c
+++ b/src/backend/replication/slot.c
@@ -141,7 +141,7 @@ StaticAssertDecl(lengthof(SlotInvalidationCauses) == (RS_INVAL_MAX_CAUSES + 1),
 	sizeof(ReplicationSlotOnDisk) - ReplicationSlotOnDiskConstantSize
 
 #define SLOT_MAGIC		0x1051CA1	/* format identifier */
-#define SLOT_VERSION	5		/* version for new files */
+#define SLOT_VERSION	6		/* version for new files */
 
 /* Control array for replication slot management */
 ReplicationSlotCtlData *ReplicationSlotCtl = NULL;
diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c
index 04aa770d981..09291f7f0a6 100644
--- a/src/backend/replication/walsender.c
+++ b/src/backend/replication/walsender.c
@@ -871,12 +871,34 @@ StartReplication(StartReplicationCmd *cmd)
 
 	if (cmd->slotname)
 	{
-		ReplicationSlotAcquire(cmd->slotname, true, true);
+		ReplicationSlotAcquire(cmd->slotname, true, false);
 		if (SlotIsLogical(MyReplicationSlot))
 			ereport(ERROR,
 					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
 					 errmsg("cannot use a logical replication slot for physical replication")));
 
+		/*
+		 * Check if the slot is invalidated. Physical slots with
+		 * auto_revalidate can proceed -- they will be revalidated once the
+		 * standby confirms WAL receipt. All other invalidated slots must
+		 * error out as before.
+		 */
+		if (!SlotIsValid(MyReplicationSlot))
+		{
+			if (SlotCanBeRevalidated(MyReplicationSlot))
+				ereport(WARNING,
+						errmsg("replication slot \"%s\" is invalidated due to \"%s\", will attempt revalidation",
+							   NameStr(MyReplicationSlot->data.name),
+							   GetSlotInvalidationCauseName(MyReplicationSlot->data.invalidated)));
+			else
+				ereport(ERROR,
+						errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+						errmsg("can no longer access replication slot \"%s\"",
+							   NameStr(MyReplicationSlot->data.name)),
+						errdetail("This replication slot has been invalidated due to \"%s\".",
+								  GetSlotInvalidationCauseName(MyReplicationSlot->data.invalidated)));
+		}
+
 		/*
 		 * We don't need to verify the slot's restart_lsn here; instead we
 		 * rely on the caller requesting the starting point to use.  If the
@@ -2472,6 +2494,7 @@ static void
 PhysicalConfirmReceivedLocation(XLogRecPtr lsn)
 {
 	bool		changed = false;
+	bool		revalidated = false;
 	ReplicationSlot *slot = MyReplicationSlot;
 
 	Assert(XLogRecPtrIsValid(lsn));
@@ -2481,6 +2504,19 @@ PhysicalConfirmReceivedLocation(XLogRecPtr lsn)
 		changed = true;
 		slot->data.restart_lsn = lsn;
 	}
+
+	/*
+	 * If the slot is invalidated and eligible for auto-revalidation, clear
+	 * the invalidation now that the standby has confirmed WAL receipt. Both
+	 * restart_lsn and invalidated must be updated under the same spinlock to
+	 * stay atomic with respect to ReplicationSlotsComputeRequiredLSN().
+	 */
+	if (SlotCanBeRevalidated(slot))
+	{
+		slot->data.invalidated = RS_INVAL_NONE;
+		changed = true;
+		revalidated = true;
+	}
 	SpinLockRelease(&slot->mutex);
 
 	if (changed)
@@ -2490,6 +2526,20 @@ PhysicalConfirmReceivedLocation(XLogRecPtr lsn)
 		PhysicalWakeupLogicalWalSnd();
 	}
 
+	/*
+	 * Persist the revalidation to disk immediately so the cleared state
+	 * survives a crash. Normal restart_lsn updates are not saved here
+	 * (the comment below explains why), but a revalidation is a significant
+	 * one-time state change worth persisting right away.
+	 */
+	if (revalidated)
+	{
+		ReplicationSlotSave();
+		ereport(LOG,
+				errmsg("physical replication slot \"%s\" has been revalidated",
+					   NameStr(slot->data.name)));
+	}
+
 	/*
 	 * One could argue that the slot should be saved to disk now, but that'd
 	 * be energy wasted - the worst thing lost information could cause here is
diff --git a/src/include/replication/slot.h b/src/include/replication/slot.h
index 9b29444cbca..ae665b8dbb1 100644
--- a/src/include/replication/slot.h
+++ b/src/include/replication/slot.h
@@ -159,6 +159,13 @@ typedef struct ReplicationSlotPersistentData
 	 * for logical slots on the primary server.
 	 */
 	bool		failover;
+
+	/*
+	 * If true, an invalidated physical slot may be automatically revalidated
+	 * once the standby reconnects and confirms WAL receipt (flush ACK).
+	 * Only applicable to physical slots; ignored for logical slots.
+	 */
+	bool		auto_revalidate;
 } ReplicationSlotPersistentData;
 
 /*
@@ -286,6 +293,23 @@ typedef struct ReplicationSlot
 
 #define SlotIsPhysical(slot) ((slot)->data.database == InvalidOid)
 #define SlotIsLogical(slot) ((slot)->data.database != InvalidOid)
+#define SlotIsValid(slot) ((slot)->data.invalidated == RS_INVAL_NONE)
+
+/*
+ * Can this slot be automatically revalidated?
+ *
+ * Only physical slots with auto_revalidate enabled and invalidated by
+ * an explicitly supported reason are eligible. New invalidation reasons
+ * must be added here to become revalidatable.
+ */
+static inline bool
+SlotCanBeRevalidated(ReplicationSlot *s)
+{
+	return SlotIsPhysical(s) &&
+		s->data.auto_revalidate &&
+		(s->data.invalidated == RS_INVAL_WAL_REMOVED ||
+		 s->data.invalidated == RS_INVAL_IDLE_TIMEOUT);
+}
 
 /*
  * Shared memory control area for all of replication slots.

base-commit: fb23cc7e81db181bfb3dcfed6ad0731a3473d4e1
-- 
2.50.1 (Apple Git-155)