From 77c3d2d361893de857627e036d0eaaf01cfe91c1 Mon Sep 17 00:00:00 2001 From: Julien Rouhaud Date: Fri, 14 Apr 2023 13:49:09 +0800 Subject: [PATCH v1] Always persist to disk logical slots during a shutdown checkpoint. It's entirely possible for a logical slot to have a confirmed_flush_lsn higher than the last value saved on disk while not being marked as dirty. It's currently not a problem to lose that value during a clean shutdown / restart cycle, but a later patch adding support for pg_upgrade of publications and logical slots will rely on that value being properly persisted to disk. Author: Julien Rouhaud Reviewed-by: FIXME Discussion: FIXME --- src/backend/access/transam/xlog.c | 2 +- src/backend/replication/slot.c | 26 ++++++++++++++++---------- src/include/replication/slot.h | 2 +- 3 files changed, 18 insertions(+), 12 deletions(-) diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index b540ee293b..8100ca656e 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -7011,7 +7011,7 @@ static void CheckPointGuts(XLogRecPtr checkPointRedo, int flags) { CheckPointRelationMap(); - CheckPointReplicationSlots(); + CheckPointReplicationSlots(flags & CHECKPOINT_IS_SHUTDOWN); CheckPointSnapBuild(); CheckPointLogicalRewriteHeap(); CheckPointReplicationOrigin(); diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c index 8021aaa0a8..2bbf2af770 100644 --- a/src/backend/replication/slot.c +++ b/src/backend/replication/slot.c @@ -109,7 +109,8 @@ static void ReplicationSlotDropPtr(ReplicationSlot *slot); /* internal persistency functions */ static void RestoreSlotFromDisk(const char *name); static void CreateSlotOnDisk(ReplicationSlot *slot); -static void SaveSlotToPath(ReplicationSlot *slot, const char *dir, int elevel); +static void SaveSlotToPath(ReplicationSlot *slot, const char *dir, int elevel, + bool is_shutdown); /* * Report shared-memory space needed by ReplicationSlotsShmemInit. @@ -783,7 +784,7 @@ ReplicationSlotSave(void) Assert(MyReplicationSlot != NULL); sprintf(path, "pg_replslot/%s", NameStr(MyReplicationSlot->data.name)); - SaveSlotToPath(MyReplicationSlot, path, ERROR); + SaveSlotToPath(MyReplicationSlot, path, ERROR, false); } /* @@ -1565,11 +1566,10 @@ restart: /* * Flush all replication slots to disk. * - * This needn't actually be part of a checkpoint, but it's a convenient - * location. + * is_shutdown is true in case of a shutdown checkpoint. */ void -CheckPointReplicationSlots(void) +CheckPointReplicationSlots(bool is_shutdown) { int i; @@ -1594,7 +1594,7 @@ CheckPointReplicationSlots(void) /* save the slot to disk, locking is handled in SaveSlotToPath() */ sprintf(path, "pg_replslot/%s", NameStr(s->data.name)); - SaveSlotToPath(s, path, LOG); + SaveSlotToPath(s, path, LOG, is_shutdown); } LWLockRelease(ReplicationSlotAllocationLock); } @@ -1700,7 +1700,7 @@ CreateSlotOnDisk(ReplicationSlot *slot) /* Write the actual state file. */ slot->dirty = true; /* signal that we really need to write */ - SaveSlotToPath(slot, tmppath, ERROR); + SaveSlotToPath(slot, tmppath, ERROR, false); /* Rename the directory into place. */ if (rename(tmppath, path) != 0) @@ -1726,7 +1726,8 @@ CreateSlotOnDisk(ReplicationSlot *slot) * Shared functionality between saving and creating a replication slot. */ static void -SaveSlotToPath(ReplicationSlot *slot, const char *dir, int elevel) +SaveSlotToPath(ReplicationSlot *slot, const char *dir, int elevel, + bool is_shutdown) { char tmppath[MAXPGPATH]; char path[MAXPGPATH]; @@ -1740,8 +1741,13 @@ SaveSlotToPath(ReplicationSlot *slot, const char *dir, int elevel) slot->just_dirtied = false; SpinLockRelease(&slot->mutex); - /* and don't do anything if there's nothing to write */ - if (!was_dirty) + /* + * and don't do anything if there's nothing to write, unless it's this is + * called for a logical slot during a shutdown checkpoint, as we want to + * persist the confirmed_flush_lsn in that case, even if that's the only + * modification. + */ + if (!was_dirty && !is_shutdown && !SlotIsLogical(slot)) return; LWLockAcquire(&slot->io_in_progress_lock, LW_EXCLUSIVE); diff --git a/src/include/replication/slot.h b/src/include/replication/slot.h index a8a89dc784..7ca37c9f70 100644 --- a/src/include/replication/slot.h +++ b/src/include/replication/slot.h @@ -241,7 +241,7 @@ extern void ReplicationSlotNameForTablesync(Oid suboid, Oid relid, char *syncslo extern void ReplicationSlotDropAtPubNode(WalReceiverConn *wrconn, char *slotname, bool missing_ok); extern void StartupReplicationSlots(void); -extern void CheckPointReplicationSlots(void); +extern void CheckPointReplicationSlots(bool is_shutdown); extern void CheckSlotRequirements(void); extern void CheckSlotPermissions(void); -- 2.37.0