From a2c2c892f92ddf0fe081a4ae3c38533727d2ffee Mon Sep 17 00:00:00 2001
From: alterego655 <824662526@qq.com>
Date: Fri, 15 May 2026 13:12:00 +0800
Subject: [PATCH v2] Bound memory usage during manual slot sync retries

pg_sync_replication_slots() retries inside a single SQL function call,
unlike the slotsync worker, whose cycles are separated by transaction
boundaries. Per-cycle allocations made while fetching and synchronizing
remote slots could therefore accumulate until the function returned.

The expected impact is modest for typical slot counts, but the growth is
proportional to the number of retries and remote slots, and the function
may wait for an extended period.

Use a short-lived memory context for each retry cycle so this memory is
reclaimed before the next attempt. Also drop tuple slots created with
MakeSingleTupleTableSlot() before clearing the walreceiver result, and
free the temporary list of borrowed ReplicationSlot pointers built by
drop_local_obsolete_slots().
---
 src/backend/replication/logical/slotsync.c | 23 ++++++++++++++++++----
 1 file changed, 19 insertions(+), 4 deletions(-)

diff --git a/src/backend/replication/logical/slotsync.c b/src/backend/replication/logical/slotsync.c
index ad3747e598c..8b709d2b085 100644
--- a/src/backend/replication/logical/slotsync.c
+++ b/src/backend/replication/logical/slotsync.c
@@ -1016,6 +1016,7 @@ fetch_remote_slots(WalReceiverConn *wrconn, List *slot_names)
 		ExecClearTuple(tupslot);
 	}
 
+	ExecDropSingleTupleTableSlot(tupslot);
 	walrcv_clear_result(res);
 
 	return remote_slot_list;
@@ -1135,7 +1136,7 @@ validate_remote_info(WalReceiverConn *wrconn)
 				errmsg("replication slot \"%s\" specified by \"%s\" does not exist on primary server",
 					   PrimarySlotName, "primary_slot_name"));
 
-	ExecClearTuple(tupslot);
+	ExecDropSingleTupleTableSlot(tupslot);
 	walrcv_clear_result(res);
 
 	if (started_tx)
@@ -2006,16 +2007,22 @@ SyncReplicationSlots(WalReceiverConn *wrconn)
 	{
 		List	   *remote_slots = NIL;
 		List	   *slot_names = NIL;	/* List of slot names to track */
+		MemoryContext cycle_ctx;
 
 		check_and_set_sync_info(MyProcPid);
 
 		validate_remote_info(wrconn);
 
+		cycle_ctx = AllocSetContextCreate(CurrentMemoryContext,
+										  "Slot sync cycle context",
+										  ALLOCSET_DEFAULT_SIZES);
+
 		/* Retry until all the slots are sync-ready */
 		for (;;)
 		{
 			bool		slot_persistence_pending = false;
 			bool		some_slot_updated = false;
+			MemoryContext oldctx;
 
 			/* Check for interrupts and config changes */
 			CHECK_FOR_INTERRUPTS();
@@ -2026,6 +2033,9 @@ SyncReplicationSlots(WalReceiverConn *wrconn)
 			/* We must be in a valid transaction state */
 			Assert(IsTransactionState());
 
+			MemoryContextReset(cycle_ctx);
+			oldctx = MemoryContextSwitchTo(cycle_ctx);
+
 			/*
 			 * Fetch remote slot info for the given slot_names. If slot_names
 			 * is NIL, fetch all failover-enabled slots. Note that we reuse
@@ -2042,6 +2052,12 @@ SyncReplicationSlots(WalReceiverConn *wrconn)
 			some_slot_updated = synchronize_slots(wrconn, remote_slots,
 												  &slot_persistence_pending);
 
+			/*
+			 * slot_names must survive later cycle_ctx resets, so copy it in
+			 * the outer context.
+			 */
+			MemoryContextSwitchTo(oldctx);
+
 			/*
 			 * If slot_persistence_pending is true, extract slot names for
 			 * future iterations (only needed if we haven't done it yet)
@@ -2049,9 +2065,6 @@ SyncReplicationSlots(WalReceiverConn *wrconn)
 			if (slot_names == NIL && slot_persistence_pending)
 				slot_names = extract_slot_names(remote_slots);
 
-			/* Free the current remote_slots list */
-			list_free_deep(remote_slots);
-
 			/* Done if all slots are persisted i.e are sync-ready */
 			if (!slot_persistence_pending)
 				break;
@@ -2060,6 +2073,8 @@ SyncReplicationSlots(WalReceiverConn *wrconn)
 			wait_for_slot_activity(some_slot_updated);
 		}
 
+		MemoryContextDelete(cycle_ctx);
+
 		if (slot_names)
 			list_free_deep(slot_names);
 
-- 
2.51.0