From cbbe124cd5a397764721f48e27de4020f8b46603 Mon Sep 17 00:00:00 2001 From: alterego655 <824662526@qq.com> Date: Tue, 26 May 2026 09:37:24 +0800 Subject: [PATCH v3 1/2] Stabilize 019_replslot_limit.pl after wait_for_catchup() semantic change wait_for_catchup() now returns as soon as the standby has replayed the target LSN locally, rather than waiting until the primary observes that position via pg_stat_replication. 019_replslot_limit.pl, however, checks primary-side pg_replication_slots state, which depends on the slot's restart_lsn -- and restart_lsn advances only after the primary's walsender processes a standby reply. The previous polling wait_for_catchup() implicitly waited for that round trip; the WAIT FOR LSN-based one does not, so the subtests "check that the slot state changes to 'extended' / 'unreserved'" become flaky (reproducible with small artificial delays in XLogWalRcvSendReply / ProcessStandbyReplyMessage). Replace each wait_for_catchup() in this test with wait_for_slot_catchup('rep1', 'restart', primary->lsn('write')). For a physical slot, restart_lsn is advanced from the standby's reported flush position, so this single wait covers both the standby having received and flushed the WAL and the primary's processing of that feedback, which is exactly the precondition the slot-state assertions require. 
Reported-by: Alexander Lakhin Discussion: https://postgr.es/m/63f6abc9-c0ae-465d-a4e6-667eca6ea008@gmail.com Author: Xuneng Zhou Author: Alexander Korotkov --- src/test/recovery/t/019_replslot_limit.pl | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/src/test/recovery/t/019_replslot_limit.pl b/src/test/recovery/t/019_replslot_limit.pl index 7b253e64d9c..882ffb66550 100644 --- a/src/test/recovery/t/019_replslot_limit.pl +++ b/src/test/recovery/t/019_replslot_limit.pl @@ -44,8 +44,12 @@ $node_standby->append_conf('postgresql.conf', "primary_slot_name = 'rep1'"); $node_standby->start; -# Wait until standby has replayed enough data -$node_primary->wait_for_catchup($node_standby); +# Wait until the primary has processed standby feedback and advanced the +# slot's restart_lsn. For a physical slot, restart_lsn is updated from +# the standby's reported flush position, so this waits for the primary-side +# slot state that the following wal_status checks depend on. 
+$node_primary->wait_for_slot_catchup('rep1', 'restart', + $node_primary->lsn('write')); # Stop standby $node_standby->stop; @@ -79,7 +83,8 @@ is($result, "reserved|t", 'check that slot is working'); # The standby can reconnect to primary $node_standby->start; -$node_primary->wait_for_catchup($node_standby); +$node_primary->wait_for_slot_catchup('rep1', 'restart', + $node_primary->lsn('write')); $node_standby->stop; @@ -109,7 +114,8 @@ is($result, "reserved", # The standby can reconnect to primary $node_standby->start; -$node_primary->wait_for_catchup($node_standby); +$node_primary->wait_for_slot_catchup('rep1', 'restart', + $node_primary->lsn('write')); $node_standby->stop; # wal_keep_size overrides max_slot_wal_keep_size @@ -128,7 +134,8 @@ $result = $node_primary->safe_psql('postgres', # The standby can reconnect to primary $node_standby->start; -$node_primary->wait_for_catchup($node_standby); +$node_primary->wait_for_slot_catchup('rep1', 'restart', + $node_primary->lsn('write')); $node_standby->stop; # Advance WAL again without checkpoint, reducing remain by 6 MB. @@ -155,7 +162,8 @@ is($result, "unreserved|t", # The standby still can connect to primary before a checkpoint $node_standby->start; -$node_primary->wait_for_catchup($node_standby); +$node_primary->wait_for_slot_catchup('rep1', 'restart', + $node_primary->lsn('write')); $node_standby->stop; -- 2.51.0