From 62335545e68ac14335467d3cfe5f4b3fdd2d5d25 Mon Sep 17 00:00:00 2001
From: Alexander Korotkov
Date: Wed, 20 May 2026 15:12:37 +0300
Subject: [PATCH v2] Stabilize 019_replslot_limit.pl after wait_for_catchup() semantic change

wait_for_catchup() now returns as soon as the standby has replayed the
target LSN locally, rather than waiting until the primary observes that
position via pg_stat_replication.

019_replslot_limit.pl, however, checks primary-side pg_replication_slots
state, which depends on the slot's restart_lsn -- and restart_lsn
advances only after the primary's walsender processes a standby reply.
The previous polling wait_for_catchup() implicitly waited for that round
trip; the WAIT FOR LSN-based one does not, so the subtests "check that
the slot state changes to 'extended' / 'unreserved'" become flaky
(reproducible with small artificial delays in XLogWalRcvSendReply /
ProcessStandbyReplyMessage).

Replace each wait_for_catchup() in this test with
wait_for_slot_catchup('rep1', 'restart', primary->lsn('write')).
restart_lsn cannot move ahead of the standby's replayed position, so
this single wait transitively covers both the standby replay and the
primary's observation of it, which is exactly the precondition the
slot-state assertions require.

Reported-by: Alexander Lakhin Discussion: https://postgr.es/m/63f6abc9-c0ae-465d-a4e6-667eca6ea008@gmail.com Author: Xuneng Zhou Author: Alexander Korotkov --- src/test/recovery/t/019_replslot_limit.pl | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/src/test/recovery/t/019_replslot_limit.pl b/src/test/recovery/t/019_replslot_limit.pl index 7b253e64d9c..472aa07587f 100644 --- a/src/test/recovery/t/019_replslot_limit.pl +++ b/src/test/recovery/t/019_replslot_limit.pl @@ -44,8 +44,12 @@ $node_standby->append_conf('postgresql.conf', "primary_slot_name = 'rep1'"); $node_standby->start; -# Wait until standby has replayed enough data -$node_primary->wait_for_catchup($node_standby); +# Wait until the primary has processed standby feedback and advanced +# the slot's restart_lsn. restart_lsn moves only after the standby's +# reply reaches the walsender, so this transitively guarantees that +# the standby itself has replayed past the target LSN. +$node_primary->wait_for_slot_catchup('rep1', 'restart', + $node_primary->lsn('write')); # Stop standby $node_standby->stop; @@ -79,7 +83,8 @@ is($result, "reserved|t", 'check that slot is working'); # The standby can reconnect to primary $node_standby->start; -$node_primary->wait_for_catchup($node_standby); +$node_primary->wait_for_slot_catchup('rep1', 'restart', + $node_primary->lsn('write')); $node_standby->stop; @@ -109,7 +114,8 @@ is($result, "reserved", # The standby can reconnect to primary $node_standby->start; -$node_primary->wait_for_catchup($node_standby); +$node_primary->wait_for_slot_catchup('rep1', 'restart', + $node_primary->lsn('write')); $node_standby->stop; # wal_keep_size overrides max_slot_wal_keep_size @@ -128,7 +134,8 @@ $result = $node_primary->safe_psql('postgres', # The standby can reconnect to primary $node_standby->start; -$node_primary->wait_for_catchup($node_standby); +$node_primary->wait_for_slot_catchup('rep1', 'restart', + $node_primary->lsn('write')); 
$node_standby->stop; # Advance WAL again without checkpoint, reducing remain by 6 MB. @@ -155,7 +162,8 @@ is($result, "unreserved|t", # The standby still can connect to primary before a checkpoint $node_standby->start; -$node_primary->wait_for_catchup($node_standby); +$node_primary->wait_for_slot_catchup('rep1', 'restart', + $node_primary->lsn('write')); $node_standby->stop; -- 2.39.5 (Apple Git-154)