From 68212bb8107a119ffe7f87113121230c87415b49 Mon Sep 17 00:00:00 2001 From: JoongHyuk Shin Date: Sun, 10 May 2026 20:22:15 +0900 Subject: [PATCH v4] Don't call ereport(ERROR) from recovery target GUC assign hooks The five recovery target GUC assign hooks (assign_recovery_target, assign_recovery_target_lsn, assign_recovery_target_name, assign_recovery_target_time, assign_recovery_target_xid) all called error_multiple_recovery_targets(), which invoked ereport(ERROR). The GUC README explicitly states that assign hooks must never fail; raising an error from an assign hook leaves guc.c's internal state inconsistent before the abort. A comment in the code itself acknowledged this as "broken by design." Fix this by removing the conflict check from all five assign hooks and detecting multiple recovery targets in a new CheckRecoveryTargetConflicts() function, called from validateRecoveryParameters() before its early return for !ArchiveRecoveryRequested. The check therefore runs at every startup regardless of recovery mode, preserving the existing behavior of detecting misconfiguration at server start time (rather than only when recovery.signal is added). In each assign hook's empty-string branch, replace the unconditional "recoveryTarget = RECOVERY_TARGET_UNSET" assignment with a narrower "if (recoveryTarget == MY_TYPE)" form. Empty strings for an unrelated recovery_target_* GUC then leave another GUC's already-set target intact (this fixes the cross-GUC clobber raised in v1 review), while still preserving the documented "last value wins" reassignment semantics for the same GUC: assigning a recovery target and then reassigning the same parameter to an empty string unsets the target, matching how the assign hooks behaved before this patch and what 003_recovery_targets.pl already expected. --- src/backend/access/transam/xlogrecovery.c | 127 ++++++++------ src/test/recovery/t/003_recovery_targets.pl | 174 +++++++++++++++++++- 2 files changed, 252 insertions(+), 49 deletions(-) diff --git a/src/backend/access/transam/xlogrecovery.c b/src/backend/access/transam/xlogrecovery.c index c236e2b7969..953cdb48a3d 100644 --- a/src/backend/access/transam/xlogrecovery.c +++ b/src/backend/access/transam/xlogrecovery.c @@ -341,6 +341,7 @@ static void ApplyWalRecord(XLogReaderState *xlogreader, XLogRecord *record, Time static void EnableStandbyMode(void); static void readRecoverySignalFile(void); static void validateRecoveryParameters(void); +static void CheckRecoveryTargetConflicts(void); static bool read_backup_label(XLogRecPtr *checkPointLoc, TimeLineID *backupLabelTLI, bool *backupEndRequired, bool *backupFromStandby); @@ -1067,6 +1068,8 @@ readRecoverySignalFile(void) static void validateRecoveryParameters(void) { + CheckRecoveryTargetConflicts(); + if (!ArchiveRecoveryRequested) return; @@ -1144,6 +1147,63 @@ validateRecoveryParameters(void) } } +/* + * CheckRecoveryTargetConflicts + * + * Validate that at most one of the recovery_target_* GUCs is set to a + * non-empty value. This is called from validateRecoveryParameters() at every + * server startup, regardless of whether archive recovery is requested, so + * misconfiguration is detected at server start time rather than later when + * recovery.signal is added. + * + * The check is a separate function rather than inlined into + * validateRecoveryParameters() because it intentionally runs even when no + * recovery is requested, while the rest of validateRecoveryParameters() is + * recovery-mode-only. Keeping it as a named function makes that separation + * explicit. + * + * The check used to live in the assign hooks of the recovery_target_* GUCs + * (calling ereport(ERROR) on conflict), which violated guc.c's contract that + * assign hooks must never fail. Moving the check here keeps the assign hooks + * contract-compliant. + * + * If a future patch adds a sixth recovery_target_* GUC, both this list and + * the errdetail below must be updated. + */ +static void +CheckRecoveryTargetConflicts(void) +{ + int ntargets = 0; + const char *val; + + /* missing_ok=false guarantees val is non-NULL. */ + val = GetConfigOption("recovery_target", false, false); + if (val[0] != '\0') + ntargets++; + val = GetConfigOption("recovery_target_lsn", false, false); + if (val[0] != '\0') + ntargets++; + val = GetConfigOption("recovery_target_name", false, false); + if (val[0] != '\0') + ntargets++; + val = GetConfigOption("recovery_target_time", false, false); + if (val[0] != '\0') + ntargets++; + val = GetConfigOption("recovery_target_xid", false, false); + if (val[0] != '\0') + ntargets++; + + if (ntargets > 1) + ereport(FATAL, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("multiple recovery targets specified"), + errdetail("At most one of \"recovery_target\", " + "\"recovery_target_lsn\", " + "\"recovery_target_name\", " + "\"recovery_target_time\", " + "\"recovery_target_xid\" can be set."))); +} + /* * read_backup_label: check to see if a backup_label file is present * @@ -4764,32 +4824,27 @@ check_primary_slot_name(char **newval, void **extra, GucSource source) } /* - * Recovery target settings: Only one of the several recovery_target* settings - * may be set. Setting a second one results in an error. The global variable + * Recovery target settings: At most one of the several recovery_target* + * settings may be set to a non-empty value. The global variable * recoveryTarget tracks which kind of recovery target was chosen. Other * variables store the actual target value (for example a string or a xid). - * The assign functions of the parameters check whether a competing parameter - * was already set. But we want to allow setting the same parameter multiple - * times. We also want to allow unsetting a parameter and setting a different - * one, so we unset recoveryTarget when the parameter is set to an empty - * string. + * An empty string for any of these GUCs is treated as "not set", equivalent + * to the GUC's default; an empty value cannot clobber another GUC's + * already-set target. Conflicts between multiple non-empty settings are + * detected in CheckRecoveryTargetConflicts(), called from + * validateRecoveryParameters() at every startup. * - * XXX this code is broken by design. Throwing an error from a GUC assign - * hook breaks fundamental assumptions of guc.c. So long as all the variables - * for which this can happen are PGC_POSTMASTER, the consequences are limited, - * since we'd just abort postmaster startup anyway. Nonetheless it's likely - * that we have odd behaviors such as unexpected GUC ordering dependencies. + * Each assign hook clears recoveryTarget only when its own GUC is reassigned + * to an empty string after the same GUC was previously assigned a non-empty + * value, e.g. + * postgres -c recovery_target_xid=700 -c recovery_target_xid= + * (postgresql.conf collapses duplicate keys so only the last value reaches + * the assign hook; this same-parameter set-then-clear case only arises from + * -c). The clear is restricted to the hook's own target type so that an + * empty value for one recovery_target_* GUC cannot clobber another GUC's + * already-set target. */ -pg_noreturn static void -error_multiple_recovery_targets(void) -{ - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("multiple recovery targets specified"), - errdetail("At most one of \"recovery_target\", \"recovery_target_lsn\", \"recovery_target_name\", \"recovery_target_time\", \"recovery_target_xid\" may be set."))); -} - /* * GUC check_hook for recovery_target */ @@ -4810,13 +4865,9 @@ check_recovery_target(char **newval, void **extra, GucSource source) void assign_recovery_target(const char *newval, void *extra) { - if (recoveryTarget != RECOVERY_TARGET_UNSET && - recoveryTarget != RECOVERY_TARGET_IMMEDIATE) - error_multiple_recovery_targets(); - if (newval && strcmp(newval, "") != 0) recoveryTarget = RECOVERY_TARGET_IMMEDIATE; - else + else if (recoveryTarget == RECOVERY_TARGET_IMMEDIATE) recoveryTarget = RECOVERY_TARGET_UNSET; } @@ -4851,16 +4902,12 @@ check_recovery_target_lsn(char **newval, void **extra, GucSource source) void assign_recovery_target_lsn(const char *newval, void *extra) { - if (recoveryTarget != RECOVERY_TARGET_UNSET && - recoveryTarget != RECOVERY_TARGET_LSN) - error_multiple_recovery_targets(); - if (newval && strcmp(newval, "") != 0) { recoveryTarget = RECOVERY_TARGET_LSN; recoveryTargetLSN = *((XLogRecPtr *) extra); } - else + else if (recoveryTarget == RECOVERY_TARGET_LSN) recoveryTarget = RECOVERY_TARGET_UNSET; } @@ -4886,16 +4933,12 @@ check_recovery_target_name(char **newval, void **extra, GucSource source) void assign_recovery_target_name(const char *newval, void *extra) { - if (recoveryTarget != RECOVERY_TARGET_UNSET && - recoveryTarget != RECOVERY_TARGET_NAME) - error_multiple_recovery_targets(); - if (newval && strcmp(newval, "") != 0) { recoveryTarget = RECOVERY_TARGET_NAME; recoveryTargetName = newval; } - else + else if (recoveryTarget == RECOVERY_TARGET_NAME) recoveryTarget = RECOVERY_TARGET_UNSET; } @@ -4966,13 +5009,9 @@ check_recovery_target_time(char **newval, void **extra, GucSource source) void assign_recovery_target_time(const char *newval, void *extra) { - if (recoveryTarget != RECOVERY_TARGET_UNSET && - recoveryTarget != RECOVERY_TARGET_TIME) - error_multiple_recovery_targets(); - if (newval && strcmp(newval, "") != 0) recoveryTarget = RECOVERY_TARGET_TIME; - else + else if (recoveryTarget == RECOVERY_TARGET_TIME) recoveryTarget = RECOVERY_TARGET_UNSET; } @@ -5094,15 +5133,11 @@ check_recovery_target_xid(char **newval, void **extra, GucSource source) void assign_recovery_target_xid(const char *newval, void *extra) { - if (recoveryTarget != RECOVERY_TARGET_UNSET && - recoveryTarget != RECOVERY_TARGET_XID) - error_multiple_recovery_targets(); - if (newval && strcmp(newval, "") != 0) { recoveryTarget = RECOVERY_TARGET_XID; recoveryTargetXid = *((TransactionId *) extra); } - else + else if (recoveryTarget == RECOVERY_TARGET_XID) recoveryTarget = RECOVERY_TARGET_UNSET; } diff --git a/src/test/recovery/t/003_recovery_targets.pl b/src/test/recovery/t/003_recovery_targets.pl index 047eb13293a..5979663b0ab 100644 --- a/src/test/recovery/t/003_recovery_targets.pl +++ b/src/test/recovery/t/003_recovery_targets.pl @@ -51,6 +51,49 @@ sub test_recovery_standby return; } +# Start a standby with the given pg_ctl --options string and verify that +# the standby reaches the given LSN and row count. Used to exercise +# scenarios that require the postmaster command line to receive multiple +# "-c name=value" instances of the same GUC, which postgresql.conf cannot +# express because ProcessConfigFile collapses duplicate keys. +sub test_recovery_standby_with_options +{ + local $Test::Builder::Level = $Test::Builder::Level + 1; + + my $test_name = shift; + my $node_name = shift; + my $node_primary = shift; + my $options = shift; + my $num_rows = shift; + my $until_lsn = shift; + + my $node_standby = PostgreSQL::Test::Cluster->new($node_name); + $node_standby->init_from_backup($node_primary, 'my_backup', + has_restoring => 1); + + my $res = run_log( + [ + 'pg_ctl', + '--pgdata' => $node_standby->data_dir, + '--log' => $node_standby->logfile, + '--options' => $options, + 'start', + ]); + ok($res, "server starts for $test_name"); + + $node_standby->poll_query_until('postgres', + "SELECT '$until_lsn'::pg_lsn <= pg_last_wal_replay_lsn()") + or die "Timed out while waiting for standby to catch up"; + + my $count = $node_standby->safe_psql('postgres', + "SELECT count(*) FROM tab_int"); + is($count, qq($num_rows), "check standby content for $test_name"); + + $node_standby->teardown_node; + + return; +} + # Initialize primary node my $node_primary = PostgreSQL::Test::Cluster->new('primary'); $node_primary->init(has_archiving => 1, allows_streaming => 1); @@ -108,6 +151,13 @@ $node_primary->safe_psql('postgres', # Force archiving of WAL file $node_primary->safe_psql('postgres', "SELECT pg_switch_wal()"); +# LSN after the final 6000-row insert and WAL switch. Used by the +# set-then-cleared scenarios below where recovery has no target and must +# replay all archived WAL; polling on $lsn5 would race against the 5001-6000 +# rows. +my $lsn6 = + $node_primary->safe_psql('postgres', "SELECT pg_current_wal_lsn()"); + # Test recovery targets my @recovery_params = ("recovery_target = 'immediate'"); test_recovery_standby('immediate target', @@ -125,11 +175,24 @@ test_recovery_standby('name', 'standby_4', $node_primary, \@recovery_params, test_recovery_standby('LSN', 'standby_5', $node_primary, \@recovery_params, "5000", $lsn5); +# Regression: empty-string for one recovery_target_* GUC must not clobber +# another non-empty target. Setting recovery_target_xid + recovery_target_time +# = '' must recover to the xid, not run as no-target recovery. +@recovery_params = ( + "recovery_target_xid = '$recovery_txid'", + "recovery_target_time = ''"); +test_recovery_standby('xid with empty time GUC', + 'standby_xid_empty_time', $node_primary, \@recovery_params, + "2000", $lsn2); + # Multiple targets # -# Multiple conflicting settings are not allowed, but setting the same -# parameter multiple times or unsetting a parameter and setting a -# different one is allowed. +# Multiple conflicting non-empty settings are not allowed. Setting the same +# parameter multiple times is allowed (the last value wins, per ProcessConfigFile +# duplicate handling). An empty string for a recovery_target_* GUC is treated +# as the GUC's default and is a no-op; it does not clear any other already-set +# target. Conflict detection runs in CheckRecoveryTargetConflicts() at every +# server start, regardless of whether recovery is requested. @recovery_params = ( "recovery_target_name = '$recovery_name'", @@ -190,6 +253,111 @@ like( qr/FATAL: .* recovery ended before configured recovery target was reached/, 'recovery end before target reached is a fatal error'); +# Conflicting recovery targets are rejected at every startup, regardless of +# whether recovery.signal is present. Use a throwaway cluster initialized +# from the existing backup so we can leave conflicting GUCs in its +# postgresql.conf without polluting the primary used by later tests. +# init_from_backup is called without has_restoring, so no recovery.signal +# is created and the cluster would normally start as a plain primary; the +# conflicting recovery_target_* GUCs must be rejected anyway. +my $node_no_signal = PostgreSQL::Test::Cluster->new('multi_target_no_signal'); +$node_no_signal->init_from_backup($node_primary, 'my_backup'); +$node_no_signal->append_conf( + 'postgresql.conf', "recovery_target_name = '$recovery_name' +recovery_target_time = '$recovery_time'"); + +my $res_no_signal = run_log( + [ + 'pg_ctl', + '--pgdata' => $node_no_signal->data_dir, + '--log' => $node_no_signal->logfile, + 'start', + ]); +ok(!$res_no_signal, + 'server fails to start with conflicting recovery targets and no recovery.signal'); + +my $logfile_no_signal = slurp_file($node_no_signal->logfile()); +like( + $logfile_no_signal, + qr/multiple recovery targets specified/, + 'expected error message logged without recovery.signal'); + +# Same-GUC last-wins (one source of truth for the GUC's value): assigning a +# recovery_target_* GUC and then assigning the same GUC to an empty string +# leaves no target set and recovery proceeds to the end of WAL. This is the +# "set then unset" form of GUC reassignment; the assign hook clears its own +# type when the new value is empty. postgresql.conf cannot express duplicate +# keys (ProcessConfigFile collapses them), so the postmaster command line is +# used via "pg_ctl --options". Each of the five recovery_target_* assign +# hooks is exercised once. +# +# Note: GUC values below are passed unquoted on the command line. Windows +# cmd.exe does not strip single quotes the way POSIX shells do, so quoted +# values would reach the postmaster verbatim and be rejected. recovery_time +# from now() contains a space; we replace it with the ISO 8601 T separator +# so the value is a single shell token without quoting. +my $recovery_time_t = $recovery_time; +$recovery_time_t =~ s/ /T/; + +test_recovery_standby_with_options( + 'recovery_target_xid set then cleared', + 'standby_xid_set_clear', $node_primary, + "-c recovery_target_xid=$recovery_txid -c recovery_target_xid=", + "6000", $lsn6); + +test_recovery_standby_with_options( + 'recovery_target_time set then cleared', + 'standby_time_set_clear', $node_primary, + "-c recovery_target_time=$recovery_time_t -c recovery_target_time=", + "6000", $lsn6); + +test_recovery_standby_with_options( + 'recovery_target_name set then cleared', + 'standby_name_set_clear', $node_primary, + "-c recovery_target_name=$recovery_name -c recovery_target_name=", + "6000", $lsn6); + +test_recovery_standby_with_options( + 'recovery_target_lsn set then cleared', + 'standby_lsn_set_clear', $node_primary, + "-c recovery_target_lsn=$recovery_lsn -c recovery_target_lsn=", + "6000", $lsn6); + +test_recovery_standby_with_options( + 'recovery_target set then cleared', + 'standby_immediate_set_clear', $node_primary, + "-c recovery_target=immediate -c recovery_target=", + "6000", $lsn6); + +# Same-GUC empty-then-set sanity: assigning an empty string and then a +# non-empty value to the same GUC must end with the target set. The empty +# value seen first must not poison a later non-empty assignment. +my $node_xid_clear_set = + PostgreSQL::Test::Cluster->new('standby_xid_clear_set'); +$node_xid_clear_set->init_from_backup($node_primary, 'my_backup', + has_restoring => 1); + +my $res_xid_clear_set = run_log( + [ + 'pg_ctl', + '--pgdata' => $node_xid_clear_set->data_dir, + '--log' => $node_xid_clear_set->logfile, + '--options' => + "-c recovery_target_xid= -c recovery_target_xid=$recovery_txid", + 'start', + ]); +ok($res_xid_clear_set, + 'server starts with recovery_target_xid cleared then set'); + +$node_xid_clear_set->poll_query_until('postgres', + "SELECT '$lsn2'::pg_lsn <= pg_last_wal_replay_lsn()") + or die "Timed out while waiting for standby to catch up"; +my $count_xid_clear_set = $node_xid_clear_set->safe_psql('postgres', + "SELECT count(*) FROM tab_int"); +is($count_xid_clear_set, "2000", + 'recovery_target_xid honored when cleared then set'); +$node_xid_clear_set->teardown_node; + # Invalid recovery_target_timeline tests my ($result, $stdout, $stderr) = $node_primary->psql('postgres', "ALTER SYSTEM SET recovery_target_timeline TO 'bogus'"); -- 2.52.0