From 3638a7ba5037c46e0106989a6679906c03a2fc6b Mon Sep 17 00:00:00 2001 From: JoongHyuk Shin Date: Sun, 21 Jun 2026 17:45:25 +0900 Subject: [PATCH] Don't call ereport(ERROR) from recovery target GUC assign hooks The five recovery target GUC assign hooks (assign_recovery_target, assign_recovery_target_lsn, assign_recovery_target_name, assign_recovery_target_time, assign_recovery_target_xid) all called error_multiple_recovery_targets(), which invoked ereport(ERROR). The GUC README explicitly states that assign hooks must never fail; raising an error from an assign hook leaves guc.c's internal state inconsistent before the abort. A comment in the code itself acknowledged this as "broken by design." Fix this by removing the conflict check from all five assign hooks and detecting multiple recovery targets in a new CheckRecoveryTargetConflicts() function, called from validateRecoveryParameters() before its early return for !ArchiveRecoveryRequested. The check therefore runs at every startup regardless of recovery mode, preserving the existing behavior of detecting misconfiguration at server start time rather than only when recovery.signal is added. In each assign hook's empty-string branch, replace the unconditional "recoveryTarget = RECOVERY_TARGET_UNSET" assignment with a narrower "if (recoveryTarget == MY_TYPE)" form. Empty strings for an unrelated recovery_target_* GUC then leave another GUC's already-set target intact, while still preserving the documented "last value wins" reassignment semantics for the same GUC. When a conflict is detected, the errdetail now lists which recovery_target_* parameters are actually set, rather than the full list of candidate names, and an errhint points to pg_settings for their values and where each is configured. --- src/backend/access/transam/xlogrecovery.c | 159 +++++++++---- src/test/recovery/t/003_recovery_targets.pl | 251 +++++++++++++++++++- 2 files changed, 361 insertions(+), 49 deletions(-) diff --git a/src/backend/access/transam/xlogrecovery.c b/src/backend/access/transam/xlogrecovery.c index 4d61795b483..dc8b46a6c81 100644 --- a/src/backend/access/transam/xlogrecovery.c +++ b/src/backend/access/transam/xlogrecovery.c @@ -61,6 +61,7 @@ #include "storage/subsystems.h" #include "utils/datetime.h" #include "utils/fmgrprotos.h" +#include "utils/guc.h" #include "utils/guc_hooks.h" #include "utils/pgstat_internal.h" #include "utils/pg_lsn.h" @@ -341,6 +342,7 @@ static void ApplyWalRecord(XLogReaderState *xlogreader, XLogRecord *record, Time static void EnableStandbyMode(void); static void readRecoverySignalFile(void); static void validateRecoveryParameters(void); +static void CheckRecoveryTargetConflicts(void); static bool read_backup_label(XLogRecPtr *checkPointLoc, TimeLineID *backupLabelTLI, bool *backupEndRequired, bool *backupFromStandby); @@ -1067,6 +1069,8 @@ readRecoverySignalFile(void) static void validateRecoveryParameters(void) { + CheckRecoveryTargetConflicts(); + if (!ArchiveRecoveryRequested) return; @@ -1144,6 +1148,94 @@ validateRecoveryParameters(void) } } +/* + * CheckRecoveryTargetConflicts + * + * Validate that at most one of the recovery_target_* GUCs is set to a + * non-empty value. This is called from validateRecoveryParameters() at every + * server startup, regardless of whether archive recovery is requested, so + * misconfiguration is detected at server start time rather than later when + * recovery.signal is added. + * + * The check is a separate function rather than inlined into + * validateRecoveryParameters() because it intentionally runs even when no + * recovery is requested, while the rest of validateRecoveryParameters() is + * recovery-mode-only. Keeping it as a named function makes that separation + * explicit. + * + * The check used to live in the assign hooks of the recovery_target_* GUCs + * (calling ereport(ERROR) on conflict), which violated guc.c's contract that + * assign hooks must never fail. Moving the check here keeps the assign hooks + * contract-compliant. + * + * If a future patch adds a sixth recovery_target_* GUC, it must be added to the + * list of GetConfigOption() checks below; the errdetail builds its list of set + * parameters dynamically, so it needs no change. + */ +static void +CheckRecoveryTargetConflicts(void) +{ + int ntargets = 0; + const char *val; + StringInfoData buf; + + initStringInfo(&buf); + + /* + * These GUCs are all PGC_STRING, so GetConfigOption() returns "" (not + * NULL) when a parameter is unset. Collect the name of each one that is + * set into buf, separated by ", ", for use in the errdetail below. + */ + val = GetConfigOption("recovery_target", false, false); + if (val[0] != '\0') + { + ntargets++; + if (buf.len > 0) + appendStringInfoString(&buf, ", "); + appendStringInfoString(&buf, "\"recovery_target\""); + } + val = GetConfigOption("recovery_target_lsn", false, false); + if (val[0] != '\0') + { + ntargets++; + if (buf.len > 0) + appendStringInfoString(&buf, ", "); + appendStringInfoString(&buf, "\"recovery_target_lsn\""); + } + val = GetConfigOption("recovery_target_name", false, false); + if (val[0] != '\0') + { + ntargets++; + if (buf.len > 0) + appendStringInfoString(&buf, ", "); + appendStringInfoString(&buf, "\"recovery_target_name\""); + } + val = GetConfigOption("recovery_target_time", false, false); + if (val[0] != '\0') + { + ntargets++; + if (buf.len > 0) + appendStringInfoString(&buf, ", "); + appendStringInfoString(&buf, "\"recovery_target_time\""); + } + val = GetConfigOption("recovery_target_xid", false, false); + if (val[0] != '\0') + { + ntargets++; + if (buf.len > 0) + appendStringInfoString(&buf, ", "); + appendStringInfoString(&buf, "\"recovery_target_xid\""); + } + + if (ntargets > 1) + ereport(FATAL, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("multiple recovery targets specified"), + errdetail("Recovery targets %s are set, but only one can be set.", + buf.data), + errhint("See pg_settings for the parameter values and where each is set."))); +} + /* * read_backup_label: check to see if a backup_label file is present * @@ -4769,32 +4861,27 @@ check_primary_slot_name(char **newval, void **extra, GucSource source) } /* - * Recovery target settings: Only one of the several recovery_target* settings - * may be set. Setting a second one results in an error. The global variable + * Recovery target settings: At most one of the several recovery_target* + * settings may be set to a non-empty value. The global variable * recoveryTarget tracks which kind of recovery target was chosen. Other * variables store the actual target value (for example a string or a xid). - * The assign functions of the parameters check whether a competing parameter - * was already set. But we want to allow setting the same parameter multiple - * times. We also want to allow unsetting a parameter and setting a different - * one, so we unset recoveryTarget when the parameter is set to an empty - * string. + * An empty string for any of these GUCs is treated as "not set", equivalent + * to the GUC's default; an empty value cannot clobber another GUC's + * already-set target. Conflicts between multiple non-empty settings are + * detected in CheckRecoveryTargetConflicts(), called from + * validateRecoveryParameters() at every startup. * - * XXX this code is broken by design. Throwing an error from a GUC assign - * hook breaks fundamental assumptions of guc.c. So long as all the variables - * for which this can happen are PGC_POSTMASTER, the consequences are limited, - * since we'd just abort postmaster startup anyway. Nonetheless it's likely - * that we have odd behaviors such as unexpected GUC ordering dependencies. + * Each assign hook clears recoveryTarget only when its own GUC is reassigned + * to an empty string after the same GUC was previously assigned a non-empty + * value, e.g. + * postgres -c recovery_target_xid=700 -c recovery_target_xid= + * (postgresql.conf collapses duplicate keys so only the last value reaches + * the assign hook; this same-parameter set-then-clear case only arises from + * -c). The clear is restricted to the hook's own target type so that an + * empty value for one recovery_target_* GUC cannot clobber another GUC's + * already-set target. */ -pg_noreturn static void -error_multiple_recovery_targets(void) -{ - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("multiple recovery targets specified"), - errdetail("At most one of \"recovery_target\", \"recovery_target_lsn\", \"recovery_target_name\", \"recovery_target_time\", \"recovery_target_xid\" may be set."))); -} - /* * GUC check_hook for recovery_target */ @@ -4815,13 +4902,9 @@ check_recovery_target(char **newval, void **extra, GucSource source) void assign_recovery_target(const char *newval, void *extra) { - if (recoveryTarget != RECOVERY_TARGET_UNSET && - recoveryTarget != RECOVERY_TARGET_IMMEDIATE) - error_multiple_recovery_targets(); - if (newval && strcmp(newval, "") != 0) recoveryTarget = RECOVERY_TARGET_IMMEDIATE; - else + else if (recoveryTarget == RECOVERY_TARGET_IMMEDIATE) recoveryTarget = RECOVERY_TARGET_UNSET; } @@ -4856,16 +4939,12 @@ check_recovery_target_lsn(char **newval, void **extra, GucSource source) void assign_recovery_target_lsn(const char *newval, void *extra) { - if (recoveryTarget != RECOVERY_TARGET_UNSET && - recoveryTarget != RECOVERY_TARGET_LSN) - error_multiple_recovery_targets(); - if (newval && strcmp(newval, "") != 0) { recoveryTarget = RECOVERY_TARGET_LSN; recoveryTargetLSN = *((XLogRecPtr *) extra); } - else + else if (recoveryTarget == RECOVERY_TARGET_LSN) recoveryTarget = RECOVERY_TARGET_UNSET; } @@ -4891,16 +4970,12 @@ check_recovery_target_name(char **newval, void **extra, GucSource source) void assign_recovery_target_name(const char *newval, void *extra) { - if (recoveryTarget != RECOVERY_TARGET_UNSET && - recoveryTarget != RECOVERY_TARGET_NAME) - error_multiple_recovery_targets(); - if (newval && strcmp(newval, "") != 0) { recoveryTarget = RECOVERY_TARGET_NAME; recoveryTargetName = newval; } - else + else if (recoveryTarget == RECOVERY_TARGET_NAME) recoveryTarget = RECOVERY_TARGET_UNSET; } @@ -4971,13 +5046,9 @@ check_recovery_target_time(char **newval, void **extra, GucSource source) void assign_recovery_target_time(const char *newval, void *extra) { - if (recoveryTarget != RECOVERY_TARGET_UNSET && - recoveryTarget != RECOVERY_TARGET_TIME) - error_multiple_recovery_targets(); - if (newval && strcmp(newval, "") != 0) recoveryTarget = RECOVERY_TARGET_TIME; - else + else if (recoveryTarget == RECOVERY_TARGET_TIME) recoveryTarget = RECOVERY_TARGET_UNSET; } @@ -5099,15 +5170,11 @@ check_recovery_target_xid(char **newval, void **extra, GucSource source) void assign_recovery_target_xid(const char *newval, void *extra) { - if (recoveryTarget != RECOVERY_TARGET_UNSET && - recoveryTarget != RECOVERY_TARGET_XID) - error_multiple_recovery_targets(); - if (newval && strcmp(newval, "") != 0) { recoveryTarget = RECOVERY_TARGET_XID; recoveryTargetXid = *((TransactionId *) extra); } - else + else if (recoveryTarget == RECOVERY_TARGET_XID) recoveryTarget = RECOVERY_TARGET_UNSET; } diff --git a/src/test/recovery/t/003_recovery_targets.pl b/src/test/recovery/t/003_recovery_targets.pl index 047eb13293a..6e8a2c557c9 100644 --- a/src/test/recovery/t/003_recovery_targets.pl +++ b/src/test/recovery/t/003_recovery_targets.pl @@ -51,6 +51,49 @@ sub test_recovery_standby return; } +# Start a standby with the given pg_ctl --options string and verify that +# the standby reaches the given LSN and row count. Used to exercise +# scenarios that require the postmaster command line to receive multiple +# "-c name=value" instances of the same GUC, which postgresql.conf cannot +# express because ProcessConfigFile collapses duplicate keys. +sub test_recovery_standby_with_options +{ + local $Test::Builder::Level = $Test::Builder::Level + 1; + + my $test_name = shift; + my $node_name = shift; + my $node_primary = shift; + my $options = shift; + my $num_rows = shift; + my $until_lsn = shift; + + my $node_standby = PostgreSQL::Test::Cluster->new($node_name); + $node_standby->init_from_backup($node_primary, 'my_backup', + has_restoring => 1); + + my $res = run_log( + [ + 'pg_ctl', + '--pgdata' => $node_standby->data_dir, + '--log' => $node_standby->logfile, + '--options' => $options, + 'start', + ]); + ok($res, "server starts for $test_name"); + + $node_standby->poll_query_until('postgres', + "SELECT '$until_lsn'::pg_lsn <= pg_last_wal_replay_lsn()") + or die "Timed out while waiting for standby to catch up"; + + my $count = $node_standby->safe_psql('postgres', + "SELECT count(*) FROM tab_int"); + is($count, qq($num_rows), "check standby content for $test_name"); + + $node_standby->teardown_node; + + return; +} + # Initialize primary node my $node_primary = PostgreSQL::Test::Cluster->new('primary'); $node_primary->init(has_archiving => 1, allows_streaming => 1); @@ -108,6 +151,13 @@ $node_primary->safe_psql('postgres', # Force archiving of WAL file $node_primary->safe_psql('postgres', "SELECT pg_switch_wal()"); +# LSN after the final 6000-row insert and WAL switch. Used by the +# set-then-cleared scenarios below where recovery has no target and must +# replay all archived WAL; polling on $lsn5 would race against the 5001-6000 +# rows. +my $lsn6 = + $node_primary->safe_psql('postgres', "SELECT pg_current_wal_lsn()"); + # Test recovery targets my @recovery_params = ("recovery_target = 'immediate'"); test_recovery_standby('immediate target', @@ -125,11 +175,24 @@ test_recovery_standby('name', 'standby_4', $node_primary, \@recovery_params, test_recovery_standby('LSN', 'standby_5', $node_primary, \@recovery_params, "5000", $lsn5); +# Regression: empty-string for one recovery_target_* GUC must not clobber +# another non-empty target. Setting recovery_target_xid + recovery_target_time +# = '' must recover to the xid, not run as no-target recovery. +@recovery_params = ( + "recovery_target_xid = '$recovery_txid'", + "recovery_target_time = ''"); +test_recovery_standby('xid with empty time GUC', + 'standby_xid_empty_time', $node_primary, \@recovery_params, + "2000", $lsn2); + # Multiple targets # -# Multiple conflicting settings are not allowed, but setting the same -# parameter multiple times or unsetting a parameter and setting a -# different one is allowed. +# Multiple conflicting non-empty settings are not allowed. Setting the same +# parameter multiple times is allowed (the last value wins, per ProcessConfigFile +# duplicate handling). An empty string for a recovery_target_* GUC is treated +# as the GUC's default and is a no-op; it does not clear any other already-set +# target. Conflict detection runs in CheckRecoveryTargetConflicts() at every +# server start, regardless of whether recovery is requested. @recovery_params = ( "recovery_target_name = '$recovery_name'", @@ -159,6 +222,21 @@ like( $logfile, qr/multiple recovery targets specified/, 'multiple conflicting settings'); +# errdetail lists exactly the parameters that are set, in GUC-check order +# (recovery_target_name is checked before recovery_target_time). +like( + $logfile, + qr/Recovery targets "recovery_target_name", "recovery_target_time" are set/, + 'errdetail lists the set parameters in order'); +# The parameter values must not be echoed (only the names are listed). +unlike( + $logfile, + qr/are set[^\n]*=/, + 'errdetail does not echo parameter values'); +like( + $logfile, + qr/HINT:.*pg_settings/, + 'errhint points to pg_settings'); # Check behavior when recovery ends before target is reached @@ -190,6 +268,173 @@ like( qr/FATAL: .* recovery ended before configured recovery target was reached/, 'recovery end before target reached is a fatal error'); +# Conflicting recovery targets are rejected at every startup, regardless of +# whether recovery.signal is present. Use a throwaway cluster initialized +# from the existing backup so we can leave conflicting GUCs in its +# postgresql.conf without polluting the primary used by later tests. +# init_from_backup is called without has_restoring, so no recovery.signal +# is created and the cluster would normally start as a plain primary; the +# conflicting recovery_target_* GUCs must be rejected anyway. +my $node_no_signal = PostgreSQL::Test::Cluster->new('multi_target_no_signal'); +$node_no_signal->init_from_backup($node_primary, 'my_backup'); +$node_no_signal->append_conf( + 'postgresql.conf', "recovery_target_name = '$recovery_name' +recovery_target_time = '$recovery_time'"); + +my $res_no_signal = run_log( + [ + 'pg_ctl', + '--pgdata' => $node_no_signal->data_dir, + '--log' => $node_no_signal->logfile, + 'start', + ]); +ok(!$res_no_signal, + 'server fails to start with conflicting recovery targets and no recovery.signal'); + +my $logfile_no_signal = slurp_file($node_no_signal->logfile()); +like( + $logfile_no_signal, + qr/multiple recovery targets specified/, + 'expected error message logged without recovery.signal'); +like( + $logfile_no_signal, + qr/Recovery targets "recovery_target_name", "recovery_target_time" are set/, + 'errdetail lists the set parameters in order without recovery.signal'); +unlike( + $logfile_no_signal, + qr/are set[^\n]*=/, + 'errdetail does not echo parameter values without recovery.signal'); +like( + $logfile_no_signal, + qr/HINT:.*pg_settings/, + 'errhint points to pg_settings without recovery.signal'); + +# Conflict involving the bare recovery_target (no suffix): it is checked first, +# so its name appears at the head of the list. The list entries are quoted, so +# the regex anchors on the closing quote to ensure "recovery_target" does not +# match a prefix of "recovery_target_xid". +my $node_bare = PostgreSQL::Test::Cluster->new('multi_target_bare'); +$node_bare->init_from_backup($node_primary, 'my_backup'); +$node_bare->append_conf('postgresql.conf', + "recovery_target = 'immediate'\nrecovery_target_xid = '$recovery_txid'"); + +my $res_bare = run_log( + [ + 'pg_ctl', + '--pgdata' => $node_bare->data_dir, + '--log' => $node_bare->logfile, + 'start', + ]); +ok(!$res_bare, + 'server fails to start with bare recovery_target conflict'); + +my $logfile_bare = slurp_file($node_bare->logfile()); +like( + $logfile_bare, + qr/Recovery targets "recovery_target", "recovery_target_xid" are set/, + 'errdetail lists the bare recovery_target first, without prefix mismatch'); + +# Three conflicting targets: exercises the ", " separator between more than two +# entries (no trailing separator after the last entry). +my $node_three = PostgreSQL::Test::Cluster->new('multi_target_three'); +$node_three->init_from_backup($node_primary, 'my_backup'); +$node_three->append_conf('postgresql.conf', + "recovery_target_name = '$recovery_name'\n" + . "recovery_target_time = '$recovery_time'\n" + . "recovery_target_xid = '$recovery_txid'"); + +my $res_three = run_log( + [ + 'pg_ctl', + '--pgdata' => $node_three->data_dir, + '--log' => $node_three->logfile, + 'start', + ]); +ok(!$res_three, + 'server fails to start with three conflicting recovery targets'); + +my $logfile_three = slurp_file($node_three->logfile()); +like( + $logfile_three, + qr/Recovery targets "recovery_target_name", "recovery_target_time", "recovery_target_xid" are set/, + 'errdetail lists three set parameters with correct separators'); + +# Same-GUC last-wins (one source of truth for the GUC's value): assigning a +# recovery_target_* GUC and then assigning the same GUC to an empty string +# leaves no target set and recovery proceeds to the end of WAL. This is the +# "set then unset" form of GUC reassignment; the assign hook clears its own +# type when the new value is empty. postgresql.conf cannot express duplicate +# keys (ProcessConfigFile collapses them), so the postmaster command line is +# used via "pg_ctl --options". Each of the five recovery_target_* assign +# hooks is exercised once. +# +# Note: GUC values below are passed unquoted on the command line. Windows +# cmd.exe does not strip single quotes the way POSIX shells do, so quoted +# values would reach the postmaster verbatim and be rejected. recovery_time +# from now() contains a space; we replace it with the ISO 8601 T separator +# so the value is a single shell token without quoting. +my $recovery_time_t = $recovery_time; +$recovery_time_t =~ s/ /T/; + +test_recovery_standby_with_options( + 'recovery_target_xid set then cleared', + 'standby_xid_set_clear', $node_primary, + "-c recovery_target_xid=$recovery_txid -c recovery_target_xid=", + "6000", $lsn6); + +test_recovery_standby_with_options( + 'recovery_target_time set then cleared', + 'standby_time_set_clear', $node_primary, + "-c recovery_target_time=$recovery_time_t -c recovery_target_time=", + "6000", $lsn6); + +test_recovery_standby_with_options( + 'recovery_target_name set then cleared', + 'standby_name_set_clear', $node_primary, + "-c recovery_target_name=$recovery_name -c recovery_target_name=", + "6000", $lsn6); + +test_recovery_standby_with_options( + 'recovery_target_lsn set then cleared', + 'standby_lsn_set_clear', $node_primary, + "-c recovery_target_lsn=$recovery_lsn -c recovery_target_lsn=", + "6000", $lsn6); + +test_recovery_standby_with_options( + 'recovery_target set then cleared', + 'standby_immediate_set_clear', $node_primary, + "-c recovery_target=immediate -c recovery_target=", + "6000", $lsn6); + +# Same-GUC empty-then-set sanity: assigning an empty string and then a +# non-empty value to the same GUC must end with the target set. The empty +# value seen first must not poison a later non-empty assignment. +my $node_xid_clear_set = + PostgreSQL::Test::Cluster->new('standby_xid_clear_set'); +$node_xid_clear_set->init_from_backup($node_primary, 'my_backup', + has_restoring => 1); + +my $res_xid_clear_set = run_log( + [ + 'pg_ctl', + '--pgdata' => $node_xid_clear_set->data_dir, + '--log' => $node_xid_clear_set->logfile, + '--options' => + "-c recovery_target_xid= -c recovery_target_xid=$recovery_txid", + 'start', + ]); +ok($res_xid_clear_set, + 'server starts with recovery_target_xid cleared then set'); + +$node_xid_clear_set->poll_query_until('postgres', + "SELECT '$lsn2'::pg_lsn <= pg_last_wal_replay_lsn()") + or die "Timed out while waiting for standby to catch up"; +my $count_xid_clear_set = $node_xid_clear_set->safe_psql('postgres', + "SELECT count(*) FROM tab_int"); +is($count_xid_clear_set, "2000", + 'recovery_target_xid honored when cleared then set'); +$node_xid_clear_set->teardown_node; + # Invalid recovery_target_timeline tests my ($result, $stdout, $stderr) = $node_primary->psql('postgres', "ALTER SYSTEM SET recovery_target_timeline TO 'bogus'"); -- 2.52.0