From a0ef72e61a31d6519ead6f4d9fb9efe2a2c94990 Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Mon, 27 Jan 2020 12:14:36 +0100 Subject: [PATCH v6] Fail if recovery target is not reached Before, if a recovery target is configured, but the archive ended before the target was reached, recovery would end and the server would promote without further notice. That was deemed to be pretty wrong. With this change, if the recovery target is not reached, it is a fatal error. Discussion: https://www.postgresql.org/message-id/flat/993736dd3f1713ec1f63fc3b653839f5@lako.no --- doc/src/sgml/config.sgml | 5 ++++ src/backend/access/transam/xlog.c | 19 +++++++++--- src/test/perl/PostgresNode.pm | 33 +++++++++++++++++++-- src/test/recovery/t/003_recovery_targets.pl | 24 ++++++++++++++- 4 files changed, 73 insertions(+), 8 deletions(-) diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index e07dc01e80..c1128f89ec 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -3571,6 +3571,11 @@ Recovery Target If <xref linkend="guc-hot-standby"/> is not enabled, a setting of <literal>pause</literal> will act the same as <literal>shutdown</literal>. + + In any case, if a recovery target is configured but the archive + recovery ends before the target is reached, the server will shut down + with a fatal error. 
+ diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 882d5e8a73..be4c923ab1 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -6200,7 +6200,7 @@ StartupXLOG(void) XLogCtlInsert *Insert; CheckPoint checkPoint; bool wasShutdown; - bool reachedStopPoint = false; + bool reachedRecoveryTarget = false; bool haveBackupLabel = false; bool haveTblspcMap = false; XLogRecPtr RecPtr, @@ -7103,7 +7103,7 @@ StartupXLOG(void) */ if (recoveryStopsBefore(xlogreader)) { - reachedStopPoint = true; /* see below */ + reachedRecoveryTarget = true; break; } @@ -7258,7 +7258,7 @@ StartupXLOG(void) /* Exit loop if we reached inclusive recovery target */ if (recoveryStopsAfter(xlogreader)) { - reachedStopPoint = true; + reachedRecoveryTarget = true; break; } @@ -7270,7 +7270,7 @@ StartupXLOG(void) * end of main redo apply loop */ - if (reachedStopPoint) + if (reachedRecoveryTarget) { if (!reachedConsistency) ereport(FATAL, @@ -7327,7 +7327,18 @@ StartupXLOG(void) /* there are no WAL records following the checkpoint */ ereport(LOG, (errmsg("redo is not required"))); + } + + /* + * This check is intentionally after the above log messages that + * indicate how far recovery went. + */ + if (ArchiveRecoveryRequested && + recoveryTarget != RECOVERY_TARGET_UNSET && + !reachedRecoveryTarget) + ereport(FATAL, + (errmsg("recovery ended before configured recovery target was reached"))); } /* diff --git a/src/test/perl/PostgresNode.pm b/src/test/perl/PostgresNode.pm index 2e0cf4a2f3..be44e8784f 100644 --- a/src/test/perl/PostgresNode.pm +++ b/src/test/perl/PostgresNode.pm @@ -653,6 +653,9 @@ Restoring WAL segments from archives using restore_command can be enabled by passing the keyword parameter has_restoring => 1. This is disabled by default. +If has_restoring is used, standby mode is used by default. To use +recovery mode instead, pass the keyword parameter standby => 0. 
+ The backup is copied, leaving the original unmodified. pg_hba.conf is unconditionally set to enable replication connections. @@ -669,6 +672,7 @@ sub init_from_backup $params{has_streaming} = 0 unless defined $params{has_streaming}; $params{has_restoring} = 0 unless defined $params{has_restoring}; + $params{standby} = 1 unless defined $params{standby}; print "# Initializing node \"$node_name\" from backup \"$backup_name\" of node \"$root_name\"\n"; @@ -699,7 +703,7 @@ port = $port "unix_socket_directories = '$host'"); } $self->enable_streaming($root_node) if $params{has_streaming}; - $self->enable_restoring($root_node) if $params{has_restoring}; + $self->enable_restoring($root_node, $params{standby}) if $params{has_restoring}; return; } @@ -939,7 +943,7 @@ primary_conninfo='$root_connstr' # Internal routine to enable archive recovery command on a standby node sub enable_restoring { - my ($self, $root_node) = @_; + my ($self, $root_node, $standby) = @_; my $path = TestLib::perl2host($root_node->archive_dir); my $name = $self->name; @@ -961,7 +965,30 @@ sub enable_restoring 'postgresql.conf', qq( restore_command = '$copy_command' )); - $self->set_standby_mode(); + if ($standby) + { + $self->set_standby_mode(); + } + else + { + $self->set_recovery_mode(); + } + return; +} + +=pod + +=item $node->set_recovery_mode() + +Place recovery.signal file. + +=cut + +sub set_recovery_mode +{ + my ($self) = @_; + + $self->append_conf('recovery.signal', ''); return; } diff --git a/src/test/recovery/t/003_recovery_targets.pl b/src/test/recovery/t/003_recovery_targets.pl index d8fbd50011..fd14bab208 100644 --- a/src/test/recovery/t/003_recovery_targets.pl +++ b/src/test/recovery/t/003_recovery_targets.pl @@ -3,7 +3,8 @@ use warnings; use PostgresNode; use TestLib; -use Test::More tests => 8; +use Test::More tests => 9; +use Time::HiRes qw(usleep); # Create and test a standby from given backup, with a certain recovery target. 
# Choose $until_lsn later than the transaction commit that causes the row @@ -145,3 +146,24 @@ sub test_recovery_standby my $logfile = slurp_file($node_standby->logfile()); ok($logfile =~ qr/multiple recovery targets specified/, 'multiple conflicting settings'); + +# Check behavior when recovery ends before target is reached + +$node_standby = get_new_node('standby_8'); +$node_standby->init_from_backup($node_master, 'my_backup', + has_restoring => 1, standby => 0); +$node_standby->append_conf('postgresql.conf', + "recovery_target_name = 'does_not_exist'"); + +run_log(['pg_ctl', '-D', $node_standby->data_dir, + '-l', $node_standby->logfile, 'start']); + +# wait up to 10 seconds for postgres to terminate +foreach my $i (0..100) +{ + last if ! -f $node_standby->data_dir . '/postmaster.pid'; + usleep(100_000); +} +$logfile = slurp_file($node_standby->logfile()); +ok($logfile =~ qr/FATAL: recovery ended before configured recovery target was reached/, + 'recovery end before target reached is a fatal error'); base-commit: 3e4818e9dd5be294d97ca67012528cb1c0b0ccaa -- 2.25.0