From a8f1f7b84d38d5130f3bd8426be39cbc2c6866c3 Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Wed, 30 Aug 2023 14:00:01 +0200 Subject: [PATCH v1] pg_resetwal: More tests, and some output improvements - More tests and test coverage - Update an obsolete comment - Regroup --help output - Improve documentation about pg_resetwal -f option - Improve error output --- doc/src/sgml/ref/pg_resetwal.sgml | 70 ++++++++++++--- src/bin/pg_resetwal/pg_resetwal.c | 58 +++++++------ src/bin/pg_resetwal/t/001_basic.pl | 114 +++++++++++++++++++++++++ src/bin/pg_resetwal/t/002_corrupted.pl | 4 + 4 files changed, 209 insertions(+), 37 deletions(-) diff --git a/doc/src/sgml/ref/pg_resetwal.sgml b/doc/src/sgml/ref/pg_resetwal.sgml index fd539f5604..d47f9552b8 100644 --- a/doc/src/sgml/ref/pg_resetwal.sgml +++ b/doc/src/sgml/ref/pg_resetwal.sgml @@ -52,21 +52,33 @@ Description - After running this command, it should be possible to start the server, + Some options, such as (see below), can also + be used to modify certain global settings of a database cluster without the + need to rerun initdb. This can be done safely on an + otherwise sound database cluster, if none of the dangerous modes mentioned + below are used. + + + + If pg_resetwal is used on a data directory where the + server has been cleanly shut down and the control file is sound, then it + will have no effect on the contents of the database system, except that no + longer used WAL files are cleared away. Any other use is potentially + dangerous and must be done with great care. pg_resetwal + will require the (force) option to be specified before + working on a data directory in an unclean shutdown state or with a corrupt + control file. + + + + After running this command on a data directory with corrupted WAL or a + corrupt control file, it should be possible to start the server, but bear in mind that the database might contain inconsistent data due to partially-committed transactions. You should immediately dump your data, run initdb, and restore. After restore, check for inconsistencies and repair as needed. - - This utility can only be run by the user who installed the server, because - it requires read/write access to the data directory. - For safety reasons, you must specify the data directory on the command line. - pg_resetwal does not use the environment variable - PGDATA. - - If pg_resetwal complains that it cannot determine valid data for pg_control, you can force it to proceed anyway @@ -82,19 +94,41 @@ Description execute any data-modifying operations in the database before you dump, as any such action is likely to make the corruption worse. + + + This utility can only be run by the user who installed the server, because + it requires read/write access to the data directory. + Options + + datadir + + + + + Specifies the location of the database directory. + For safety reasons, you must specify the data directory on the command + line. pg_resetwal does not use the environment + variable PGDATA. + + + + - Force pg_resetwal to proceed even if it cannot determine - valid data for pg_control, as explained above. + Force pg_resetwal to proceed even in situations where + it could be dangerous, as explained above. Specifically, this option is + required to proceed if the server had not been cleanly shut down, or if + pg_resetwal cannot determine valid data for + pg_control. @@ -132,7 +166,8 @@ Options pg_resetwal is unable to determine appropriate values by reading pg_control. Safe values can be determined as described below. For values that take numeric arguments, hexadecimal - values can be specified by using the prefix 0x. + values can be specified by using the prefix 0x. Note + that these instructions only apply with the standard block size of 8 kB. @@ -155,6 +190,7 @@ Options greatest file name in the same directory. The file names are in hexadecimal. + @@ -238,6 +274,7 @@ Options names are in hexadecimal, so the easiest way to do this is to specify the option value in hexadecimal and append four zeroes. + @@ -272,6 +309,7 @@ Options The file names are in hexadecimal. There is no simple recipe such as the ones for other options of appending zeroes. + @@ -284,6 +322,12 @@ Options linkend="app-initdb"/> for more information. + + This option can also be used to change the WAL segment size of an + existing database cluster, avoiding the need to + re-initdb. + + While pg_resetwal will set the WAL starting address @@ -314,6 +358,7 @@ Options in pg_xact, -u 0x700000 will work (five trailing zeroes provide the proper multiplier). + @@ -335,6 +380,7 @@ Options in pg_xact, -x 0x1200000 will work (five trailing zeroes provide the proper multiplier). + diff --git a/src/bin/pg_resetwal/pg_resetwal.c b/src/bin/pg_resetwal/pg_resetwal.c index 9bebc2a995..bb0df8221f 100644 --- a/src/bin/pg_resetwal/pg_resetwal.c +++ b/src/bin/pg_resetwal/pg_resetwal.c @@ -6,8 +6,7 @@ * * The theory of operation is fairly simple: * 1. Read the existing pg_control (which will include the last - * checkpoint record). If it is an old format then update to - * current format. + * checkpoint record). * 2. If pg_control is corrupt, attempt to intuit reasonable values, * by scanning the old xlog if necessary. * 3. Modify pg_control to reflect a "shutdown" state with a checkpoint @@ -212,6 +211,7 @@ main(int argc, char *argv[]) exit(1); } + // FIXME: why 2? if (set_oldest_commit_ts_xid < 2 && set_oldest_commit_ts_xid != 0) pg_fatal("transaction ID (-c) must be either 0 or greater than or equal to 2"); @@ -346,6 +346,10 @@ main(int argc, char *argv[]) get_restricted_token(); + if (chdir(DataDir) < 0) + pg_fatal("could not change directory to \"%s\": %m", + DataDir); + /* Set mask based on PGDATA permissions */ if (!GetDataDirectoryCreatePerm(DataDir)) pg_fatal("could not read permissions of directory \"%s\": %m", @@ -353,10 +357,6 @@ main(int argc, char *argv[]) umask(pg_mode_mask); - if (chdir(DataDir) < 0) - pg_fatal("could not change directory to \"%s\": %m", - DataDir); - /* Check that data directory matches our server version */ CheckDataVersion(); @@ -459,20 +459,22 @@ main(int argc, char *argv[]) if (minXlogSegNo > newXlogSegNo) newXlogSegNo = minXlogSegNo; + if (noupdate) + { + PrintNewControlValues(); + exit(0); + } + /* * If we had to guess anything, and -f was not given, just print the - * guessed values and exit. Also print if -n is given. + * guessed values and exit. */ - if ((guessed && !force) || noupdate) + if (guessed && !force) { PrintNewControlValues(); - if (!noupdate) - { - printf(_("\nIf these values seem acceptable, use -f to force reset.\n")); - exit(1); - } - else - exit(0); + pg_log_error("not proceeding because control file values were guessed"); + pg_log_error_hint("If these values seem acceptable, use -f to force reset."); + exit(1); } /* @@ -480,9 +482,9 @@ main(int argc, char *argv[]) */ if (ControlFile.state != DB_SHUTDOWNED && !force) { - printf(_("The database server was not shut down cleanly.\n" - "Resetting the write-ahead log might cause data to be lost.\n" - "If you want to proceed anyway, use -f to force reset.\n")); + pg_log_error("database server was not shut down cleanly"); + pg_log_error_detail("Resetting the write-ahead log might cause data to be lost."); + pg_log_error_hint("If you want to proceed anyway, use -f to force reset."); exit(1); } @@ -1129,24 +1131,30 @@ static void usage(void) { printf(_("%s resets the PostgreSQL write-ahead log.\n\n"), progname); - printf(_("Usage:\n %s [OPTION]... DATADIR\n\n"), progname); - printf(_("Options:\n")); + printf(_("Usage:\n")); + printf(_(" %s [OPTION]... DATADIR\n"), progname); + + printf(_("\nOptions:\n")); + printf(_(" [-D, --pgdata=]DATADIR data directory\n")); + printf(_(" -f, --force force update to be done even after unclean shutdown or\n" + " if pg_control values had to be guessed\n")); + printf(_(" -n, --dry-run no update, just show what would be done\n")); + printf(_(" -V, --version output version information, then exit\n")); + printf(_(" -?, --help show this help, then exit\n")); + + printf(_("\nOptions to override control file values:\n")); printf(_(" -c, --commit-timestamp-ids=XID,XID\n" " set oldest and newest transactions bearing\n" " commit timestamp (zero means no change)\n")); - printf(_(" [-D, --pgdata=]DATADIR data directory\n")); printf(_(" -e, --epoch=XIDEPOCH set next transaction ID epoch\n")); - printf(_(" -f, --force force update to be done\n")); printf(_(" -l, --next-wal-file=WALFILE set minimum starting location for new WAL\n")); printf(_(" -m, --multixact-ids=MXID,MXID set next and oldest multitransaction ID\n")); - printf(_(" -n, --dry-run no update, just show what would be done\n")); printf(_(" -o, --next-oid=OID set next OID\n")); printf(_(" -O, --multixact-offset=OFFSET set next multitransaction offset\n")); printf(_(" -u, --oldest-transaction-id=XID set oldest transaction ID\n")); - printf(_(" -V, --version output version information, then exit\n")); printf(_(" -x, --next-transaction-id=XID set next transaction ID\n")); printf(_(" --wal-segsize=SIZE size of WAL segments, in megabytes\n")); - printf(_(" -?, --help show this help, then exit\n")); + printf(_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT); printf(_("%s home page: <%s>\n"), PACKAGE_NAME, PACKAGE_URL); } diff --git a/src/bin/pg_resetwal/t/001_basic.pl b/src/bin/pg_resetwal/t/001_basic.pl index 7e5efbf56b..001bb455ef 100644 --- a/src/bin/pg_resetwal/t/001_basic.pl +++ b/src/bin/pg_resetwal/t/001_basic.pl @@ -14,6 +14,7 @@ my $node = PostgreSQL::Test::Cluster->new('main'); $node->init; +$node->append_conf('postgresql.conf', 'track_commit_timestamp = on'); command_like([ 'pg_resetwal', '-n', $node->data_dir ], qr/checkpoint/, 'pg_resetwal -n produces output'); @@ -29,4 +30,117 @@ 'check PGDATA permissions'); } +command_ok([ 'pg_resetwal', '-D', $node->data_dir ], 'pg_resetwal runs'); +$node->start; +is($node->safe_psql("postgres", "SELECT 1;"), 1, 'server running and working after reset'); + +command_fails_like([ 'pg_resetwal', $node->data_dir ], qr/lock file .* exists/, 'fails if server running'); + +$node->stop('immediate'); +command_fails_like([ 'pg_resetwal', $node->data_dir ], qr/database server was not shut down cleanly/, 'does not run after immediate shutdown'); +command_ok([ 'pg_resetwal', '-f', $node->data_dir ], 'runs after immediate shutdown with force'); +$node->start; +is($node->safe_psql("postgres", "SELECT 1;"), 1, 'server running and working after forced reset'); + +$node->stop; + +# check various command-line handling + +command_fails_like([ 'pg_resetwal', 'foo' ], qr/error: could not change directory/, 'fails with nonexistent data directory'); +command_fails_like([ 'pg_resetwal', 'foo', 'bar'], qr/too many command-line arguments/, 'fails with too many command-line arguments'); + +$ENV{PGDATA} = $node->data_dir; # not used +command_fails_like([ 'pg_resetwal' ], qr/no data directory specified/, 'fails with too few command-line arguments'); + +# error cases +# -c +command_fails_like([ 'pg_resetwal', '-c', 'foo', $node->data_dir], qr/error: invalid argument for option -c/, 'fails with incorrect -c option'); +command_fails_like([ 'pg_resetwal', '-c', '10,bar', $node->data_dir], qr/error: invalid argument for option -c/, 'fails with incorrect -c option part 2'); +command_fails_like([ 'pg_resetwal', '-c', '1,10', $node->data_dir], qr/greater than/, 'fails with -c value 1 part 1'); +command_fails_like([ 'pg_resetwal', '-c', '10,1', $node->data_dir], qr/greater than/, 'fails with -c value 1 part 2'); +# -e +command_fails_like([ 'pg_resetwal', '-e', 'foo', $node->data_dir], qr/error: invalid argument for option -e/, 'fails with incorrect -e option'); +command_fails_like([ 'pg_resetwal', '-e', '-1', $node->data_dir], qr/must not be -1/, 'fails with -e value -1'); +# -l +command_fails_like([ 'pg_resetwal', '-l', 'foo', $node->data_dir], qr/error: invalid argument for option -l/, 'fails with incorrect -l option'); +# -m +command_fails_like([ 'pg_resetwal', '-m', 'foo', $node->data_dir], qr/error: invalid argument for option -m/, 'fails with incorrect -m option'); +command_fails_like([ 'pg_resetwal', '-m', '10,bar', $node->data_dir], qr/error: invalid argument for option -m/, 'fails with incorrect -m option part 2'); +command_fails_like([ 'pg_resetwal', '-m', '0,10', $node->data_dir], qr/must not be 0/, 'fails with -m value 0 part 1'); +command_fails_like([ 'pg_resetwal', '-m', '10,0', $node->data_dir], qr/must not be 0/, 'fails with -m value 0 part 2'); +# -o +command_fails_like([ 'pg_resetwal', '-o', 'foo', $node->data_dir], qr/error: invalid argument for option -o/, 'fails with incorrect -o option'); +command_fails_like([ 'pg_resetwal', '-o', '0', $node->data_dir], qr/must not be 0/, 'fails with -o value 0'); +# -O +command_fails_like([ 'pg_resetwal', '-O', 'foo', $node->data_dir], qr/error: invalid argument for option -O/, 'fails with incorrect -O option'); +command_fails_like([ 'pg_resetwal', '-O', '-1', $node->data_dir], qr/must not be -1/, 'fails with -O value -1'); +# --wal-segsize +command_fails_like([ 'pg_resetwal', '--wal-segsize', 'foo', $node->data_dir], qr/error: invalid value/, 'fails with incorrect --wal-segsize option'); +command_fails_like([ 'pg_resetwal', '--wal-segsize', '13', $node->data_dir], qr/must be a power/, 'fails with invalid --wal-segsize value'); +# -u +command_fails_like([ 'pg_resetwal', '-u', 'foo', $node->data_dir], qr/error: invalid argument for option -u/, 'fails with incorrect -u option'); +command_fails_like([ 'pg_resetwal', '-u', '1', $node->data_dir], qr/must be greater than/, 'fails with -u value too small'); +# -x +command_fails_like([ 'pg_resetwal', '-x', 'foo', $node->data_dir], qr/error: invalid argument for option -x/, 'fails with incorrect -x option'); +command_fails_like([ 'pg_resetwal', '-x', '1', $node->data_dir], qr/must be greater than/, 'fails with -x value too small'); + +# run with control override options + +my $out = (run_command([ 'pg_resetwal', '-n', $node->data_dir ]))[0]; +$out =~ /^Database block size: *(\d+)$/m or die; +my $blcksz = $1; + +my @cmd = ('pg_resetwal', '-D', $node->data_dir); + +# some not-so-critical hardcoded values +push @cmd, '-e', 1; +push @cmd, '-l', '00000001000000320000004B'; +push @cmd, '-o', 100_000; +push @cmd, '--wal-segsize', 1; + +# these use the guidance from the documentation + +sub get_slru_files +{ + opendir(my $dh, $node->data_dir . '/' . $_[0]) or die $!; + my @files = sort grep { /[0-9A-F]+/ } readdir $dh; + closedir $dh; + return @files; +} + +my (@files, $mult); + +@files = get_slru_files('pg_commit_ts'); +$mult = 32 * $blcksz * 4; # FIXME +# -c argument is "old,new" +push @cmd, + '-c', sprintf("%d,%d", + hex($files[0]) == 0 ? 2 : hex($files[0]), hex($files[-1])); + +@files = get_slru_files('pg_multixact/offsets'); +$mult = 32 * $blcksz / 4; +# -m argument is "new,old" +push @cmd, + '-m', sprintf("%d,%d", + (hex($files[-1]) + 1) * $mult, + hex($files[0]) == 0 ? 1 : hex($files[0] * $mult)); + +@files = get_slru_files('pg_multixact/members'); +$mult = 32 * int($blcksz/20) * 4; +push @cmd, + '-O', (hex($files[-1]) + 1) * $mult; + +@files = get_slru_files('pg_xact'); +$mult = 32 * $blcksz * 4; +push @cmd, + '-u', (hex($files[0]) == 0 ? 3 : hex($files[0]) * $mult), + '-x', ((hex($files[-1]) + 1) * $mult); + +command_ok([@cmd, '-n'], 'runs with control override options, dry run'); +command_ok(\@cmd, 'runs with control override options'); +command_like([ 'pg_resetwal', '-n', $node->data_dir ], qr/^Latest checkpoint's NextOID: *100000$/m, 'spot check that control changes were applied'); + +$node->start; +ok(1, 'server started after reset'); + done_testing(); diff --git a/src/bin/pg_resetwal/t/002_corrupted.pl b/src/bin/pg_resetwal/t/002_corrupted.pl index 6d19a1efd5..ddbd0556bb 100644 --- a/src/bin/pg_resetwal/t/002_corrupted.pl +++ b/src/bin/pg_resetwal/t/002_corrupted.pl @@ -55,4 +55,8 @@ ], 'processes zero WAL segment size'); +# now try to run it +command_fails_like([ 'pg_resetwal', $node->data_dir ], qr/not proceeding because control file values were guessed/, 'does not run when control file values were guessed'); +command_ok([ 'pg_resetwal', '-f', $node->data_dir ], 'runs with force when control file values were guessed'); + done_testing(); base-commit: 6844d3275ac6b3c35d824f49362d3fe59b30f26b -- 2.41.0