Author: Noah Misch Commit: Noah Misch Introduce PG_TEST_TIMEOUT_DEFAULT for TAP suite can't-happen timeouts. Slow hosts may avoid load-induced, spurious failures by setting environment variable PG_TEST_TIMEOUT_DEFAULT to some number of seconds greater than 180. Developers may see faster failures by setting that environment variable to some lesser number of seconds. In tests, write $PostgreSQL::Test::Utils::timeout_default wherever the convention has been to write 180. This change raises the default for some briefer timeouts. Back-patch to v10 (all supported versions). Reviewed by FIXME. Discussion: https://postgr.es/m/FIXME diff --git a/contrib/amcheck/t/002_cic.pl b/contrib/amcheck/t/002_cic.pl index d604def..b8e4ac7 100644 --- a/contrib/amcheck/t/002_cic.pl +++ b/contrib/amcheck/t/002_cic.pl @@ -18,7 +18,8 @@ my ($node, $result); # $node = PostgreSQL::Test::Cluster->new('CIC_test'); $node->init; -$node->append_conf('postgresql.conf', 'lock_timeout = 180000'); +$node->append_conf('postgresql.conf', + 'lock_timeout = ' . (1000 * $PostgreSQL::Test::Utils::timeout_default)); $node->start; $node->safe_psql('postgres', q(CREATE EXTENSION amcheck)); $node->safe_psql('postgres', q(CREATE TABLE tbl(i int))); diff --git a/contrib/amcheck/t/003_cic_2pc.pl b/contrib/amcheck/t/003_cic_2pc.pl index f668ed3..e66ccd9 100644 --- a/contrib/amcheck/t/003_cic_2pc.pl +++ b/contrib/amcheck/t/003_cic_2pc.pl @@ -22,7 +22,8 @@ my ($node, $result); $node = PostgreSQL::Test::Cluster->new('CIC_2PC_test'); $node->init; $node->append_conf('postgresql.conf', 'max_prepared_transactions = 10'); -$node->append_conf('postgresql.conf', 'lock_timeout = 180000'); +$node->append_conf('postgresql.conf', + 'lock_timeout = ' . (1000 * $PostgreSQL::Test::Utils::timeout_default)); $node->start; $node->safe_psql('postgres', q(CREATE EXTENSION amcheck)); $node->safe_psql('postgres', q(CREATE TABLE tbl(i int))); @@ -38,7 +39,7 @@ $node->safe_psql('postgres', q(CREATE TABLE tbl(i int))); my $main_in = ''; my $main_out = ''; -my $main_timer = IPC::Run::timeout(180); +my $main_timer = IPC::Run::timeout($PostgreSQL::Test::Utils::timeout_default); my $main_h = $node->background_psql('postgres', \$main_in, \$main_out, @@ -52,7 +53,7 @@ pump $main_h until $main_out =~ /syncpoint1/ || $main_timer->is_expired; my $cic_in = ''; my $cic_out = ''; -my $cic_timer = IPC::Run::timeout(180); +my $cic_timer = IPC::Run::timeout($PostgreSQL::Test::Utils::timeout_default); my $cic_h = $node->background_psql('postgres', \$cic_in, \$cic_out, $cic_timer, on_error_stop => 1); @@ -113,9 +114,10 @@ PREPARE TRANSACTION 'persists_forever'; )); $node->restart; -my $reindex_in = ''; -my $reindex_out = ''; -my $reindex_timer = IPC::Run::timeout(180); +my $reindex_in = ''; +my $reindex_out = ''; +my $reindex_timer = + IPC::Run::timeout($PostgreSQL::Test::Utils::timeout_default); my $reindex_h = $node->background_psql('postgres', \$reindex_in, \$reindex_out, $reindex_timer, on_error_stop => 1); diff --git a/src/bin/pg_ctl/t/004_logrotate.pl b/src/bin/pg_ctl/t/004_logrotate.pl index d290452..d73ce03 100644 --- a/src/bin/pg_ctl/t/004_logrotate.pl +++ b/src/bin/pg_ctl/t/004_logrotate.pl @@ -39,7 +39,7 @@ sub check_log_pattern my $node = shift; my $lfname = fetch_file_name($logfiles, $format); - my $max_attempts = 180 * 10; + my $max_attempts = 10 * $PostgreSQL::Test::Utils::timeout_default; my $logcontents; for (my $attempts = 0; $attempts < $max_attempts; $attempts++) @@ -78,7 +78,7 @@ $node->start(); $node->psql('postgres', 'SELECT 1/0'); # might need to retry if logging collector process is slow... -my $max_attempts = 180 * 10; +my $max_attempts = 10 * $PostgreSQL::Test::Utils::timeout_default; my $current_logfiles; for (my $attempts = 0; $attempts < $max_attempts; $attempts++) diff --git a/src/bin/pg_dump/t/002_pg_dump.pl b/src/bin/pg_dump/t/002_pg_dump.pl index dd065c7..6906d91 100644 --- a/src/bin/pg_dump/t/002_pg_dump.pl +++ b/src/bin/pg_dump/t/002_pg_dump.pl @@ -295,7 +295,8 @@ my %pgdump_runs = ( '--no-sync', "--file=$tempdir/only_dump_test_table.sql", '--table=dump_test.test_table', - '--lock-wait-timeout=1000000', + '--lock-wait-timeout=' + . (1000 * $PostgreSQL::Test::Utils::timeout_default), 'postgres', ], }, diff --git a/src/bin/psql/t/010_tab_completion.pl b/src/bin/psql/t/010_tab_completion.pl index 005961f..a549106 100644 --- a/src/bin/psql/t/010_tab_completion.pl +++ b/src/bin/psql/t/010_tab_completion.pl @@ -94,7 +94,7 @@ close $FH; my $in = ''; my $out = ''; -my $timer = timer(5); +my $timer = timer($PostgreSQL::Test::Utils::timeout_default); my $h = $node->interactive_psql('postgres', \$in, \$out, $timer); @@ -111,7 +111,7 @@ sub check_completion # reset output collector $out = ""; # restart per-command timer - $timer->start(5); + $timer->start($PostgreSQL::Test::Utils::timeout_default); # send the data to be sent $in .= $send; # wait ... @@ -442,7 +442,7 @@ check_completion("blarg \t\t", qr//, "check completion failure path"); clear_query(); # send psql an explicit \q to shut it down, else pty won't close properly -$timer->start(5); +$timer->start($PostgreSQL::Test::Utils::timeout_default); $in .= "\\q\n"; finish $h or die "psql returned $?"; $timer->reset; diff --git a/src/bin/psql/t/020_cancel.pl b/src/bin/psql/t/020_cancel.pl index 3224f8e..d57d342 100644 --- a/src/bin/psql/t/020_cancel.pl +++ b/src/bin/psql/t/020_cancel.pl @@ -46,12 +46,13 @@ SKIP: { my $psql_pid; until (-s "$tempdir/psql.pid" and ($psql_pid = PostgreSQL::Test::Utils::slurp_file("$tempdir/psql.pid")) =~ /^\d+\n/s) { - ($count++ < 180 * 100) or die "pid file did not appear"; + ($count++ < 100 * $PostgreSQL::Test::Utils::timeout_default) + or die "pid file did not appear"; usleep(10_000) } # Send sleep command and wait until the server has registered it - $stdin = "select pg_sleep(180);\n"; + $stdin = "select pg_sleep($PostgreSQL::Test::Utils::timeout_default);\n"; pump $h while length $stdin; $node->poll_query_until('postgres', q{SELECT (SELECT count(*) FROM pg_stat_activity WHERE query ~ '^select pg_sleep') > 0;}) or die "timed out"; diff --git a/src/bin/scripts/t/080_pg_isready.pl b/src/bin/scripts/t/080_pg_isready.pl index e8436dc..c45ca66 100644 --- a/src/bin/scripts/t/080_pg_isready.pl +++ b/src/bin/scripts/t/080_pg_isready.pl @@ -18,8 +18,8 @@ my $node = PostgreSQL::Test::Cluster->new('main'); $node->init; $node->start; -# use a long timeout for the benefit of very slow buildfarm machines -$node->command_ok([qw(pg_isready --timeout=60)], +$node->command_ok( + [ 'pg_isready', "--timeout=$PostgreSQL::Test::Utils::timeout_default" ], 'succeeds with server running'); done_testing(); diff --git a/src/test/perl/PostgreSQL/Test/Cluster.pm b/src/test/perl/PostgreSQL/Test/Cluster.pm index ed70eff..c6fe4ca 100644 --- a/src/test/perl/PostgreSQL/Test/Cluster.pm +++ b/src/test/perl/PostgreSQL/Test/Cluster.pm @@ -36,7 +36,8 @@ PostgreSQL::Test::Cluster - class representing PostgreSQL server instance my ($stdout, $stderr, $timed_out); my $cmdret = $node->psql('postgres', 'SELECT pg_sleep(600)', stdout => \$stdout, stderr => \$stderr, - timeout => 180, timed_out => \$timed_out, + timeout => $PostgreSQL::Test::Utils::timeout_default, + timed_out => \$timed_out, extra_params => ['--single-transaction'], on_error_die => 1) print "Sleep timed out" if $timed_out; @@ -1725,7 +1726,8 @@ e.g. my ($stdout, $stderr, $timed_out); my $cmdret = $node->psql('postgres', 'SELECT pg_sleep(600)', stdout => \$stdout, stderr => \$stderr, - timeout => 180, timed_out => \$timed_out, + timeout => $PostgreSQL::Test::Utils::timeout_default, + timed_out => \$timed_out, extra_params => ['--single-transaction']) will set $cmdret to undef and $timed_out to a true value. @@ -1905,7 +1907,8 @@ scalar reference. This allows the caller to act on other parts of the system while idling this backend. The specified timer object is attached to the harness, as well. It's caller's -responsibility to select the timeout length, and to restart the timer after +responsibility to set the timeout length (usually +$PostgreSQL::Test::Utils::timeout_default), and to restart the timer after each command if the timeout is per-command. psql is invoked in tuples-only unaligned mode with reading of B<.psqlrc> @@ -1993,9 +1996,10 @@ The process's stdin is sourced from the $stdin scalar reference, and its stdout and stderr go to the $stdout scalar reference. ptys are used so that psql thinks it's being called interactively. -The specified timer object is attached to the harness, as well. -It's caller's responsibility to select the timeout length, and to -restart the timer after each command if the timeout is per-command. +The specified timer object is attached to the harness, as well. It's caller's +responsibility to set the timeout length (usually +$PostgreSQL::Test::Utils::timeout_default), and to restart the timer after +each command if the timeout is per-command. psql is invoked in tuples-only unaligned mode with reading of B<.psqlrc> disabled. That may be overridden by passing extra psql parameters. @@ -2311,7 +2315,7 @@ sub connect_fails Run B<$query> repeatedly, until it returns the B<$expected> result ('t', or SQL boolean true, by default). Continues polling if B returns an error result. -Times out after 180 seconds. +Times out after $PostgreSQL::Test::Utils::timeout_default seconds. Returns 1 if successful, 0 if timed out. =cut @@ -2329,7 +2333,7 @@ sub poll_query_until '-d', $self->connstr($dbname) ]; my ($stdout, $stderr); - my $max_attempts = 180 * 10; + my $max_attempts = 10 * $PostgreSQL::Test::Utils::timeout_default; my $attempts = 0; while ($attempts < $max_attempts) @@ -2353,8 +2357,8 @@ sub poll_query_until $attempts++; } - # The query result didn't change in 180 seconds. Give up. Print the - # output from the last attempt, hopefully that's useful for debugging. + # Give up. Print the output from the last attempt, hopefully that's useful + # for debugging. diag qq(poll_query_until timed out executing this query: $query expecting this output: @@ -2667,7 +2671,7 @@ sub wait_for_slot_catchup Waits for the contents of the server log file, starting at the given offset, to match the supplied regular expression. Checks the entire log if no offset is -given. Times out after 180 seconds. +given. Times out after $PostgreSQL::Test::Utils::timeout_default seconds. If successful, returns the length of the entire log file, in bytes. @@ -2678,7 +2682,7 @@ sub wait_for_log my ($self, $regexp, $offset) = @_; $offset = 0 unless defined $offset; - my $max_attempts = 180 * 10; + my $max_attempts = 10 * $PostgreSQL::Test::Utils::timeout_default; my $attempts = 0; while ($attempts < $max_attempts) @@ -2693,7 +2697,6 @@ sub wait_for_log $attempts++; } - # The logs didn't match within 180 seconds. Give up. croak "timed out waiting for match: $regexp"; } diff --git a/src/test/perl/PostgreSQL/Test/Utils.pm b/src/test/perl/PostgreSQL/Test/Utils.pm index 57fcb24..e8bf2e0 100644 --- a/src/test/perl/PostgreSQL/Test/Utils.pm +++ b/src/test/perl/PostgreSQL/Test/Utils.pm @@ -91,8 +91,8 @@ our @EXPORT = qw( $use_unix_sockets ); -our ($windows_os, $is_msys2, $use_unix_sockets, $tmp_check, $log_path, - $test_logfile); +our ($windows_os, $is_msys2, $use_unix_sockets, $timeout_default, + $tmp_check, $log_path, $test_logfile); BEGIN { @@ -157,6 +157,10 @@ BEGIN # supported, but it can be overridden if desired. $use_unix_sockets = (!$windows_os || defined $ENV{PG_TEST_USE_UNIX_SOCKETS}); + + $timeout_default = $ENV{PG_TEST_TIMEOUT_DEFAULT}; + $timeout_default = 180 + if not defined $timeout_default or $timeout_default eq ''; } =pod diff --git a/src/test/perl/README b/src/test/perl/README index 0511c55..4b160cc 100644 --- a/src/test/perl/README +++ b/src/test/perl/README @@ -23,6 +23,12 @@ tmp_check/log/ to get more info. Files named 'regress_log_XXX' are log output from the perl test scripts themselves, and should be examined first. Other files are postmaster logs, and may be helpful as additional data. +The tests default to a timeout of 180 seconds for many individual operations. +Slow hosts may avoid load-induced, spurious failures by setting environment +variable PG_TEST_TIMEOUT_DEFAULT to some number of seconds greater than 180. +Developers may see faster failures by setting that environment variable to +some lesser number of seconds. + Data directories will also be left behind for analysis when a test fails; they are named according to the test filename. But if the environment variable PG_TEST_NOCLEAN is set, data directories will be retained diff --git a/src/test/recovery/t/003_recovery_targets.pl b/src/test/recovery/t/003_recovery_targets.pl index 25dd5ee..e8e1a42 100644 --- a/src/test/recovery/t/003_recovery_targets.pl +++ b/src/test/recovery/t/003_recovery_targets.pl @@ -172,8 +172,8 @@ run_log( $node_standby->logfile, 'start' ]); -# wait up to 180s for postgres to terminate -foreach my $i (0 .. 1800) +# wait for postgres to terminate +foreach my $i (0 .. 10 * $PostgreSQL::Test::Utils::timeout_default) { last if !-f $node_standby->data_dir . '/postmaster.pid'; usleep(100_000); diff --git a/src/test/recovery/t/006_logical_decoding.pl b/src/test/recovery/t/006_logical_decoding.pl index fa6bd45..9cec279 100644 --- a/src/test/recovery/t/006_logical_decoding.pl +++ b/src/test/recovery/t/006_logical_decoding.pl @@ -107,7 +107,8 @@ $node_primary->safe_psql('postgres', ); my $stdout_recv = $node_primary->pg_recvlogical_upto( - 'postgres', 'test_slot', $endpos, 180, + 'postgres', 'test_slot', $endpos, + $PostgreSQL::Test::Utils::timeout_default, 'include-xids' => '0', 'skip-empty-xacts' => '1'); chomp($stdout_recv); @@ -119,7 +120,8 @@ $node_primary->poll_query_until('postgres', ) or die "slot never became inactive"; $stdout_recv = $node_primary->pg_recvlogical_upto( - 'postgres', 'test_slot', $endpos, 180, + 'postgres', 'test_slot', $endpos, + $PostgreSQL::Test::Utils::timeout_default, 'include-xids' => '0', 'skip-empty-xacts' => '1'); chomp($stdout_recv); diff --git a/src/test/recovery/t/010_logical_decoding_timelines.pl b/src/test/recovery/t/010_logical_decoding_timelines.pl index 6e8b0b1..01ff31e 100644 --- a/src/test/recovery/t/010_logical_decoding_timelines.pl +++ b/src/test/recovery/t/010_logical_decoding_timelines.pl @@ -157,7 +157,7 @@ like( ($ret, $stdout, $stderr) = $node_replica->psql( 'postgres', "SELECT data FROM pg_logical_slot_peek_changes('before_basebackup', NULL, NULL, 'include-xids', '0', 'skip-empty-xacts', '1');", - timeout => 180); + timeout => $PostgreSQL::Test::Utils::timeout_default); is($ret, 0, 'replay from slot before_basebackup succeeds'); my $final_expected_output_bb = q(BEGIN @@ -186,7 +186,7 @@ my $endpos = $node_replica->safe_psql('postgres', $stdout = $node_replica->pg_recvlogical_upto( 'postgres', 'before_basebackup', - $endpos, 180, + $endpos, $PostgreSQL::Test::Utils::timeout_default, 'include-xids' => '0', 'skip-empty-xacts' => '1'); diff --git a/src/test/recovery/t/013_crash_restart.pl b/src/test/recovery/t/013_crash_restart.pl index 3b740eb..a0bce59 100644 --- a/src/test/recovery/t/013_crash_restart.pl +++ b/src/test/recovery/t/013_crash_restart.pl @@ -18,11 +18,7 @@ use PostgreSQL::Test::Utils; use Test::More; use Config; -# To avoid hanging while expecting some specific input from a psql -# instance being driven by us, add a timeout high enough that it -# should never trigger even on very slow machines, unless something -# is really wrong. -my $psql_timeout = IPC::Run::timer(60); +my $psql_timeout = IPC::Run::timer($PostgreSQL::Test::Utils::timeout_default); my $node = PostgreSQL::Test::Cluster->new('primary'); $node->init(allows_streaming => 1); diff --git a/src/test/recovery/t/017_shm.pl b/src/test/recovery/t/017_shm.pl index 678a252..f636efc 100644 --- a/src/test/recovery/t/017_shm.pl +++ b/src/test/recovery/t/017_shm.pl @@ -132,7 +132,7 @@ my $slow_client = IPC::Run::start( \$stdout, '2>', \$stderr, - IPC::Run::timeout(900)); # five times the poll_query_until timeout + IPC::Run::timeout(5 * $PostgreSQL::Test::Utils::timeout_default)); ok( $gnat->poll_query_until( 'postgres', "SELECT 1 FROM pg_stat_activity WHERE query = '$slow_query'", '1'), @@ -143,10 +143,11 @@ $gnat->kill9; unlink($gnat->data_dir . '/postmaster.pid'); $gnat->rotate_logfile; # on Windows, can't open old log for writing log_ipcs(); -# Reject ordinary startup. Retry for the same reasons poll_start() does. +# Reject ordinary startup. Retry for the same reasons poll_start() does, +# every 0.1s for at least $PostgreSQL::Test::Utils::timeout_default seconds. my $pre_existing_msg = qr/pre-existing shared memory block/; { - my $max_attempts = 180 * 10; # Retry every 0.1s for at least 180s. + my $max_attempts = 10 * $PostgreSQL::Test::Utils::timeout_default; my $attempts = 0; while ($attempts < $max_attempts) { @@ -193,7 +194,7 @@ sub poll_start { my ($node) = @_; - my $max_attempts = 180 * 10; + my $max_attempts = 10 * $PostgreSQL::Test::Utils::timeout_default; my $attempts = 0; while ($attempts < $max_attempts) @@ -209,8 +210,8 @@ sub poll_start $attempts++; } - # No success within 180 seconds. Try one last time without fail_ok, which - # will BAIL_OUT unless it succeeds. + # Try one last time without fail_ok, which will BAIL_OUT unless it + # succeeds. $node->start && return 1; return 0; } diff --git a/src/test/recovery/t/019_replslot_limit.pl b/src/test/recovery/t/019_replslot_limit.pl index 4257bd4..0da7b83 100644 --- a/src/test/recovery/t/019_replslot_limit.pl +++ b/src/test/recovery/t/019_replslot_limit.pl @@ -291,7 +291,7 @@ my @result = SELECT pg_switch_wal(); CHECKPOINT; SELECT 'finished';", - timeout => '60')); + timeout => $PostgreSQL::Test::Utils::timeout_default)); is($result[1], 'finished', 'check if checkpoint command is not blocked'); $node_primary2->stop; @@ -343,7 +343,7 @@ $logstart = get_log_size($node_primary3); kill 'STOP', $senderpid, $receiverpid; advance_wal($node_primary3, 2); -my $max_attempts = 180; +my $max_attempts = $PostgreSQL::Test::Utils::timeout_default; while ($max_attempts-- >= 0) { if (find_in_log( @@ -366,7 +366,7 @@ $node_primary3->poll_query_until('postgres', "lost") or die "timed out waiting for slot to be lost"; -$max_attempts = 180; +$max_attempts = $PostgreSQL::Test::Utils::timeout_default; while ($max_attempts-- >= 0) { if (find_in_log( diff --git a/src/test/recovery/t/021_row_visibility.pl b/src/test/recovery/t/021_row_visibility.pl index e274351..8d97db5 100644 --- a/src/test/recovery/t/021_row_visibility.pl +++ b/src/test/recovery/t/021_row_visibility.pl @@ -32,11 +32,8 @@ $node_standby->init_from_backup($node_primary, $backup_name, $node_standby->append_conf('postgresql.conf', 'max_prepared_transactions=10'); $node_standby->start; -# To avoid hanging while expecting some specific input from a psql -# instance being driven by us, add a timeout high enough that it -# should never trigger even on very slow machines, unless something -# is really wrong. -my $psql_timeout = IPC::Run::timer(300); +my $psql_timeout = + IPC::Run::timer(2 * $PostgreSQL::Test::Utils::timeout_default); # One psql to primary and standby each, for all queries. That allows # to check uncommitted changes being replicated and such. diff --git a/src/test/recovery/t/022_crash_temp_files.pl b/src/test/recovery/t/022_crash_temp_files.pl index 6ab3092..d295061 100644 --- a/src/test/recovery/t/022_crash_temp_files.pl +++ b/src/test/recovery/t/022_crash_temp_files.pl @@ -15,11 +15,7 @@ if ($Config{osname} eq 'MSWin32') exit; } -# To avoid hanging while expecting some specific input from a psql -# instance being driven by us, add a timeout high enough that it -# should never trigger even on very slow machines, unless something -# is really wrong. -my $psql_timeout = IPC::Run::timer(60); +my $psql_timeout = IPC::Run::timer($PostgreSQL::Test::Utils::timeout_default); my $node = PostgreSQL::Test::Cluster->new('node_crash'); $node->init(); diff --git a/src/test/recovery/t/024_archive_recovery.pl b/src/test/recovery/t/024_archive_recovery.pl index c10bb5b..ce347e0 100644 --- a/src/test/recovery/t/024_archive_recovery.pl +++ b/src/test/recovery/t/024_archive_recovery.pl @@ -81,8 +81,8 @@ sub test_recovery_wal_level_minimal $recovery_node->logfile, 'start' ]); - # Wait up to 180s for postgres to terminate - foreach my $i (0 .. 1800) + # wait for postgres to terminate + foreach my $i (0 .. 10 * $PostgreSQL::Test::Utils::timeout_default) { last if !-f $recovery_node->data_dir . '/postmaster.pid'; usleep(100_000); diff --git a/src/test/subscription/t/015_stream.pl b/src/test/subscription/t/015_stream.pl index 9f221fc..6561b18 100644 --- a/src/test/subscription/t/015_stream.pl +++ b/src/test/subscription/t/015_stream.pl @@ -58,7 +58,7 @@ is($result, qq(2|2|2), 'check initial data was copied to subscriber'); my $in = ''; my $out = ''; -my $timer = IPC::Run::timeout(180); +my $timer = IPC::Run::timeout($PostgreSQL::Test::Utils::timeout_default); my $h = $node_publisher->background_psql('postgres', \$in, \$out, $timer, on_error_stop => 0);