From 57bb79b1bc8faac646131336abcc1596711c5f32 Mon Sep 17 00:00:00 2001 From: tomas Date: Thu, 28 Aug 2025 22:10:25 +0200 Subject: [PATCH] TAP fixes --- .../t/006_concurrent_pgbench.pl | 88 ++++++++++++++----- 1 file changed, 68 insertions(+), 20 deletions(-) diff --git a/src/test/modules/test_checksums/t/006_concurrent_pgbench.pl b/src/test/modules/test_checksums/t/006_concurrent_pgbench.pl index b33ca6e0c26..374eac7e6a3 100644 --- a/src/test/modules/test_checksums/t/006_concurrent_pgbench.pl +++ b/src/test/modules/test_checksums/t/006_concurrent_pgbench.pl @@ -23,11 +23,14 @@ my $node_primary_loglocation = 0; my $node_standby_1; my $node_standby_1_loglocation = 0; +my $pgbench_primary = undef; +my $pgbench_standby = undef; + # The number of full test iterations which will be performed. The exact number # of tests performed and the wall time taken is non-deterministic as the test # performs a lot of randomized actions, but 50 iterations will be a long test # run regardless. -my $TEST_ITERATIONS = 50; +my $TEST_ITERATIONS = 1000; # Variables which record the current state of the cluster my $data_checksum_state = 'off'; @@ -55,7 +58,7 @@ if ($ENV{enable_injection_points} ne 'yes') # whether to turn things off during testing. sub cointoss { - return int(rand(2) == 1); + return int(rand() < 0.5); } # Helper for injecting random sleeps here and there in the testrun. The sleep @@ -73,11 +76,16 @@ sub background_ro_pgbench { my ($port, $stdin, $stdout, $stderr) = @_; - my $pgbench_primary = IPC::Run::start( - [ 'pgbench', '-p', $port, '-S', '-T', '600', '-c', '10', 'postgres' ], - '<' => \$stdin, - '>' => \$stdout, - '2>' => \$stderr, + if ($pgbench_standby) + { + $pgbench_standby->finish; + } + + $pgbench_standby = IPC::Run::start( + [ 'pgbench', '-n', '-p', $port, '-S', '-T', '600', '-c', '10', 'postgres' ], + '<' => '/dev/null', + '>' => '/dev/null', + '2>' => '/dev/null', IPC::Run::timer($PostgreSQL::Test::Utils::timeout_default)); } @@ -87,11 +95,16 @@ sub background_rw_pgbench { my ($port, $stdin, $stdout, $stderr) = @_; - my $pgbench_primary = IPC::Run::start( + if ($pgbench_primary) + { + $pgbench_primary->finish; + } + + $pgbench_primary = IPC::Run::start( [ 'pgbench', '-p', $port, '-T', '600', '-c', '10', 'postgres' ], - '<' => \$stdin, - '>' => \$stdout, - '2>' => \$stderr, + '<' => '/dev/null', + '>' => '/dev/null', + '2>' => '/dev/null', IPC::Run::timer($PostgreSQL::Test::Utils::timeout_default)); } @@ -224,6 +237,9 @@ background_rw_pgbench( $node_primary->port, $pgb_primary_stdin, $pgb_primary_stdout, $pgb_primary_stderr); +my $primary_shutdown_clean = 0; +my $standby_shutdown_clean = 0; + # Main test suite. This loop will start a pgbench run on the cluster and while # that's running flip the state of data checksums concurrently. It will then # randomly restart thec cluster (in fast or immediate) mode and then check for @@ -246,9 +262,11 @@ for (my $i = 0; $i < $TEST_ITERATIONS; $i++) $node_primary_loglocation = -s $node_primary->logfile; # If data checksums are enabled, take the opportunity to verify them - # while the cluster is offline - $node_primary->checksum_verify_offline() - unless $data_checksum_state eq 'off'; + # while the cluster is offline (but only if stopped in a clean way, + # not after immediate shutdown) + #$node_primary->checksum_verify_offline() + # unless $data_checksum_state eq 'off' or !$primary_shutdown_clean; + random_sleep(); $node_primary->start; # Start a pgbench in the background against the primary @@ -270,9 +288,11 @@ for (my $i = 0; $i < $TEST_ITERATIONS; $i++) $node_standby_1_loglocation = -s $node_standby_1->logfile; # If data checksums are enabled, take the opportunity to verify them - # while the cluster is offline - $node_standby_1->checksum_verify_offline() - unless $data_checksum_state eq 'off'; + # while the cluster is offline (but only if stopped in a clean way, + # not after immediate shutdown) + #$node_standby_1->checksum_verify_offline() + # unless $data_checksum_state eq 'off' or !$standby_shutdown_clean; + random_sleep(); $node_standby_1->start; # Start a select-only pgbench in the background on the standby @@ -287,13 +307,41 @@ for (my $i = 0; $i < $TEST_ITERATIONS; $i++) my $result = $node_primary->safe_psql('postgres', "SELECT count(*) FROM t WHERE a > 1"); is($result, '100000', 'ensure data pages can be read back on primary'); + random_sleep(); + $node_primary->wait_for_catchup($node_standby_1, 'write'); - # Potentially powercycle the cluster - $node_primary->stop($stop_modes[ int(rand(100)) ]) if cointoss(); random_sleep(); - $node_standby_1->stop($stop_modes[ int(rand(100)) ]) if cointoss(); + + # Potentially powercycle the cluster (the nodes independently) + # XXX should maybe try stopping nodes in the opposite order too? + if (cointoss()) + { + my $mode = $stop_modes[ int(rand(100)) ]; + $node_primary->stop($mode); + $primary_shutdown_clean = ($mode eq 'fast'); + } + + random_sleep(); + + if (cointoss()) + { + my $mode = $stop_modes[ int(rand(100)) ]; + $node_standby_1->stop($mode); + $standby_shutdown_clean = ($mode eq 'fast'); + } +} + +# make sure the nodes are running +if (!$node_primary->is_alive) +{ + $node_primary->start; +} + +if (!$node_standby_1->is_alive) +{ + $node_standby_1->start; } # Testrun is over, ensure that data reads back as expected and perform a final -- 2.39.5