commit f502fa990e877c6e4d1d03518e524519d8e88806 Author: Tomas Vondra Date: Wed Apr 15 12:50:06 2026 +0200 POC: Test checksum state transitions using step through injection points diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index be92b6af20f..5b481784543 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -4762,6 +4762,9 @@ SetDataChecksumsOnInProgress(void) uint32 data_checksum_version; elog(LOG, "SetDataChecksumsOnInProgress / start"); + + INJECTION_POINT("datachecksums-enable-inprogress-checksums-delay", NULL); + /* * The state transition is performed in a critical section with * checkpoints held off to provide crash safety. @@ -4782,6 +4785,8 @@ SetDataChecksumsOnInProgress(void) MyProc->delayChkptFlags &= ~DELAY_CHKPT_START; END_CRIT_SECTION(); + INJECTION_POINT("datachecksums-enable-inprogress-checksums-after-xlogctl", NULL); + LWLockAcquire(ControlFileLock, LW_EXCLUSIVE); elog(LOG, "SetDataChecksumsOnInProgress ControlFile->data_checksum_version %u => %u", @@ -4791,6 +4796,8 @@ SetDataChecksumsOnInProgress(void) UpdateControlFile(); LWLockRelease(ControlFileLock); + INJECTION_POINT("datachecksums-enable-inprogress-checksums-after-controlfile", NULL); + elog(LOG, "SetDataChecksumsOnInProgress / EmitAndWaitDataChecksumsBarrier(PG_DATA_CHECKSUM_INPROGRESS_ON)"); EmitAndWaitDataChecksumsBarrier(PG_DATA_CHECKSUM_INPROGRESS_ON); @@ -4862,6 +4869,8 @@ SetDataChecksumsOn(void) MyProc->delayChkptFlags &= ~DELAY_CHKPT_START; END_CRIT_SECTION(); + INJECTION_POINT("datachecksums-enable-checksums-after-xlogctl", NULL); + /* * Update the controlfile before waiting since if we have an immediate * shutdown while waiting we want to come back up with checksums enabled. 
@@ -4875,9 +4884,13 @@ SetDataChecksumsOn(void) UpdateControlFile(); LWLockRelease(ControlFileLock); + INJECTION_POINT("datachecksums-enable-checksums-after-controlfile", NULL); + elog(LOG, "SetDataChecksumsOn / RequestCheckpoint(CHECKPOINT_FORCE | CHECKPOINT_WAIT | CHECKPOINT_FAST)"); RequestCheckpoint(CHECKPOINT_FORCE | CHECKPOINT_WAIT | CHECKPOINT_FAST); + INJECTION_POINT("datachecksums-enable-checksums-after-checkpoint", NULL); + elog(LOG, "SetDataChecksumsOn / EmitAndWaitDataChecksumsBarrier(PG_DATA_CHECKSUM_VERSION)"); EmitAndWaitDataChecksumsBarrier(PG_DATA_CHECKSUM_VERSION); @@ -4924,6 +4937,8 @@ SetDataChecksumsOff(void) { SpinLockRelease(&XLogCtl->info_lck); + INJECTION_POINT("datachecksums-disable-inprogress-checksums-delay", NULL); + START_CRIT_SECTION(); MyProc->delayChkptFlags |= DELAY_CHKPT_START; @@ -4934,24 +4949,30 @@ SetDataChecksumsOff(void) XLogCtl->data_checksum_version = PG_DATA_CHECKSUM_INPROGRESS_OFF; SpinLockRelease(&XLogCtl->info_lck); - elog(LOG, "SetDataChecksumsOff / XLogCtl->data_checksum_version %u = %u", + elog(LOG, "SetDataChecksumsOff / XLogCtl->data_checksum_version %u => %u", data_checksum_version, PG_DATA_CHECKSUM_INPROGRESS_OFF); MyProc->delayChkptFlags &= ~DELAY_CHKPT_START; END_CRIT_SECTION(); + INJECTION_POINT("datachecksums-disable-inprogress-checksums-after-xlogctl", NULL); + LWLockAcquire(ControlFileLock, LW_EXCLUSIVE); - elog(LOG, "SetDataChecksumsOff / ControlFile->data_checksum_version %u = %u", + elog(LOG, "SetDataChecksumsOff / ControlFile->data_checksum_version %u => %u", ControlFile->data_checksum_version, PG_DATA_CHECKSUM_INPROGRESS_OFF); ControlFile->data_checksum_version = PG_DATA_CHECKSUM_INPROGRESS_OFF; UpdateControlFile(); LWLockRelease(ControlFileLock); + INJECTION_POINT("datachecksums-disable-inprogress-checksums-after-controlfile", NULL); + elog(LOG, "SetDataChecksumsOff / RequestCheckpoint(CHECKPOINT_FORCE | CHECKPOINT_WAIT | CHECKPOINT_FAST)"); RequestCheckpoint(CHECKPOINT_FORCE | CHECKPOINT_WAIT | 
CHECKPOINT_FAST); + INJECTION_POINT("datachecksums-disable-inprogress-checksums-after-checkpoint", NULL); + elog(LOG, "SetDataChecksumsOff / EmitAndWaitDataChecksumsBarrier(PG_DATA_CHECKSUM_INPROGRESS_OFF)"); EmitAndWaitDataChecksumsBarrier(PG_DATA_CHECKSUM_INPROGRESS_OFF); @@ -4971,6 +4992,8 @@ SetDataChecksumsOff(void) SpinLockRelease(&XLogCtl->info_lck); } + INJECTION_POINT("datachecksums-disable-checksums-delay", NULL); + START_CRIT_SECTION(); /* Ensure that we don't incur a checkpoint during disabling checksums */ MyProc->delayChkptFlags |= DELAY_CHKPT_START; @@ -4988,6 +5011,8 @@ SetDataChecksumsOff(void) MyProc->delayChkptFlags &= ~DELAY_CHKPT_START; END_CRIT_SECTION(); + INJECTION_POINT("datachecksums-disable-checksums-after-xlogctl", NULL); + LWLockAcquire(ControlFileLock, LW_EXCLUSIVE); elog(LOG, "SetDataChecksumsOff / ControlFile->data_checksum_version %u => %u", @@ -4997,9 +5022,13 @@ SetDataChecksumsOff(void) UpdateControlFile(); LWLockRelease(ControlFileLock); + INJECTION_POINT("datachecksums-disable-checksums-after-controlfile", NULL); + elog(LOG, "SetDataChecksumsOff / RequestCheckpoint(CHECKPOINT_FORCE | CHECKPOINT_WAIT | CHECKPOINT_FAST)"); RequestCheckpoint(CHECKPOINT_FORCE | CHECKPOINT_WAIT | CHECKPOINT_FAST); + INJECTION_POINT("datachecksums-disable-checksums-after-checkpoint", NULL); + elog(LOG, "SetDataChecksumsOff / EmitAndWaitDataChecksumsBarrier(PG_DATA_CHECKSUM_OFF)"); EmitAndWaitDataChecksumsBarrier(PG_DATA_CHECKSUM_OFF); diff --git a/src/test/modules/test_checksums/t/010_injection_2.pl b/src/test/modules/test_checksums/t/010_injection_2.pl new file mode 100644 index 00000000000..a8ae9ffd151 --- /dev/null +++ b/src/test/modules/test_checksums/t/010_injection_2.pl @@ -0,0 +1,209 @@ + +# Copyright (c) 2026, PostgreSQL Global Development Group + +# Test suite for testing enabling data checksums in an online cluster with +# injection point tests injecting failures into the processing + +use strict; +use warnings FATAL => 'all'; + +use 
PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; + +use FindBin; +use lib $FindBin::RealBin; + +use DataChecksums::Utils; + +# This test suite is expensive, or very expensive, to execute. There are two +# PG_TEST_EXTRA options for running it, "checksum" for a pared-down test suite +# and "checksum_extended" for the full suite. The full suite can run for hours +# on slow or constrained systems. +my $extended = undef; +if ($ENV{PG_TEST_EXTRA}) +{ + $extended = 1 if ($ENV{PG_TEST_EXTRA} =~ /\bchecksum_extended\b/); + plan skip_all => 'Expensive data checksums test disabled' + unless ($ENV{PG_TEST_EXTRA} =~ /\bchecksum(_extended)?\b/); +} + +if ($ENV{enable_injection_points} ne 'yes') +{ + plan skip_all => 'Injection points not supported by this build'; +} + +# --------------------------------------------------------------------------- +# Test cluster setup +# + +# Initialize the test cluster +my $node = PostgreSQL::Test::Cluster->new('injection_node'); +$node->init(no_data_checksums => 1); +$node->start; + +# Set up test environment +$node->safe_psql('postgres', 'CREATE EXTENSION test_checksums;'); +$node->safe_psql('postgres', 'CREATE EXTENSION injection_points;'); + +my $pgbench = undef; +my $scalefactor = ($extended ? 10 : 1); +my $node_loglocation = 0; + +$node->command_ok( + [ + 'pgbench', '-p', $node->port, '-i', + '-s', $scalefactor, '-q', 'postgres' + ]); + +# Start a pgbench run in the background against the server specified via the +# port passed as parameter. +sub background_rw_pgbench +{ + my $port = shift; + + # If a previous pgbench is still running, start by shutting it down.
+ $pgbench->finish if $pgbench; + + my $clients = 1; + my $runtime = 2; + + if ($extended) + { + # Randomize the number of pgbench clients a bit (range 1-15) + $clients = 1 + int(rand(15)); + $runtime = 600; + } + my @cmd = ('pgbench', '-p', $port, '-T', $runtime, '-c', $clients); + + # Randomize whether we spawn connections or not + push(@cmd, '-C') if ($extended && cointoss); + # Finally add the database name to use + push(@cmd, 'postgres'); + + $pgbench = IPC::Run::start( + \@cmd, + '<' => '/dev/null', + '>' => '/dev/null', + '2>' => '/dev/null', + IPC::Run::timer($PostgreSQL::Test::Utils::timeout_default)); +} + +# Test checksum transition. The function has these arguments: +# +# - start - initial checksum state (enabled/disabled) +# - first - first checksum change +# - second - second checksum change +# - point - injection point the first change should wait on +# - final - expected checksum state at the end +# +# The test puts the instance into the initial checksum state, triggers two +# checksum changes, and verifies the final state is as expected. The first +# state change is paused on a selected injection point, and unpaused after +# the second change gets initiated. +# +# The injection point is triggered only by the datachecksum launcher, and +# there can be only one such process. So there's no risk of hitting the +# injection point by both changes. +sub test_checksum_transition +{ + my ($start, $first, $second, $point, $final) = @_; + + $node->safe_psql('postgres', + "SELECT '========== " . $start . " / " . $first . " / " . $second . " / " . $point . " / " . $final . " =========='"); + + note($start . " / " . $first . " / " . $second . " / " . $point . " / " . $final); + + note('changing checksums into initial state: ' . $start); + + enable_data_checksums($node, wait => 'on') if ($start eq 'enabled'); + disable_data_checksums($node, wait => 'off') if ($start eq 'disabled'); + + note('attaching injection point: ' .
$point); + $node->safe_psql('postgres', + "SELECT injection_points_attach('" . $point . "','wait');" + ); + + note("triggering first checksum change: " . $first); + + enable_data_checksums($node) if ($first eq 'enable'); + disable_data_checksums($node) if ($first eq 'disable'); + + note("waiting for the injection point to be hit"); + $node->poll_query_until( + 'postgres', + "SELECT COUNT(*) FROM pg_catalog.pg_stat_activity WHERE wait_event = '" . $point . "'", + '1'); + + note("triggering second checksum change: " . $second); + + enable_data_checksums($node) if ($second eq 'enable'); + disable_data_checksums($node) if ($second eq 'disable'); + + note("waking and detaching injection point"); + $node->safe_psql('postgres', + "SELECT injection_points_wakeup('" . $point . "');"); + + note("detaching injection point"); + $node->safe_psql('postgres', + "SELECT injection_points_detach('" . $point . "');"); + + note('wait for the checksum launcher to exit'); + $node->poll_query_until('postgres', + "SELECT count(*) = 0 " + . "FROM pg_catalog.pg_stat_activity " + . "WHERE backend_type = 'datachecksum launcher';"); + + test_checksum_state($node, $final); + + # Since the log isn't being written to now, parse the log and check + # for instances of checksum verification failures. + my $log = PostgreSQL::Test::Utils::slurp_file($node->logfile, + $node_loglocation); + unlike( + $log, + qr/page verification failed,.+\d$/, + "no checksum validation errors in primary log (during WAL recovery)" + ); + $node_loglocation = -s $node->logfile; +} + +# Start the test suite with pgbench running. 
+background_rw_pgbench($node->port); + +test_checksum_transition('disabled', 'enable', 'disable', 'datachecksums-enable-inprogress-checksums-delay', 'off'); +test_checksum_transition('disabled', 'enable', 'disable', 'datachecksums-enable-inprogress-checksums-after-xlogctl', 'off'); +test_checksum_transition('disabled', 'enable', 'disable', 'datachecksums-enable-inprogress-checksums-after-controlfile', 'off'); +test_checksum_transition('disabled', 'enable', 'disable', 'datachecksums-enable-checksums-delay', 'off'); +test_checksum_transition('disabled', 'enable', 'disable', 'datachecksums-enable-checksums-after-xlogctl', 'off'); +test_checksum_transition('disabled', 'enable', 'disable', 'datachecksums-enable-checksums-after-controlfile', 'off'); +test_checksum_transition('disabled', 'enable', 'disable', 'datachecksums-enable-checksums-after-checkpoint', 'off'); + +test_checksum_transition('disabled', 'enable', 'enable', 'datachecksums-enable-inprogress-checksums-delay', 'on'); +test_checksum_transition('disabled', 'enable', 'enable', 'datachecksums-enable-inprogress-checksums-after-xlogctl', 'on'); +test_checksum_transition('disabled', 'enable', 'enable', 'datachecksums-enable-inprogress-checksums-after-controlfile', 'on'); +test_checksum_transition('disabled', 'enable', 'enable', 'datachecksums-enable-checksums-delay', 'on'); +test_checksum_transition('disabled', 'enable', 'enable', 'datachecksums-enable-checksums-after-xlogctl', 'on'); +test_checksum_transition('disabled', 'enable', 'enable', 'datachecksums-enable-checksums-after-controlfile', 'on'); +test_checksum_transition('disabled', 'enable', 'enable', 'datachecksums-enable-checksums-after-checkpoint', 'on'); + +test_checksum_transition('enabled', 'disable', 'disable', 'datachecksums-disable-inprogress-checksums-delay', 'off'); +test_checksum_transition('enabled', 'disable', 'disable', 'datachecksums-disable-inprogress-checksums-after-xlogctl', 'off'); +test_checksum_transition('enabled', 'disable', 'disable', 
'datachecksums-disable-inprogress-checksums-after-controlfile', 'off'); +test_checksum_transition('enabled', 'disable', 'disable', 'datachecksums-disable-inprogress-checksums-after-checkpoint', 'off'); +test_checksum_transition('enabled', 'disable', 'disable', 'datachecksums-disable-checksums-delay', 'off'); +test_checksum_transition('enabled', 'disable', 'disable', 'datachecksums-disable-checksums-after-xlogctl', 'off'); +test_checksum_transition('enabled', 'disable', 'disable', 'datachecksums-disable-checksums-after-controlfile', 'off'); +test_checksum_transition('enabled', 'disable', 'disable', 'datachecksums-disable-checksums-after-checkpoint', 'off'); + +test_checksum_transition('enabled', 'disable', 'enable', 'datachecksums-disable-inprogress-checksums-delay', 'on'); +test_checksum_transition('enabled', 'disable', 'enable', 'datachecksums-disable-inprogress-checksums-after-xlogctl', 'on'); +test_checksum_transition('enabled', 'disable', 'enable', 'datachecksums-disable-inprogress-checksums-after-controlfile', 'on'); +test_checksum_transition('enabled', 'disable', 'enable', 'datachecksums-disable-inprogress-checksums-after-checkpoint', 'on'); +test_checksum_transition('enabled', 'disable', 'enable', 'datachecksums-disable-checksums-delay', 'on'); +test_checksum_transition('enabled', 'disable', 'enable', 'datachecksums-disable-checksums-after-xlogctl', 'on'); +test_checksum_transition('enabled', 'disable', 'enable', 'datachecksums-disable-checksums-after-controlfile', 'on'); +test_checksum_transition('enabled', 'disable', 'enable', 'datachecksums-disable-checksums-after-checkpoint', 'on'); + +$node->stop; +done_testing(); diff --git a/src/test/modules/test_checksums/t/011_injection_checkpoint.pl b/src/test/modules/test_checksums/t/011_injection_checkpoint.pl new file mode 100644 index 00000000000..ce1553e8ecd --- /dev/null +++ b/src/test/modules/test_checksums/t/011_injection_checkpoint.pl @@ -0,0 +1,196 @@ + +# Copyright (c) 2026, PostgreSQL Global Development 
Group + +# Test suite for testing enabling data checksums in an online cluster with +# injection point tests injecting failures into the processing + +use strict; +use warnings FATAL => 'all'; + +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; + +use FindBin; +use lib $FindBin::RealBin; + +use DataChecksums::Utils; + +# This test suite is expensive, or very expensive, to execute. There are two +# PG_TEST_EXTRA options for running it, "checksum" for a pared-down test suite +# and "checksum_extended" for the full suite. The full suite can run for hours +# on slow or constrained systems. +my $extended = undef; +if ($ENV{PG_TEST_EXTRA}) +{ + $extended = 1 if ($ENV{PG_TEST_EXTRA} =~ /\bchecksum_extended\b/); + plan skip_all => 'Expensive data checksums test disabled' + unless ($ENV{PG_TEST_EXTRA} =~ /\bchecksum(_extended)?\b/); +} + +if ($ENV{enable_injection_points} ne 'yes') +{ + plan skip_all => 'Injection points not supported by this build'; +} + +# --------------------------------------------------------------------------- +# Test cluster setup +# + +# Initialize the test cluster +my $node = PostgreSQL::Test::Cluster->new('injection_node'); +$node->init(no_data_checksums => 1); +$node->start; + +# Set up test environment +$node->safe_psql('postgres', 'CREATE EXTENSION test_checksums;'); +$node->safe_psql('postgres', 'CREATE EXTENSION injection_points;'); + +my $pgbench = undef; +my $scalefactor = ($extended ? 10 : 1); +my $node_loglocation = 0; + +$node->command_ok( + [ + 'pgbench', '-p', $node->port, '-i', + '-s', $scalefactor, '-q', 'postgres' + ]); + +# Start a pgbench run in the background against the server specified via the +# port passed as parameter. +sub background_rw_pgbench +{ + my $port = shift; + + # If a previous pgbench is still running, start by shutting it down.
+ $pgbench->finish if $pgbench; + + my $clients = 1; + my $runtime = 2; + + if ($extended) + { + # Randomize the number of pgbench clients a bit (range 1-15) + $clients = 1 + int(rand(15)); + $runtime = 600; + } + my @cmd = ('pgbench', '-p', $port, '-T', $runtime, '-c', $clients); + + # Randomize whether we spawn connections or not + push(@cmd, '-C') if ($extended && cointoss); + # Finally add the database name to use + push(@cmd, 'postgres'); + + $pgbench = IPC::Run::start( + \@cmd, + '<' => '/dev/null', + '>' => '/dev/null', + '2>' => '/dev/null', + IPC::Run::timer($PostgreSQL::Test::Utils::timeout_default)); +} + +# Test checksum transition concurrent with a checkpoint. +# +# The function has these arguments: +# +# - start - initial checksum state (enabled/disabled) +# - change - checksum change to initiate +# - point - injection point the change should wait on +# - final - expected checksum state at the end +# +# The test puts the instance into the initial checksum state, triggers a +# checksum change concurrent with a checkpoint, and verifies the final state +# is as expected. The state change is paused on a selected injection +# point, and unpaused after performing a checkpoint. +# +# Finally, the instance is restarted (in either fast or immediate mode), +# the final checksum state is validated against the expected value, and +# the server log is checked for checksum failures. +sub test_checksum_transition +{ + my ($start, $change, $point, $final) = @_; + + # Start the test suite with pgbench running. + background_rw_pgbench($node->port); + + $node->safe_psql('postgres', + "SELECT '========== " . $start . " / " . $change . " / " . $point . " / " . $final . " =========='"); + + note($start . " / " . $change . " / " . $point . " / " . $final); + + note('changing checksums into initial state: ' .
$start); + + enable_data_checksums($node, wait => 'on') if ($start eq 'enabled'); + disable_data_checksums($node, wait => 'off') if ($start eq 'disabled'); + + note('attaching injection point: ' . $point); + $node->safe_psql('postgres', + "SELECT injection_points_attach('" . $point . "','wait');" + ); + + note("triggering checksum change: " . $change); + + enable_data_checksums($node) if ($change eq 'enable'); + disable_data_checksums($node) if ($change eq 'disable'); + + note("waiting for the injection point to be hit"); + $node->poll_query_until( + 'postgres', + "SELECT COUNT(*) FROM pg_catalog.pg_stat_activity WHERE wait_event = '" . $point . "'", + '1'); + + note('checkpoint'); + $node->safe_psql('postgres', "CHECKPOINT"); + + note("waking and detaching injection point"); + $node->safe_psql('postgres', + "SELECT injection_points_wakeup('" . $point . "');"); + + note("detaching injection point"); + $node->safe_psql('postgres', + "SELECT injection_points_detach('" . $point . "');"); + + note('wait for the checksum launcher to exit'); + $node->poll_query_until('postgres', + "SELECT count(*) = 0 " + . "FROM pg_catalog.pg_stat_activity " + . "WHERE backend_type = 'datachecksum launcher';"); + + test_checksum_state($node, $final); + + $node->stop(stopmode()); + $node->start; + + test_checksum_state($node, $final); + + # Since the log isn't being written to now, parse the log and check + # for instances of checksum verification failures. 
+ my $log = PostgreSQL::Test::Utils::slurp_file($node->logfile, + $node_loglocation); + unlike( + $log, + qr/page verification failed,.+\d$/, + "no checksum validation errors in primary log (during WAL recovery)" + ); + $node_loglocation = -s $node->logfile; +} + +test_checksum_transition('disabled', 'enable', 'datachecksums-enable-inprogress-checksums-delay', 'on'); +test_checksum_transition('disabled', 'enable', 'datachecksums-enable-inprogress-checksums-after-xlogctl', 'on'); +test_checksum_transition('disabled', 'enable', 'datachecksums-enable-inprogress-checksums-after-controlfile', 'on'); +test_checksum_transition('disabled', 'enable', 'datachecksums-enable-checksums-delay', 'on'); +test_checksum_transition('disabled', 'enable', 'datachecksums-enable-checksums-after-xlogctl', 'on'); +test_checksum_transition('disabled', 'enable', 'datachecksums-enable-checksums-after-controlfile', 'on'); +test_checksum_transition('disabled', 'enable', 'datachecksums-enable-checksums-after-checkpoint', 'on'); + +test_checksum_transition('enabled', 'disable', 'datachecksums-disable-inprogress-checksums-delay', 'off'); +test_checksum_transition('enabled', 'disable', 'datachecksums-disable-inprogress-checksums-after-xlogctl', 'off'); +test_checksum_transition('enabled', 'disable', 'datachecksums-disable-inprogress-checksums-after-controlfile', 'off'); +test_checksum_transition('enabled', 'disable', 'datachecksums-disable-inprogress-checksums-after-checkpoint', 'off'); +test_checksum_transition('enabled', 'disable', 'datachecksums-disable-checksums-delay', 'off'); +test_checksum_transition('enabled', 'disable', 'datachecksums-disable-checksums-after-xlogctl', 'off'); +test_checksum_transition('enabled', 'disable', 'datachecksums-disable-checksums-after-controlfile', 'off'); +test_checksum_transition('enabled', 'disable', 'datachecksums-disable-checksums-after-checkpoint', 'off'); + +$node->stop; +done_testing(); diff --git 
a/src/test/modules/test_checksums/t/012_injection_checkpoint_crash.pl b/src/test/modules/test_checksums/t/012_injection_checkpoint_crash.pl new file mode 100644 index 00000000000..98e9c805c41 --- /dev/null +++ b/src/test/modules/test_checksums/t/012_injection_checkpoint_crash.pl @@ -0,0 +1,214 @@ + +# Copyright (c) 2026, PostgreSQL Global Development Group + +# Test suite for testing enabling data checksums in an online cluster with +# injection point tests injecting failures into the processing + +use strict; +use warnings FATAL => 'all'; + +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; + +use FindBin; +use lib $FindBin::RealBin; + +use DataChecksums::Utils; + +# This test suite is expensive, or very expensive, to execute. There are two +# PG_TEST_EXTRA options for running it, "checksum" for a pared-down test suite +# and "checksum_extended" for the full suite. The full suite can run for hours +# on slow or constrained systems. +my $extended = undef; +if ($ENV{PG_TEST_EXTRA}) +{ + $extended = 1 if ($ENV{PG_TEST_EXTRA} =~ /\bchecksum_extended\b/); + plan skip_all => 'Expensive data checksums test disabled' + unless ($ENV{PG_TEST_EXTRA} =~ /\bchecksum(_extended)?\b/); +} + +if ($ENV{enable_injection_points} ne 'yes') +{ + plan skip_all => 'Injection points not supported by this build'; +} + +# --------------------------------------------------------------------------- +# Test cluster setup +# + +# Initialize the test cluster +my $node = PostgreSQL::Test::Cluster->new('injection_node'); +$node->init(no_data_checksums => 1); +$node->start; + +# Set up test environment +$node->safe_psql('postgres', 'CREATE EXTENSION test_checksums;'); +$node->safe_psql('postgres', 'CREATE EXTENSION injection_points;'); + +my $pgbench = undef; +my $scalefactor = ($extended ?
10 : 1); +my $node_loglocation = 0; + +$node->command_ok( + [ + 'pgbench', '-p', $node->port, '-i', + '-s', $scalefactor, '-q', 'postgres' + ]); + +# Start a pgbench run in the background against the server specified via the +# port passed as parameter. +sub background_rw_pgbench +{ + my $port = shift; + + # If a previous pgbench is still running, start by shutting it down. + $pgbench->finish if $pgbench; + + my $clients = 1; + my $runtime = 2; + + if ($extended) + { + # Randomize the number of pgbench clients a bit (range 1-15) + $clients = 1 + int(rand(15)); + $runtime = 600; + } + my @cmd = ('pgbench', '-p', $port, '-T', $runtime, '-c', $clients); + + # Randomize whether we spawn connections or not + push(@cmd, '-C') if ($extended && cointoss); + # Finally add the database name to use + push(@cmd, 'postgres'); + + $pgbench = IPC::Run::start( + \@cmd, + '<' => '/dev/null', + '>' => '/dev/null', + '2>' => '/dev/null', + IPC::Run::timer($PostgreSQL::Test::Utils::timeout_default)); +} + +# Test checksum transition concurrent with a checkpoint. +# +# The function has these arguments: +# +# - start - initial checksum state (enabled/disabled) +# - change - checksum change to initiate +# - point1 - injection point before checkpoint +# - point2 - injection point after checkpoint (undef: wait for launcher exit) +# - final - expected checksum state at the end +# +# The test puts the instance into the initial checksum state, triggers a +# checksum change that pauses on a selected injection point. Then performs +# a checkpoint, unpauses the change so that it proceeds to a second +# injection point. +# +# Then the instance is restarted in immediate mode to simulate failure, +# and the final checksum state is validated against the expected value. +# The server log is checked for checksum failures. +sub test_checksum_transition +{ + my ($start, $change, $point1, $point2, $final) = @_; + + # Start the test suite with pgbench running.
+ background_rw_pgbench($node->port); + + $node->safe_psql('postgres', + "SELECT '========== " . $start . " / " . $change . " / " . $point1 . " / " . $final . " =========='"); + + note($start . " / " . $change . " / " . $point1 . " / " . $final); + + note('changing checksums into initial state: ' . $start); + + enable_data_checksums($node, wait => 'on') if ($start eq 'enabled'); + disable_data_checksums($node, wait => 'off') if ($start eq 'disabled'); + + note('attaching injection point: ' . $point1); + $node->safe_psql('postgres', + "SELECT injection_points_attach('" . $point1 . "','wait');" + ); + + if (defined($point2)) + { + note('attaching injection point: ' . $point2); + $node->safe_psql('postgres', + "SELECT injection_points_attach('" . $point2 . "','wait');" + ); + } + + note("triggering checksum change: " . $change); + + enable_data_checksums($node) if ($change eq 'enable'); + disable_data_checksums($node) if ($change eq 'disable'); + + note("waiting for the injection point to be hit"); + $node->poll_query_until( + 'postgres', + "SELECT COUNT(*) FROM pg_catalog.pg_stat_activity WHERE wait_event = '" . $point1 . "'", + '1'); + + note('checkpoint'); + $node->safe_psql('postgres', "CHECKPOINT"); + + note("waking and detaching injection point"); + $node->safe_psql('postgres', + "SELECT injection_points_wakeup('" . $point1 . "');"); + + note("detaching injection point"); + $node->safe_psql('postgres', + "SELECT injection_points_detach('" . $point1 . "');"); + + if (defined($point2)) + { + note("waiting for the injection point to be hit"); + $node->poll_query_until( + 'postgres', + "SELECT COUNT(*) FROM pg_catalog.pg_stat_activity WHERE wait_event = '" . $point2 . "'", + '1'); + } + else + { + note('wait for the checksum launcher to exit'); + $node->poll_query_until('postgres', + "SELECT count(*) = 0 " + . "FROM pg_catalog.pg_stat_activity " + . 
"WHERE backend_type = 'datachecksum launcher';"); + } + + $node->stop('immediate'); + $node->start; + + test_checksum_state($node, $final); + + # Since the log isn't being written to now, parse the log and check + # for instances of checksum verification failures. + my $log = PostgreSQL::Test::Utils::slurp_file($node->logfile, + $node_loglocation); + unlike( + $log, + qr/page verification failed,.+\d$/, + "no checksum validation errors in primary log (during WAL recovery)" + ); + $node_loglocation = -s $node->logfile; +} + +test_checksum_transition('disabled', 'enable', 'datachecksums-enable-inprogress-checksums-delay', 'datachecksums-enable-inprogress-checksums-after-xlogctl', 'off'); +test_checksum_transition('disabled', 'enable', 'datachecksums-enable-inprogress-checksums-after-xlogctl', 'datachecksums-enable-inprogress-checksums-after-controlfile', 'off'); +test_checksum_transition('disabled', 'enable', 'datachecksums-enable-inprogress-checksums-after-controlfile', 'datachecksums-enable-checksums-delay', 'off'); +test_checksum_transition('disabled', 'enable', 'datachecksums-enable-checksums-delay', 'datachecksums-enable-checksums-after-xlogctl', 'on'); +test_checksum_transition('disabled', 'enable', 'datachecksums-enable-checksums-after-xlogctl', 'datachecksums-enable-checksums-after-controlfile', 'on'); +test_checksum_transition('disabled', 'enable', 'datachecksums-enable-checksums-after-controlfile', 'datachecksums-enable-checksums-after-checkpoint', 'on'); +test_checksum_transition('disabled', 'enable', 'datachecksums-enable-checksums-after-checkpoint', undef, 'on'); + +test_checksum_transition('enabled', 'disable', 'datachecksums-disable-inprogress-checksums-delay', 'datachecksums-disable-inprogress-checksums-after-xlogctl', 'off'); +test_checksum_transition('enabled', 'disable', 'datachecksums-disable-inprogress-checksums-after-xlogctl', 'datachecksums-disable-inprogress-checksums-after-controlfile', 'off'); +test_checksum_transition('enabled', 'disable', 
'datachecksums-disable-inprogress-checksums-after-controlfile', 'datachecksums-disable-inprogress-checksums-after-checkpoint', 'off'); +test_checksum_transition('enabled', 'disable', 'datachecksums-disable-inprogress-checksums-after-checkpoint', 'datachecksums-disable-checksums-delay', 'off'); +test_checksum_transition('enabled', 'disable', 'datachecksums-disable-checksums-delay', 'datachecksums-disable-checksums-after-xlogctl', 'off'); +test_checksum_transition('enabled', 'disable', 'datachecksums-disable-checksums-after-xlogctl', 'datachecksums-disable-checksums-after-controlfile', 'off'); +test_checksum_transition('enabled', 'disable', 'datachecksums-disable-checksums-after-controlfile', 'datachecksums-disable-checksums-after-checkpoint', 'off'); +test_checksum_transition('enabled', 'disable', 'datachecksums-disable-checksums-after-checkpoint', undef, 'off'); + +$node->stop; +done_testing();