From 15eda37bed7543a314b5dc811b155545cb4b0778 Mon Sep 17 00:00:00 2001 From: Daniel Gustafsson Date: Thu, 8 Jan 2026 14:45:16 +0100 Subject: [PATCH v20250203 2/2] Review comments --- doc/src/sgml/func/func-admin.sgml | 12 +- doc/src/sgml/glossary.sgml | 4 +- doc/src/sgml/wal.sgml | 6 + src/backend/access/rmgrdesc/xlogdesc.c | 46 ++- src/backend/access/transam/xlog.c | 292 ++++++++-------- src/backend/access/transam/xlogfuncs.c | 13 +- src/backend/backup/basebackup.c | 25 +- src/backend/catalog/system_functions.sql | 14 +- src/backend/catalog/system_views.sql | 5 +- src/backend/postmaster/datachecksumsworker.c | 326 +++++++++--------- src/backend/postmaster/walsummarizer.c | 5 +- src/backend/storage/page/bufpage.c | 12 + src/backend/utils/init/postinit.c | 2 +- src/bin/pg_controldata/pg_controldata.c | 2 +- src/include/access/xlog.h | 6 +- src/include/access/xlog_internal.h | 8 +- src/include/catalog/pg_control.h | 6 +- src/include/catalog/pg_proc.dat | 15 +- src/include/commands/progress.h | 5 +- src/include/postmaster/datachecksumsworker.h | 5 +- src/include/storage/checksum.h | 3 +- src/include/storage/proc.h | 5 +- src/test/modules/test_checksums/Makefile | 2 +- src/test/modules/test_checksums/meson.build | 2 +- .../modules/test_checksums/t/001_basic.pl | 2 +- .../modules/test_checksums/t/002_restarts.pl | 2 +- .../test_checksums/t/003_standby_restarts.pl | 2 +- .../modules/test_checksums/t/004_offline.pl | 2 +- .../modules/test_checksums/t/005_injection.pl | 77 ++--- .../test_checksums/t/006_pgbench_single.pl | 30 +- .../test_checksums/t/007_pgbench_standby.pl | 72 ++-- .../test_checksums/t/DataChecksums/Utils.pm | 34 +- .../test_checksums/test_checksums--1.0.sql | 14 +- .../modules/test_checksums/test_checksums.c | 79 +++-- src/test/perl/PostgreSQL/Test/Cluster.pm | 9 - src/test/regress/expected/rules.out | 7 +- src/tools/pgindent/typedefs.list | 1 + 37 files changed, 558 insertions(+), 594 deletions(-) diff --git a/doc/src/sgml/func/func-admin.sgml b/doc/src/sgml/func/func-admin.sgml index 315f4093398..7762b774bf2 100644 --- a/doc/src/sgml/func/func-admin.sgml +++ b/doc/src/sgml/func/func-admin.sgml @@ -3122,7 +3122,7 @@ SELECT convert_from(pg_read_binary_file('file_in_utf8.txt'), 'UTF8'); pg_enable_data_checksums - pg_enable_data_checksums ( cost_delay int, cost_limit int, fast bool ) + pg_enable_data_checksums ( cost_delay int, cost_limit int ) void @@ -3137,9 +3137,6 @@ SELECT convert_from(pg_read_binary_file('file_in_utf8.txt'), 'UTF8'); If cost_delay and cost_limit are specified, the process is throttled using the same principles as Cost-based Vacuum Delay. - If fast is specified as true - then a fast checkpoint will be issued when data checksums have been - enabled, which may cause a spike in I/O. @@ -3149,7 +3146,7 @@ SELECT convert_from(pg_read_binary_file('file_in_utf8.txt'), 'UTF8'); pg_disable_data_checksums - pg_disable_data_checksums ( fast bool ) + pg_disable_data_checksums () void @@ -3159,11 +3156,6 @@ SELECT convert_from(pg_read_binary_file('file_in_utf8.txt'), 'UTF8'); stopped validating data checksums, the data checksum mode will be set to off. - - If fast is specified as true - then a fast checkpoint will be issued when data checksums have been - enabled, which may cause a spike in I/O. - diff --git a/doc/src/sgml/glossary.sgml b/doc/src/sgml/glossary.sgml index 4895ae76b81..4f3120b7826 100644 --- a/doc/src/sgml/glossary.sgml +++ b/doc/src/sgml/glossary.sgml @@ -580,7 +580,7 @@ Data Checksums Worker - An auxiliary process + A background worker which enables data checksums in a specific database. @@ -590,7 +590,7 @@ Data Checksums Worker Launcher - An auxiliary process + A background worker which starts processes for enabling data checksums in each database, or disables data checksums cluster-wide. diff --git a/doc/src/sgml/wal.sgml b/doc/src/sgml/wal.sgml index 4b5df81caf2..df41fb54cb6 100644 --- a/doc/src/sgml/wal.sgml +++ b/doc/src/sgml/wal.sgml @@ -342,6 +342,12 @@ the mode will automatically be set to off. + + Disabling data checksums while data checksums are actively being enabled + will abort the current processing. There is no way to restart from where + processing was interrupted. + + Impact on system of online operations diff --git a/src/backend/access/rmgrdesc/xlogdesc.c b/src/backend/access/rmgrdesc/xlogdesc.c index 1a70cdccb5c..8a854312ea1 100644 --- a/src/backend/access/rmgrdesc/xlogdesc.c +++ b/src/backend/access/rmgrdesc/xlogdesc.c @@ -55,6 +55,25 @@ get_wal_level_string(int wal_level) return wal_level_str; } +static const char * +get_checksum_version_string(ChecksumType checksum) +{ + switch (checksum) + { + case PG_DATA_CHECKSUM_VERSION: + return "on"; + case PG_DATA_CHECKSUM_INPROGRESS_OFF_VERSION: + return "inprogress-off"; + case PG_DATA_CHECKSUM_INPROGRESS_ON_VERSION: + return "inprogress-on"; + case PG_DATA_CHECKSUM_OFF: + return "off"; + } + + Assert(false); + return "?"; +} + void xlog_desc(StringInfo buf, XLogReaderState *record) { @@ -70,7 +89,8 @@ xlog_desc(StringInfo buf, XLogReaderState *record) "tli %u; prev tli %u; fpw %s; wal_level %s; logical decoding %s; xid %u:%u; oid %u; multi %u; offset %" PRIu64 "; " "oldest xid %u in DB %u; oldest multi %u in DB %u; " "oldest/newest commit timestamp xid: %u/%u; " - "oldest running xid %u; %s", + "oldest running xid %u; " + "checksums %s; %s", LSN_FORMAT_ARGS(checkpoint->redo), checkpoint->ThisTimeLineID, checkpoint->PrevTimeLineID, @@ -89,6 +109,7 @@ xlog_desc(StringInfo buf, XLogReaderState *record) checkpoint->oldestCommitTsXid, checkpoint->newestCommitTsXid, checkpoint->oldestActiveXid, + get_checksum_version_string(checkpoint->dataChecksumVersion), (info == XLOG_CHECKPOINT_SHUTDOWN) ? "shutdown" : "online"); } else if (info == XLOG_NEXTOID) @@ -164,10 +185,12 @@ xlog_desc(StringInfo buf, XLogReaderState *record) } else if (info == XLOG_CHECKPOINT_REDO) { - int wal_level; + xl_checkpoint_redo xlrec; - memcpy(&wal_level, rec, sizeof(int)); - appendStringInfo(buf, "wal_level %s", get_wal_level_string(wal_level)); + memcpy(&xlrec, rec, sizeof(xl_checkpoint_redo)); + appendStringInfo(buf, "wal_level %s; checksums %s", + get_wal_level_string(xlrec.wal_level), + get_checksum_version_string(xlrec.data_checksum_version)); } else if (info == XLOG_LOGICAL_DECODING_STATUS_CHANGE) { @@ -181,20 +204,7 @@ xlog_desc(StringInfo buf, XLogReaderState *record) xl_checksum_state xlrec; memcpy(&xlrec, rec, sizeof(xl_checksum_state)); - switch (xlrec.new_checksumtype) - { - case PG_DATA_CHECKSUM_VERSION: - appendStringInfoString(buf, "on"); - break; - case PG_DATA_CHECKSUM_INPROGRESS_OFF_VERSION: - appendStringInfoString(buf, "inprogress-off"); - break; - case PG_DATA_CHECKSUM_INPROGRESS_ON_VERSION: - appendStringInfoString(buf, "inprogress-on"); - break; - default: - appendStringInfoString(buf, "off"); - } + appendStringInfoString(buf, get_checksum_version_string(xlrec.new_checksumtype)); } } diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 6d437c2bc11..28928e2195d 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -288,11 +288,6 @@ static XLogRecPtr RedoRecPtr; */ static bool doPageWrites; -/* - * Force creating a restartpoint on the next CHECKPOINT after XLOG_CHECKSUMS. - */ -static bool checksumRestartPoint = false; - /*---------- * Shared-memory data structures for XLOG control * @@ -596,6 +591,10 @@ static ControlFileData *ControlFile = NULL; * looked up in the checksum_barriers struct. The struct member for the target * state defines which state the backend must currently be in, and which it * must not be in. + * + * The reason for this explicit checking is to ensure that processing cannot + * be started such that it breaks the assumptions of the state machine. See + * datachecksumsworker.c for a lengthy discussion on these states. */ typedef struct ChecksumBarrierCondition { @@ -611,13 +610,12 @@ typedef struct ChecksumBarrierCondition int barrier_ne_sz; } ChecksumBarrierCondition; -static const ChecksumBarrierCondition checksum_barriers[] = +static const ChecksumBarrierCondition checksum_barriers[4] = { {PG_DATA_CHECKSUM_OFF, {PG_DATA_CHECKSUM_INPROGRESS_ON_VERSION, PG_DATA_CHECKSUM_INPROGRESS_OFF_VERSION}, 2, {PG_DATA_CHECKSUM_VERSION}, 1}, {PG_DATA_CHECKSUM_VERSION, {PG_DATA_CHECKSUM_INPROGRESS_ON_VERSION}, 1, {0}, 0}, - {PG_DATA_CHECKSUM_INPROGRESS_ON_VERSION, {PG_DATA_CHECKSUM_ANY_VERSION}, 1, {PG_DATA_CHECKSUM_VERSION}, 1}, + {PG_DATA_CHECKSUM_INPROGRESS_ON_VERSION, {0}, 0, {PG_DATA_CHECKSUM_VERSION}, 1}, {PG_DATA_CHECKSUM_INPROGRESS_OFF_VERSION, {PG_DATA_CHECKSUM_VERSION}, 1, {0}, 0}, - {-1} }; /* @@ -699,20 +697,9 @@ static bool updateMinRecoveryPoint = true; * this is only updated when absorbing a procsignal barrier during interrupt * processing. The reason for keeping a copy in backend-private memory is to * avoid locking for interrogating the data checksum state. Possible values - * are the data checksum versions defined in storage/bufpage.h as well as zero - * when data checksums are disabled. - */ -static uint32 LocalDataChecksumVersion = 0; - -/* - * Flag to remember if the procsignalbarrier being absorbed for checksums is - * the first one. The first procsignalbarrier can in rare cases be for the - * state we've initialized, i.e. a duplicate. This may happen for any - * data_checksum_version value when the process is spawned between the update - * of XLogCtl->data_checksum_version and the barrier being emitted. This can - * only happen on the very first barrier so mark that with this flag. + * are the data checksum versions defined in storage/checksum.h. */ -static bool InitialDataChecksumTransition = true; +static ChecksumType LocalDataChecksumVersion = 0; /* * Variable backing the GUC, keep it in sync with LocalDataChecksumVersion. @@ -921,9 +908,7 @@ XLogInsertRecord(XLogRecData *rdata, Assert(RedoRecPtr < Insert->RedoRecPtr); RedoRecPtr = Insert->RedoRecPtr; } - doPageWrites = (Insert->fullPageWrites || - Insert->runningBackups > 0 || - DataChecksumsNeedWrite()); + doPageWrites = (Insert->fullPageWrites || Insert->runningBackups > 0); if (doPageWrites && (!prevDoPageWrites || @@ -4808,10 +4793,9 @@ DataChecksumsOffInProgress(void) * state transition. */ void -SetDataChecksumsOnInProgress(bool immediate_checkpoint) +SetDataChecksumsOnInProgress(void) { uint64 barrier; - int flags; Assert(ControlFile != NULL); @@ -4819,19 +4803,20 @@ SetDataChecksumsOnInProgress(bool immediate_checkpoint) * The state transition is performed in a critical section with * checkpoints held off to provide crash safety. */ - MyProc->delayChkptFlags |= DELAY_CHKPT_START; START_CRIT_SECTION(); + MyProc->delayChkptFlags |= DELAY_CHKPT_START; XLogChecksums(PG_DATA_CHECKSUM_INPROGRESS_ON_VERSION); SpinLockAcquire(&XLogCtl->info_lck); + Assert(XLogCtl->data_checksum_version == PG_DATA_CHECKSUM_OFF); XLogCtl->data_checksum_version = PG_DATA_CHECKSUM_INPROGRESS_ON_VERSION; SpinLockRelease(&XLogCtl->info_lck); barrier = EmitProcSignalBarrier(PROCSIGNAL_BARRIER_CHECKSUM_INPROGRESS_ON); - END_CRIT_SECTION(); MyProc->delayChkptFlags &= ~DELAY_CHKPT_START; + END_CRIT_SECTION(); /* * Await state change in all backends to ensure that all backends are in @@ -4839,18 +4824,6 @@ SetDataChecksumsOnInProgress(bool immediate_checkpoint) * checksums. */ WaitForProcSignalBarrier(barrier); - - /* - * force checkpoint to persist the current checksum state in control file - * etc. - * - * XXX is this needed? There's already a checkpoint at the end of - * ProcessAllDatabases, maybe this is redundant? - */ - flags = CHECKPOINT_FORCE | CHECKPOINT_WAIT; - if (immediate_checkpoint) - flags = flags | CHECKPOINT_FAST; - RequestCheckpoint(flags); } /* @@ -4877,10 +4850,9 @@ SetDataChecksumsOnInProgress(bool immediate_checkpoint) * state transition. */ void -SetDataChecksumsOn(bool immediate_checkpoint) +SetDataChecksumsOn(void) { uint64 barrier; - int flags; Assert(ControlFile != NULL); @@ -4893,14 +4865,14 @@ SetDataChecksumsOn(bool immediate_checkpoint) if (XLogCtl->data_checksum_version != PG_DATA_CHECKSUM_INPROGRESS_ON_VERSION) { SpinLockRelease(&XLogCtl->info_lck); - elog(ERROR, "checksums not in \"inprogress-on\" mode"); + elog(PANIC, "checksums not in \"inprogress-on\" mode"); } SpinLockRelease(&XLogCtl->info_lck); - MyProc->delayChkptFlags |= DELAY_CHKPT_START; INJECTION_POINT("datachecksums-enable-checksums-delay", NULL); START_CRIT_SECTION(); + MyProc->delayChkptFlags |= DELAY_CHKPT_START; XLogChecksums(PG_DATA_CHECKSUM_VERSION); @@ -4910,8 +4882,8 @@ SetDataChecksumsOn(bool immediate_checkpoint) barrier = EmitProcSignalBarrier(PROCSIGNAL_BARRIER_CHECKSUM_ON); - END_CRIT_SECTION(); MyProc->delayChkptFlags &= ~DELAY_CHKPT_START; + END_CRIT_SECTION(); /* * Await state transition of "on" in all backends. When done we know that @@ -4920,13 +4892,7 @@ SetDataChecksumsOn(bool immediate_checkpoint) */ WaitForProcSignalBarrier(barrier); - INJECTION_POINT("datachecksums-enable-checksums-pre-checkpoint", NULL); - - /* XXX is this needed? */ - flags = CHECKPOINT_FORCE | CHECKPOINT_WAIT; - if (immediate_checkpoint) - flags = flags | CHECKPOINT_FAST; - RequestCheckpoint(flags); + INJECTION_POINT("datachecksums-enable-checksums-done", NULL); } /* @@ -4943,10 +4909,9 @@ SetDataChecksumsOn(bool immediate_checkpoint) * state transition. */ void -SetDataChecksumsOff(bool immediate_checkpoint) +SetDataChecksumsOff(void) { uint64 barrier; - int flags; Assert(ControlFile != NULL); @@ -4970,8 +4935,8 @@ SetDataChecksumsOff(bool immediate_checkpoint) { SpinLockRelease(&XLogCtl->info_lck); - MyProc->delayChkptFlags |= DELAY_CHKPT_START; START_CRIT_SECTION(); + MyProc->delayChkptFlags |= DELAY_CHKPT_START; XLogChecksums(PG_DATA_CHECKSUM_INPROGRESS_OFF_VERSION); @@ -4981,8 +4946,8 @@ SetDataChecksumsOff(bool immediate_checkpoint) barrier = EmitProcSignalBarrier(PROCSIGNAL_BARRIER_CHECKSUM_INPROGRESS_OFF); - END_CRIT_SECTION(); MyProc->delayChkptFlags &= ~DELAY_CHKPT_START; + END_CRIT_SECTION(); /* * Update local state in all backends to ensure that any backend in @@ -4990,20 +4955,6 @@ SetDataChecksumsOff(bool immediate_checkpoint) */ WaitForProcSignalBarrier(barrier); - /* - * force checkpoint to persist the current checksum state in control - * file etc. - * - * XXX is this safe? What if the crash/shutdown happens while waiting - * for the checkpoint? Also, should we persist the checksum first and - * only then flip the flag in XLogCtl? - */ - INJECTION_POINT("datachecksums-disable-checksums-pre-checkpoint", NULL); - flags = CHECKPOINT_FORCE | CHECKPOINT_WAIT; - if (immediate_checkpoint) - flags = flags | CHECKPOINT_FAST; - RequestCheckpoint(flags); - /* * At this point we know that no backends are verifying data checksums * during reading. Next, we can safely move to state "off" to also @@ -5020,13 +4971,11 @@ SetDataChecksumsOff(bool immediate_checkpoint) SpinLockRelease(&XLogCtl->info_lck); } - /* - * Ensure that we don't incur a checkpoint during disabling checksums. - */ - MyProc->delayChkptFlags |= DELAY_CHKPT_START; START_CRIT_SECTION(); + /* Ensure that we don't incur a checkpoint during disabling checksums */ + MyProc->delayChkptFlags |= DELAY_CHKPT_START; - XLogChecksums(0); + XLogChecksums(PG_DATA_CHECKSUM_OFF); SpinLockAcquire(&XLogCtl->info_lck); XLogCtl->data_checksum_version = 0; @@ -5034,15 +4983,10 @@ SetDataChecksumsOff(bool immediate_checkpoint) barrier = EmitProcSignalBarrier(PROCSIGNAL_BARRIER_CHECKSUM_OFF); - END_CRIT_SECTION(); MyProc->delayChkptFlags &= ~DELAY_CHKPT_START; + END_CRIT_SECTION(); WaitForProcSignalBarrier(barrier); - - flags = CHECKPOINT_FORCE | CHECKPOINT_WAIT; - if (immediate_checkpoint) - flags = flags | CHECKPOINT_FAST; - RequestCheckpoint(flags); } /* @@ -5057,36 +5001,55 @@ SetDataChecksumsOff(bool immediate_checkpoint) bool AbsorbDataChecksumsBarrier(int target_state) { - const ChecksumBarrierCondition *condition = checksum_barriers; + const ChecksumBarrierCondition *condition = NULL; int current = LocalDataChecksumVersion; bool found = false; + /* + * If the target state matches the current state then the barrier has been + * repeated. + */ + if (current == target_state) + return true; + + /* + * If the cluster is in recovery we skip the validation of current state + * since the replay is trusted. + */ + if (InRecovery) + { + SetLocalDataChecksumVersion(target_state); + return true; + } + /* * Find the barrier condition definition for the target state. Not finding * a condition would be a grave programmer error as the states are a * discrete set. */ - while (condition->target != target_state && condition->target != -1) - condition++; - if (unlikely(condition->target == -1)) - ereport(ERROR, - errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("invalid target state %i for data checksum barrier", - target_state)); + for (int i = 0; i < lengthof(checksum_barriers); i++) + { + if (checksum_barriers[i].target == target_state) + condition = &checksum_barriers[i]; + } + Assert(condition); /* * The current state MUST be equal to one of the EQ states defined in this - * barrier condition, or equal to the target_state if - and only if - - * InitialDataChecksumTransition is true. + * barrier condition. If the EQ states array is zero then that imples that + * the current state can match any state, so fastpath check for that + * first. */ - for (int i = 0; i < condition->barrier_eq_sz; i++) + if (condition->barrier_eq_sz == 0) + found = true; + else { - if (current == condition->barrier_eq[i] || - condition->barrier_eq[i] == PG_DATA_CHECKSUM_ANY_VERSION) - found = true; + for (int i = 0; i < condition->barrier_eq_sz; i++) + { + if (current == condition->barrier_eq[i]) + found = true; + } } - if (InitialDataChecksumTransition && current == target_state) - found = true; /* * The current state MUST NOT be equal to any of the NE states defined in @@ -5099,7 +5062,7 @@ AbsorbDataChecksumsBarrier(int target_state) } /* - * If the relevent state criteria aren't satisfied, throw an error which + * If the relevant state criteria aren't satisfied, throw an error which * will be caught by the procsignal machinery for a later retry. */ if (!found) @@ -5109,7 +5072,6 @@ AbsorbDataChecksumsBarrier(int target_state) current, target_state)); SetLocalDataChecksumVersion(target_state); - InitialDataChecksumTransition = false; return true; } @@ -5141,14 +5103,17 @@ SetLocalDataChecksumVersion(uint32 data_checksum_version) const char * show_data_checksums(void) { - if (LocalDataChecksumVersion == PG_DATA_CHECKSUM_VERSION) - return "on"; - else if (LocalDataChecksumVersion == PG_DATA_CHECKSUM_INPROGRESS_ON_VERSION) - return "inprogress-on"; - else if (LocalDataChecksumVersion == PG_DATA_CHECKSUM_INPROGRESS_OFF_VERSION) - return "inprogress-off"; - else - return "off"; + switch (LocalDataChecksumVersion) + { + case PG_DATA_CHECKSUM_VERSION: + return "on"; + case PG_DATA_CHECKSUM_INPROGRESS_ON_VERSION: + return "inprogress-on"; + case PG_DATA_CHECKSUM_INPROGRESS_OFF_VERSION: + return "inprogress-off"; + case PG_DATA_CHECKSUM_OFF: + return "off"; + } } /* @@ -6305,6 +6270,11 @@ StartupXLOG(void) XLogCtl->SharedRecoveryState = RECOVERY_STATE_ARCHIVE; else XLogCtl->SharedRecoveryState = RECOVERY_STATE_CRASH; + + XLogCtl->data_checksum_version = checkPoint.dataChecksumVersion; + SetLocalDataChecksumVersion(checkPoint.dataChecksumVersion); + ControlFile->data_checksum_version = checkPoint.dataChecksumVersion; + SpinLockRelease(&XLogCtl->info_lck); /* @@ -6780,7 +6750,7 @@ StartupXLOG(void) */ if (XLogCtl->data_checksum_version == PG_DATA_CHECKSUM_INPROGRESS_ON_VERSION) { - XLogChecksums(0); + XLogChecksums(PG_DATA_CHECKSUM_OFF); SpinLockAcquire(&XLogCtl->info_lck); XLogCtl->data_checksum_version = 0; @@ -6788,8 +6758,8 @@ StartupXLOG(void) SpinLockRelease(&XLogCtl->info_lck); ereport(WARNING, - (errmsg("data checksums state has been set of off"), - errhint("If checksums were being enabled during shutdown then processing must be manually restarted."))); + errmsg("data checksums state has been set to off"), + errhint("If checksums were being enabled during shutdown then processing must be manually restarted.")); } /* @@ -6800,7 +6770,7 @@ StartupXLOG(void) */ if (XLogCtl->data_checksum_version == PG_DATA_CHECKSUM_INPROGRESS_OFF_VERSION) { - XLogChecksums(0); + XLogChecksums(PG_DATA_CHECKSUM_OFF); SpinLockAcquire(&XLogCtl->info_lck); XLogCtl->data_checksum_version = 0; @@ -7685,7 +7655,7 @@ CreateCheckPoint(int flags) * Get the current data_checksum_version value from xlogctl, valid at the * time of the checkpoint. */ - checkPoint.data_checksum_version = XLogCtl->data_checksum_version; + checkPoint.dataChecksumVersion = XLogCtl->data_checksum_version; if (shutdown) { @@ -7739,9 +7709,18 @@ CreateCheckPoint(int flags) */ if (!shutdown) { + xl_checkpoint_redo redo_rec; + + WALInsertLockAcquire(); + redo_rec.wal_level = wal_level; + SpinLockAcquire(&XLogCtl->info_lck); + redo_rec.data_checksum_version = XLogCtl->data_checksum_version; + SpinLockRelease(&XLogCtl->info_lck); + WALInsertLockRelease(); + /* Include WAL level in record for WAL summarizer's benefit. */ XLogBeginInsert(); - XLogRegisterData(&wal_level, sizeof(wal_level)); + XLogRegisterData(&redo_rec, sizeof(xl_checkpoint_redo)); (void) XLogInsert(RM_XLOG_ID, XLOG_CHECKPOINT_REDO); /* @@ -7797,6 +7776,10 @@ CreateCheckPoint(int flags) checkPoint.nextOid += TransamVariables->oidCount; LWLockRelease(OidGenLock); + SpinLockAcquire(&XLogCtl->info_lck); + checkPoint.dataChecksumVersion = XLogCtl->data_checksum_version; + SpinLockRelease(&XLogCtl->info_lck); + checkPoint.logicalDecodingEnabled = IsLogicalDecodingEnabled(); MultiXactGetCheckptMulti(shutdown, @@ -7947,7 +7930,7 @@ CreateCheckPoint(int flags) ControlFile->minRecoveryPointTLI = 0; /* make sure we start with the checksum version as of the checkpoint */ - ControlFile->data_checksum_version = checkPoint.data_checksum_version; + ControlFile->data_checksum_version = checkPoint.dataChecksumVersion; /* * Persist unloggedLSN value. It's reset on crash recovery, so this goes @@ -8094,7 +8077,9 @@ CreateEndOfRecoveryRecord(void) ControlFile->minRecoveryPointTLI = xlrec.ThisTimeLineID; /* start with the latest checksum version (as of the end of recovery) */ + SpinLockAcquire(&XLogCtl->info_lck); ControlFile->data_checksum_version = XLogCtl->data_checksum_version; + SpinLockRelease(&XLogCtl->info_lck); UpdateControlFile(); LWLockRelease(ControlFileLock); @@ -8439,7 +8424,7 @@ CreateRestartPoint(int flags) } /* we shall start with the latest checksum version */ - ControlFile->data_checksum_version = lastCheckPoint.data_checksum_version; + ControlFile->data_checksum_version = lastCheckPoint.dataChecksumVersion; UpdateControlFile(); } @@ -8869,8 +8854,6 @@ XLogChecksums(uint32 new_type) XLogBeginInsert(); XLogRegisterData((char *) &xlrec, sizeof(xl_checksum_state)); - INJECTION_POINT("datachecksums-xlogchecksums-pre-xloginsert", &new_type); - recptr = XLogInsert(RM_XLOG_ID, XLOG_CHECKSUMS); XLogFlush(recptr); } @@ -9001,6 +8984,11 @@ xlog_redo(XLogReaderState *record) MultiXactAdvanceOldest(checkPoint.oldestMulti, checkPoint.oldestMultiDB); + SpinLockAcquire(&XLogCtl->info_lck); + XLogCtl->data_checksum_version = checkPoint.dataChecksumVersion; + SetLocalDataChecksumVersion(checkPoint.dataChecksumVersion); + SpinLockRelease(&XLogCtl->info_lck); + /* * No need to set oldestClogXid here as well; it'll be set when we * redo an xl_clog_truncate if it changed since initialization. @@ -9060,6 +9048,7 @@ xlog_redo(XLogReaderState *record) /* ControlFile->checkPointCopy always tracks the latest ckpt XID */ LWLockAcquire(ControlFileLock, LW_EXCLUSIVE); ControlFile->checkPointCopy.nextXid = checkPoint.nextXid; + ControlFile->data_checksum_version = checkPoint.dataChecksumVersion; LWLockRelease(ControlFileLock); /* @@ -9124,8 +9113,14 @@ xlog_redo(XLogReaderState *record) /* ControlFile->checkPointCopy always tracks the latest ckpt XID */ LWLockAcquire(ControlFileLock, LW_EXCLUSIVE); ControlFile->checkPointCopy.nextXid = checkPoint.nextXid; + ControlFile->data_checksum_version = checkPoint.dataChecksumVersion; LWLockRelease(ControlFileLock); + SpinLockAcquire(&XLogCtl->info_lck); + XLogCtl->data_checksum_version = checkPoint.dataChecksumVersion; + SetLocalDataChecksumVersion(checkPoint.dataChecksumVersion); + SpinLockRelease(&XLogCtl->info_lck); + /* TLI should not change in an on-line checkpoint */ (void) GetCurrentReplayRecPtr(&replayTLI); if (checkPoint.ThisTimeLineID != replayTLI) @@ -9292,7 +9287,43 @@ xlog_redo(XLogReaderState *record) } else if (info == XLOG_CHECKPOINT_REDO) { - /* nothing to do here, just for informational purposes */ + xl_checkpoint_redo redo_rec; + bool new_state = false; + uint64 barrier; + + memcpy(&redo_rec, XLogRecGetData(record), sizeof(xl_checkpoint_redo)); + + SpinLockAcquire(&XLogCtl->info_lck); + XLogCtl->data_checksum_version = redo_rec.data_checksum_version; + if (redo_rec.data_checksum_version != ControlFile->data_checksum_version) + new_state = true; + SpinLockRelease(&XLogCtl->info_lck); + + if (new_state) + { + switch (redo_rec.data_checksum_version) + { + case PG_DATA_CHECKSUM_INPROGRESS_ON_VERSION: + barrier = EmitProcSignalBarrier(PROCSIGNAL_BARRIER_CHECKSUM_INPROGRESS_ON); + WaitForProcSignalBarrier(barrier); + break; + + case PG_DATA_CHECKSUM_INPROGRESS_OFF_VERSION: + barrier = EmitProcSignalBarrier(PROCSIGNAL_BARRIER_CHECKSUM_INPROGRESS_OFF); + WaitForProcSignalBarrier(barrier); + break; + + case PG_DATA_CHECKSUM_VERSION: + barrier = EmitProcSignalBarrier(PROCSIGNAL_BARRIER_CHECKSUM_ON); + WaitForProcSignalBarrier(barrier); + break; + + case PG_DATA_CHECKSUM_OFF: + barrier = EmitProcSignalBarrier(PROCSIGNAL_BARRIER_CHECKSUM_OFF); + WaitForProcSignalBarrier(barrier); + break; + } + } } else if (info == XLOG_LOGICAL_DECODING_STATUS_CHANGE) { @@ -9382,34 +9413,11 @@ xlog_redo(XLogReaderState *record) WaitForProcSignalBarrier(barrier); break; - default: - Assert(state.new_checksumtype == 0); + case PG_DATA_CHECKSUM_OFF: barrier = EmitProcSignalBarrier(PROCSIGNAL_BARRIER_CHECKSUM_OFF); WaitForProcSignalBarrier(barrier); break; } - - /* - * Force creating a restartpoint for the first CHECKPOINT after seeing - * XLOG_CHECKSUMS in WAL - */ - checksumRestartPoint = true; - } - - if (checksumRestartPoint && - (info == XLOG_CHECKPOINT_ONLINE || - info == XLOG_CHECKPOINT_REDO || - info == XLOG_CHECKPOINT_SHUTDOWN)) - { - int flags; - - elog(LOG, "forcing creation of a restartpoint after XLOG_CHECKSUMS"); - - /* We explicitly want an immediate checkpoint here */ - flags = CHECKPOINT_FORCE | CHECKPOINT_WAIT | CHECKPOINT_FAST; - RequestCheckpoint(flags); - - checksumRestartPoint = false; } } diff --git a/src/backend/access/transam/xlogfuncs.c b/src/backend/access/transam/xlogfuncs.c index b991d1997d6..742ddcf124a 100644 --- a/src/backend/access/transam/xlogfuncs.c +++ b/src/backend/access/transam/xlogfuncs.c @@ -757,17 +757,12 @@ pg_promote(PG_FUNCTION_ARGS) Datum disable_data_checksums(PG_FUNCTION_ARGS) { - bool fast = PG_GETARG_BOOL(0); - - ereport(LOG, - errmsg("disable_data_checksums, fast: %d", fast)); - if (!superuser()) ereport(ERROR, errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), errmsg("must be superuser to change data checksum state")); - StartDataChecksumsWorkerLauncher(DISABLE_DATACHECKSUMS, 0, 0, fast); + StartDataChecksumsWorkerLauncher(DISABLE_DATACHECKSUMS, 0, 0); PG_RETURN_VOID(); } @@ -781,10 +776,6 @@ enable_data_checksums(PG_FUNCTION_ARGS) { int cost_delay = PG_GETARG_INT32(0); int cost_limit = PG_GETARG_INT32(1); - bool fast = PG_GETARG_BOOL(2); - - ereport(LOG, - errmsg("enable_data_checksums, cost_delay: %d cost_limit: %d fast: %d", cost_delay, cost_limit, fast)); if (!superuser()) ereport(ERROR, @@ -801,7 +792,7 @@ enable_data_checksums(PG_FUNCTION_ARGS) errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("cost limit must be greater than zero")); - StartDataChecksumsWorkerLauncher(ENABLE_DATACHECKSUMS, cost_delay, cost_limit, fast); + StartDataChecksumsWorkerLauncher(ENABLE_DATACHECKSUMS, cost_delay, cost_limit); PG_RETURN_VOID(); } diff --git a/src/backend/backup/basebackup.c b/src/backend/backup/basebackup.c index 7a668355f4d..2eb0a3af453 100644 --- a/src/backend/backup/basebackup.c +++ b/src/backend/backup/basebackup.c @@ -1613,9 +1613,7 @@ sendFile(bbsink *sink, const char *readfilename, const char *tarfilename, * enabled for this cluster, and if this is a relation file, then verify * the checksum. */ - if (!noverify_checksums && - DataChecksumsNeedWrite() && - RelFileNumberIsValid(relfilenumber)) + if (!noverify_checksums && RelFileNumberIsValid(relfilenumber)) verify_checksum = true; /* @@ -1748,7 +1746,7 @@ sendFile(bbsink *sink, const char *readfilename, const char *tarfilename, * If the amount of data we were able to read was not a multiple of * BLCKSZ, we cannot verify checksums, which are block-level. */ - if (verify_checksum && (cnt % BLCKSZ != 0)) + if (verify_checksum && DataChecksumsNeedVerify() && (cnt % BLCKSZ != 0)) { ereport(WARNING, (errmsg("could not verify checksum in file \"%s\", block " @@ -1843,9 +1841,10 @@ sendFile(bbsink *sink, const char *readfilename, const char *tarfilename, * 'blkno' is the block number of the first page in the bbsink's buffer * relative to the start of the relation. * - * 'verify_checksum' indicates whether we should try to verify checksums - * for the blocks we read. If we do this, we'll update *checksum_failures - * and issue warnings as appropriate. + * 'verify_checksum' determins if the user has asked to verify checksums, but + * since data checksums can be disabled, or become disabled, we need to check + * state before verifying individual pages. If we do this, we'll update + * *checksum_failures and issue warnings as appropriate. */ static off_t read_file_data_into_buffer(bbsink *sink, const char *readfilename, int fd, @@ -1871,6 +1870,13 @@ read_file_data_into_buffer(bbsink *sink, const char *readfilename, int fd, int reread_cnt; uint16 expected_checksum; + /* + * The data checksum state can change at any point, so we need to + * re-check before each page. + */ + if (!DataChecksumsNeedVerify()) + return cnt; + page = sink->bbs_buffer + BLCKSZ * i; /* If the page is OK, go on to the next one. */ @@ -1893,7 +1899,12 @@ read_file_data_into_buffer(bbsink *sink, const char *readfilename, int fd, * allows us to wait until we can be certain that no write to the * block is in progress. Since we don't have any such thing right now, * we just do this and hope for the best. + * + * The data checksum state may also have changed concurrently so check + * again. */ + if (!DataChecksumsNeedVerify()) + return cnt; reread_cnt = basebackup_read_file(fd, sink->bbs_buffer + BLCKSZ * i, BLCKSZ, offset + BLCKSZ * i, diff --git a/src/backend/catalog/system_functions.sql b/src/backend/catalog/system_functions.sql index 2ab882c363d..b4c58652af1 100644 --- a/src/backend/catalog/system_functions.sql +++ b/src/backend/catalog/system_functions.sql @@ -659,20 +659,12 @@ AS 'pg_replication_origin_session_setup'; CREATE OR REPLACE FUNCTION pg_enable_data_checksums(cost_delay integer DEFAULT 0, - cost_limit integer DEFAULT 100, - fast boolean DEFAULT false) + cost_limit integer DEFAULT 100) RETURNS void STRICT VOLATILE LANGUAGE internal PARALLEL RESTRICTED AS 'enable_data_checksums'; -CREATE OR REPLACE FUNCTION - pg_disable_data_checksums(fast boolean DEFAULT false) -RETURNS void -STRICT VOLATILE LANGUAGE internal -PARALLEL RESTRICTED -AS 'disable_data_checksums'; - -- -- The default permissions for functions mean that anyone can execute them. -- A number of functions shouldn't be executable by just anyone, but rather @@ -798,9 +790,9 @@ REVOKE EXECUTE ON FUNCTION pg_ls_logicalmapdir() FROM PUBLIC; REVOKE EXECUTE ON FUNCTION pg_ls_replslotdir(text) FROM PUBLIC; -REVOKE EXECUTE ON FUNCTION pg_enable_data_checksums(integer, integer, boolean) FROM public; +REVOKE EXECUTE ON FUNCTION pg_enable_data_checksums(integer, integer) FROM public; -REVOKE EXECUTE ON FUNCTION pg_disable_data_checksums(boolean) FROM public; +REVOKE EXECUTE ON FUNCTION pg_disable_data_checksums() FROM public; -- -- We also set up some things as accessible to standard roles. diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql index 919c97e1040..47c661fe678 100644 --- a/src/backend/catalog/system_views.sql +++ b/src/backend/catalog/system_views.sql @@ -1391,9 +1391,8 @@ CREATE VIEW pg_stat_progress_data_checksums AS CASE S.param1 WHEN 0 THEN 'enabling' WHEN 1 THEN 'disabling' WHEN 2 THEN 'waiting on temporary tables' - WHEN 3 THEN 'waiting on checkpoint' - WHEN 4 THEN 'waiting on barrier' - WHEN 5 THEN 'done' + WHEN 3 THEN 'waiting on barrier' + WHEN 4 THEN 'done' END AS phase, CASE S.param2 WHEN -1 THEN NULL ELSE S.param2 END AS databases_total, S.param3 AS databases_done, diff --git a/src/backend/postmaster/datachecksumsworker.c b/src/backend/postmaster/datachecksumsworker.c index 57311760b2b..24e6af63f6a 100644 --- a/src/backend/postmaster/datachecksumsworker.c +++ b/src/backend/postmaster/datachecksumsworker.c @@ -14,35 +14,43 @@ * Checksums can be either enabled or disabled cluster-wide, with on/off being * the end state for data_checksums. * - * Enabling checksums - * ------------------ + * 1. Enabling checksums + * --------------------- * When enabling checksums in an online cluster, data_checksums will be set to * "inprogress-on" which signals that write operations MUST compute and write * the checksum on the data page, but during reading the checksum SHALL NOT be - * verified. This ensures that all objects created during checksumming will - * have checksums set, but no reads will fail due to incorrect checksum. The - * DataChecksumsWorker will compile a list of databases which exist at the - * start of checksumming, and all of these which haven't been dropped during - * the processing MUST have been processed successfully in order for checksums - * to be enabled. Any new relation created during processing will see the - * in-progress state and will automatically be checksummed. + * verified. This ensures that all objects created during when checksums are + * being enabled will have checksums set, but reads wont fail due to missing or + * invalid checksums. Invalid checksums can be present in case the cluster had + * checksums enabled, then disabled them and updated the page while they were + * disabled. + * + * The DataChecksumsWorker will compile a list of databases which exist at the + * start of checksumming, and once all are processed will regenerate the list + * and start over processing any new entries. Once there are no new entries on + * the list, processing will end. All databases MUST BE successfully processed + * in order for data checksums to be enabled, the only exception are databases + * which are dropped before having been processed. + + * Any new relation in a processed database, created during processing, will + * see the in-progress state and will automatically be checksummed. * * For each database, all relations which have storage are read and every data * page is marked dirty to force a write with the checksum. This will generate * a lot of WAL as the entire database is read and written. * - * If the processing is interrupted by a cluster restart, it will be restarted - * from the beginning again as state isn't persisted. + * If the processing is interrupted by a cluster crash or restart, it needs to + * be restarted from the beginning again as state isn't persisted. * - * Disabling checksums - * ------------------- + * 2. Disabling checksums + * ---------------------- * When disabling checksums, data_checksums will be set to "inprogress-off" * which signals that checksums are written but no longer verified. This ensure * that backends which have yet to move from the "on" state will still be able * to process data checksum validation. * - * Synchronization and Correctness - * ------------------------------- + * 3. Synchronization and Correctness + * ---------------------------------- * The processes involved in enabling, or disabling, data checksums in an * online cluster must be properly synchronized with the normal backends * serving concurrent queries to ensure correctness. Correctness is defined @@ -57,7 +65,7 @@ * backends must wait on the procsignalbarrier to be acknowledged by all * before proceeding to validate data checksums. * - * There are two levels of synchronization required for changing data_checksums + * There are two steps of synchronization required for changing data_checksums * in an online cluster: (i) changing state in the active backends ("on", * "off", "inprogress-on" and "inprogress-off"), and (ii) ensuring no * incompatible objects and processes are left in a database when workers end. @@ -65,81 +73,89 @@ * latter with ensuring that any concurrent activity cannot break the data * checksum contract during processing. * - * Synchronizing the state change is done with procsignal barriers, where the - * WAL logging backend updating the global state in the controlfile will wait - * for all other backends to absorb the barrier. Barrier absorption will happen - * during interrupt processing, which means that connected backends will change - * state at different times. To prevent data checksum state changes when - * writing and verifying checksums, interrupts shall be held off before - * interrogating state and resumed when the IO operation has been performed. + * Synchronizing the state change is done with procsignal barriers, before + * updating the controlfile with the state all other backends must absorb the + * barrier. Barrier absorption will happen during interrupt processing, which + * means that connected backends will change state at different times. If + * waiting for a barrier is done during startup, for example during replay, it + * is important to realize that any locks held by the startup process might + * cause deadblocks if backends end up waiting for those locks while startup + * is waiting for a procsignalbarrier. + * + * 3.1 When Enabling Data Checksums + * -------------------------------- + * A process which fails to observe data checksums being enabled can induce two + * types of errors: failing to write the checksum when modifying the page and + * failing to validate the data checksum on the page when reading it. * - * When Enabling Data Checksums - * ---------------------------- - * A process which fails to observe data checksums being enabled can induce - * two types of errors: failing to write the checksum when modifying the page - * and failing to validate the data checksum on the page when reading it. + * When processing starts all backends belong to one of the below sets, with + * one set being empty: * - * When processing starts all backends belong to one of the below sets, with - * one set being empty: + * Bd: Backends in "off" state + * Bi: Backends in "inprogress-on" state * - * Bd: Backends in "off" state - * Bi: Backends in "inprogress-on" state + * If processing is started in an online cluster then all backends are in Bd. + * If processing was halted by the cluster shutting down (due to a crash or + * intentional restart), the controlfile state "inprogress-on" will be observed + * on system startup and all backends will be placed in Bd. The controlfile + * state will also be set of "off". * - * If processing is started in an online cluster then all backends are in Bd. - * If processing was halted by the cluster shutting down, the controlfile - * state "inprogress-on" will be observed on system startup and all backends - * will be placed in Bd. Backends transition Bd -> Bi via a procsignalbarrier - * which is emitted by the DataChecksumsLauncher. When all backends have - * acknowledged the barrier then Bd will be empty and the next phase can - * begin: calculating and writing data checksums with DataChecksumsWorkers. - * When the DataChecksumsWorker processes have finished writing checksums on - * all pages and enables data checksums cluster-wide via another - * procsignalbarrier, there are four sets of backends where Bd shall be an - * empty set: + * Backends transition Bd -> Bi via a procsignalbarrier which is emitted by the + * DataChecksumsLauncher. When all backends have acknowledged the barrier then + * Bd will be empty and the next phase can begin: calculating and writing data + * checksums with DataChecksumsWorkers. When the DataChecksumsWorker processes + * have finished writing checksums on all pages, data checksums are enabled + * cluster-wide via another procsignalbarrier. There are four sets of backends + * where Bd shall be an empty set: * - * Bg: Backend updating the global state and emitting the procsignalbarrier - * Bd: Backends in "off" state - * Be: Backends in "on" state - * Bi: Backends in "inprogress-on" state + * Bg: Backend updating the global state and emitting the procsignalbarrier + * Bd: Backends in "off" state + * Be: Backends in "on" state + * Bi: Backends in "inprogress-on" state * - * Backends in Bi and Be will write checksums when modifying a page, but only - * backends in Be will verify the checksum during reading. The Bg backend is - * blocked waiting for all backends in Bi to process interrupts and move to - * Be. Any backend starting while Bg is waiting on the procsignalbarrier will - * observe the global state being "on" and will thus automatically belong to - * Be. Checksums are enabled cluster-wide when Bi is an empty set. Bi and Be - * are compatible sets while still operating based on their local state as - * both write data checksums. + * Backends in Bi and Be will write checksums when modifying a page, but only + * backends in Be will verify the checksum during reading. The Bg backend is + * blocked waiting for all backends in Bi to process interrupts and move to + * Be. Any backend starting while Bg is waiting on the procsignalbarrier will + * observe the global state being "on" and will thus automatically belong to + * Be. Checksums are enabled cluster-wide when Bi is an empty set. Bi and Be + * are compatible sets while still operating based on their local state as + * both write data checksums. * - * When Disabling Data Checksums - * ----------------------------- - * A process which fails to observe that data checksums have been disabled - * can induce two types of errors: writing the checksum when modifying the - * page and validating a data checksum which is no longer correct due to - * modifications to the page. + * 3.2 When Disabling Data Checksums + * --------------------------------- + * A process which fails to observe that data checksums have been disabled + * can induce two types of errors: writing the checksum when modifying the + * page and validating a data checksum which is no longer correct due to + * modifications to the page. The former is not an error per se as data + * integrity is maintained, but it is wasteful. The latter will cause errors + * in user operations. Assuming the following sets of backends: * - * Bg: Backend updating the global state and emitting the procsignalbarrier - * Bd: Backends in "off" state - * Be: Backends in "on" state - * Bo: Backends in "inprogress-off" state + * Bg: Backend updating the global state and emitting the procsignalbarrier + * Bd: Backends in "off" state + * Be: Backends in "on" state + * Bo: Backends in "inprogress-off" state + * Bi: Backends in "inprogress-on" state * - * Backends transition from the Be state to Bd like so: Be -> Bo -> Bd + * Backends transition from the Be state to Bd like so: Be -> Bo -> Bd. From + * all other states, the transition can be straight to Bd. * - * The goal is to transition all backends to Bd making the others empty sets. - * Backends in Bo write data checksums, but don't validate them, such that - * backends still in Be can continue to validate pages until the barrier has - * been absorbed such that they are in Bo. Once all backends are in Bo, the - * barrier to transition to "off" can be raised and all backends can safely - * stop writing data checksums as no backend is enforcing data checksum - * validation any longer. + * The goal is to transition all backends to Bd making the others empty sets. + * Backends in Bo write data checksums, but don't validate them, such that + * backends still in Be can continue to validate pages until the barrier has + * been absorbed such that they are in Bo. Once all backends are in Bo, the + * barrier to transition to "off" can be raised and all backends can safely + * stop writing data checksums as no backend is enforcing data checksum + * validation any longer. * - * Potential optimizations - * ----------------------- + * 4. Future opportunities for optimizations + * ----------------------------------------- * Below are some potential optimizations and improvements which were brought * up during reviews of this feature, but which weren't implemented in the * initial version. These are ideas listed without any validation on their - * feasibility or potential payoff. More discussion on these can be found on - * the -hackers threads linked to in the commit message of this feature. + * feasibility or potential payoff. More discussion on (most of) these can be + * found on the -hackers threads linked to in the commit message of this + * feature. * * * Launching datachecksumsworker for resuming operation from the startup * process: Currently users have to restart processing manually after a @@ -149,17 +165,16 @@ * * Avoid dirtying the page when checksums already match: Iff the checksum * on the page happens to already match we still dirty the page. It should * be enough to only do the log_newpage_buffer() call in that case. - * * Invent a lightweight WAL record that doesn't contain the full-page - * image but just the block number: On replay, the redo routine would read - * the page from disk. * * Teach pg_checksums to avoid checksummed pages when pg_checksums is used * to enable checksums on a cluster which is in inprogress-on state and * may have checksummed pages (make pg_checksums be able to resume an - * online operation). + * online operation). This should only be attempted for wal_level minimal. * * Restartability (not necessarily with page granularity). + * * Avoid processing databases which were created during inprogress-on. + * Right now all databases are processed regardless to be safe. * * - * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -221,16 +236,19 @@ typedef struct DataChecksumsWorkerShmemStruct DataChecksumsWorkerOperation launch_operation; int launch_cost_delay; int launch_cost_limit; - bool launch_fast; /* - * Is a launcher process is currently running? - * - * This is set by the launcher process, after it has read the above - * launch_* parameters. + * Is a launcher process is currently running? This is set by the main + * launcher process, after it has read the above launch_* parameters. */ bool launcher_running; + /* + * If a worker process currently running? This is set by the worker + * launcher when it starts waiting for a worker process to finish. + */ + bool worker_running; + /* * These fields indicate the target state that the launcher is currently * working towards. They can be different from the corresponding launch_* @@ -245,7 +263,6 @@ typedef struct DataChecksumsWorkerShmemStruct DataChecksumsWorkerOperation operation; int cost_delay; int cost_limit; - bool immediate_checkpoint; /* * Signaling between the launcher and the worker process. @@ -304,7 +321,7 @@ static List *BuildDatabaseList(void); static List *BuildRelationList(bool temp_relations, bool include_shared); static void FreeDatabaseList(List *dblist); static DataChecksumsWorkerResult ProcessDatabase(DataChecksumsWorkerDatabase *db); -static bool ProcessAllDatabases(bool immediate_checkpoint); +static bool ProcessAllDatabases(void); static bool ProcessSingleRelationFork(Relation reln, ForkNumber forkNum, BufferAccessStrategy strategy); static void launcher_cancel_handler(SIGNAL_ARGS); static void WaitForAllTransactionsToFinish(void); @@ -319,8 +336,7 @@ static void WaitForAllTransactionsToFinish(void); void StartDataChecksumsWorkerLauncher(DataChecksumsWorkerOperation op, int cost_delay, - int cost_limit, - bool fast) + int cost_limit) { BackgroundWorker bgw; BackgroundWorkerHandle *bgw_handle; @@ -338,7 +354,8 @@ StartDataChecksumsWorkerLauncher(DataChecksumsWorkerOperation op, DataChecksumsWorkerShmem->launch_operation = op; DataChecksumsWorkerShmem->launch_cost_delay = cost_delay; DataChecksumsWorkerShmem->launch_cost_limit = cost_limit; - DataChecksumsWorkerShmem->launch_fast = fast; + + INJECTION_POINT("datachecksumsworker-startup-delay", NULL); /* is the launcher already running? */ launcher_running = DataChecksumsWorkerShmem->launcher_running; @@ -384,6 +401,9 @@ StartDataChecksumsWorkerLauncher(DataChecksumsWorkerOperation op, errcode(ERRCODE_INSUFFICIENT_RESOURCES), errmsg("failed to start background worker to process data checksums")); } + else + ereport(ERROR, + errmsg("data checksum processing already running")); } /* @@ -409,15 +429,7 @@ ProcessSingleRelationFork(Relation reln, ForkNumber forkNum, BufferAccessStrateg snprintf(activity, sizeof(activity) - 1, "processing: %s.%s (%s, %dblocks)", relns, RelationGetRelationName(reln), forkNames[forkNum], numblocks); pgstat_report_activity(STATE_RUNNING, activity); - - /* - * As of now we only update the block counter for main forks in order to - * not cause too frequent calls. TODO: investigate whether we should do it - * more frequent? - */ - if (forkNum == MAIN_FORKNUM) - pgstat_progress_update_param(PROGRESS_DATACHECKSUMS_BLOCKS_TOTAL, - numblocks); + pgstat_progress_update_param(PROGRESS_DATACHECKSUMS_BLOCKS_TOTAL, numblocks); /* * We are looping over the blocks which existed at the time of process @@ -456,8 +468,10 @@ ProcessSingleRelationFork(Relation reln, ForkNumber forkNum, BufferAccessStrateg * a lock, because if we miss it being set, we will try again soon. */ Assert(operation == ENABLE_DATACHECKSUMS); + LWLockAcquire(DataChecksumsWorkerLock, LW_SHARED); if (DataChecksumsWorkerShmem->launch_operation == DISABLE_DATACHECKSUMS) abort_requested = true; + LWLockRelease(DataChecksumsWorkerLock); if (abort_requested) return false; @@ -471,6 +485,10 @@ ProcessSingleRelationFork(Relation reln, ForkNumber forkNum, BufferAccessStrateg pgstat_progress_update_param(PROGRESS_DATACHECKSUMS_BLOCKS_DONE, (blknum + 1)); + /* + * Processing is re-using the vacuum cost delay for process + * throttling, hence why we call vacuum APIs here. + */ vacuum_delay_point(false); } @@ -607,6 +625,8 @@ ProcessDatabase(DataChecksumsWorkerDatabase *db) errmsg("initiating data checksum processing in database \"%s\"", db->dbname)); + DataChecksumsWorkerShmem->worker_running = true; + snprintf(activity, sizeof(activity) - 1, "Waiting for worker in database %s (pid %ld)", db->dbname, (long) pid); pgstat_report_activity(STATE_RUNNING, activity); @@ -625,6 +645,7 @@ ProcessDatabase(DataChecksumsWorkerDatabase *db) db->dbname)); pgstat_report_activity(STATE_IDLE, NULL); + DataChecksumsWorkerShmem->worker_running = false; return DataChecksumsWorkerShmem->success; } @@ -633,8 +654,9 @@ ProcessDatabase(DataChecksumsWorkerDatabase *db) * launcher_exit * * Internal routine for cleaning up state when the launcher process exits. We - * need to clean up the abort flag to ensure that processing can be restarted - * again after it was previously aborted. + * need to clean up the abort flag to ensure that processing started again if + * it was previously aborted (note: started again, *not* restarted from where + * it left off). */ static void launcher_exit(int code, Datum arg) @@ -644,6 +666,14 @@ launcher_exit(int code, Datum arg) LWLockAcquire(DataChecksumsWorkerLock, LW_EXCLUSIVE); launcher_running = false; DataChecksumsWorkerShmem->launcher_running = false; + + /* + * TODO: how to really handle the worker still running when the + * launcher exits? + */ + if (DataChecksumsWorkerShmem->worker_running) + ereport(LOG, + errmsg("data checksums launcher exiting while worker is still running")); LWLockRelease(DataChecksumsWorkerLock); } } @@ -720,8 +750,8 @@ WaitForAllTransactionsToFinish(void) if (rc & WL_POSTMASTER_DEATH) ereport(FATAL, errcode(ERRCODE_ADMIN_SHUTDOWN), - errmsg("postmaster exited during data checksum processing"), - errhint("Restart the database and restart data checksum processing by calling pg_enable_data_checksums().")); + errmsg("postmaster exited during data checksums processing"), + errhint("Data checksums processing must be restarted manually after cluster restart.")); LWLockAcquire(DataChecksumsWorkerLock, LW_SHARED); if (DataChecksumsWorkerShmem->launch_operation != operation) @@ -749,7 +779,7 @@ DataChecksumsWorkerLauncherMain(Datum arg) on_shmem_exit(launcher_exit, 0); ereport(DEBUG1, - errmsg("background worker \"datachecksum launcher\" started")); + errmsg("background worker \"datachecksums launcher\" started")); pqsignal(SIGTERM, die); pqsignal(SIGINT, launcher_cancel_handler); @@ -759,10 +789,14 @@ DataChecksumsWorkerLauncherMain(Datum arg) MyBackendType = B_DATACHECKSUMSWORKER_LAUNCHER; init_ps_display(NULL); + INJECTION_POINT("datachecksumsworker-launcher-delay", NULL); + LWLockAcquire(DataChecksumsWorkerLock, LW_EXCLUSIVE); if (DataChecksumsWorkerShmem->launcher_running) { + ereport(LOG, + errmsg("background worker \"datachecksums launcher\" already running, exiting")); /* Launcher was already running, let it finish */ LWLockRelease(DataChecksumsWorkerLock); return; @@ -780,7 +814,6 @@ DataChecksumsWorkerLauncherMain(Datum arg) DataChecksumsWorkerShmem->operation = operation; DataChecksumsWorkerShmem->cost_delay = DataChecksumsWorkerShmem->launch_cost_delay; DataChecksumsWorkerShmem->cost_limit = DataChecksumsWorkerShmem->launch_cost_limit; - DataChecksumsWorkerShmem->immediate_checkpoint = DataChecksumsWorkerShmem->launch_fast; LWLockRelease(DataChecksumsWorkerLock); /* @@ -801,26 +834,21 @@ again: * checksums enabled, exit immediately as there is nothing more to do. * Hold interrupts to make sure state doesn't change during checking. */ - HOLD_INTERRUPTS(); if (DataChecksumsNeedVerify()) - { - RESUME_INTERRUPTS(); goto done; - } - RESUME_INTERRUPTS(); /* * Set the state to inprogress-on and wait on the procsignal barrier. */ pgstat_progress_update_param(PROGRESS_DATACHECKSUMS_PHASE, PROGRESS_DATACHECKSUMS_PHASE_ENABLING); - SetDataChecksumsOnInProgress(DataChecksumsWorkerShmem->immediate_checkpoint); + SetDataChecksumsOnInProgress(); /* * All backends are now in inprogress-on state and are writing data * checksums. Start processing all data at rest. */ - if (!ProcessAllDatabases(DataChecksumsWorkerShmem->immediate_checkpoint)) + if (!ProcessAllDatabases()) { /* * If the target state changed during processing then it's not a @@ -842,20 +870,13 @@ again: * Data checksums have been set on all pages, set the state to on in * order to instruct backends to validate checksums on reading. */ - SetDataChecksumsOn(DataChecksumsWorkerShmem->immediate_checkpoint); + SetDataChecksumsOn(); } else if (operation == DISABLE_DATACHECKSUMS) { - int flags; - pgstat_progress_update_param(PROGRESS_DATACHECKSUMS_PHASE, PROGRESS_DATACHECKSUMS_PHASE_DISABLING); - SetDataChecksumsOff(DataChecksumsWorkerShmem->immediate_checkpoint); - - flags = CHECKPOINT_FORCE | CHECKPOINT_WAIT; - if (DataChecksumsWorkerShmem->immediate_checkpoint) - flags = flags | CHECKPOINT_FAST; - RequestCheckpoint(flags); + SetDataChecksumsOff(); } else { @@ -903,18 +924,14 @@ done: * checksums. Until no new databases are found, this will loop around computing * a new list and comparing it to the already seen ones. * - * If immediate_checkpoint is set to true then a CHECKPOINT_FAST will be - * issued. This is useful for testing but should be avoided in production use - * as it may affect cluster performance drastically. */ static bool -ProcessAllDatabases(bool immediate_checkpoint) +ProcessAllDatabases(void) { List *DatabaseList; HTAB *ProcessedDatabases = NULL; HASHCTL hash_ctl; bool found_failed = false; - int flags; /* Initialize a hash tracking all processed databases */ memset(&hash_ctl, 0, sizeof(hash_ctl)); @@ -936,8 +953,12 @@ ProcessAllDatabases(bool immediate_checkpoint) * were created during our runtime. Since a database can be created as a * copy of any other database (which may not have existed in our last * run), we have to repeat this loop until no new databases show up in the - * list. + * list. Here the initial list for the loop processing is generated after + * waiting for all existing transactions to finish to ensure that we can + * see any database which was created even if the transaction in which it + * was created started before checksums were being processed. */ + WaitForAllTransactionsToFinish(); DatabaseList = BuildDatabaseList(); /* Allow a test case to modify the initial list of databases */ @@ -1060,9 +1081,14 @@ ProcessAllDatabases(bool immediate_checkpoint) break; /* - * Re-generate the list of databases for another pass. Since we wait - * for all pre-existing transactions finish, this way we can be - * certain that there are no databases left without checksums. + * Re-generate the list of databases for another pass in case any new + * databases were created while we were running. Since the initial + * list was generated efter waiting for all transaction to finish we + * know that all new databases found here must have been created while + * seeing the new checksum state. By waiting for all transactions + * here as well we know that any database created using an existing db + * as a template (which may have been used before it had checksums + * enabled) will be committed. */ WaitForAllTransactionsToFinish(); DatabaseList = BuildDatabaseList(); @@ -1078,7 +1104,6 @@ ProcessAllDatabases(bool immediate_checkpoint) * still exists, but enabling checksums failed then we fail the entire * checksumming process and exit with an error. */ - WaitForAllTransactionsToFinish(); DatabaseList = BuildDatabaseList(); foreach_ptr(DataChecksumsWorkerDatabase, db, DatabaseList) @@ -1109,35 +1134,13 @@ ProcessAllDatabases(bool immediate_checkpoint) if (found_failed) { /* Disable checksums on cluster, because we failed */ - SetDataChecksumsOff(DataChecksumsWorkerShmem->immediate_checkpoint); - /* Force a checkpoint to make everything consistent */ - flags = CHECKPOINT_FORCE | CHECKPOINT_WAIT; - if (immediate_checkpoint) - flags = flags | CHECKPOINT_FAST; - RequestCheckpoint(flags); + SetDataChecksumsOff(); ereport(ERROR, errcode(ERRCODE_INSUFFICIENT_RESOURCES), errmsg("data checksums failed to get enabled in all databases, aborting"), errhint("The server log might have more information on the cause of the error.")); } - /* - * When enabling checksums, we have to wait for a checkpoint for the - * checksums to change from in-progress to on. - */ - pgstat_progress_update_param(PROGRESS_DATACHECKSUMS_PHASE, - PROGRESS_DATACHECKSUMS_PHASE_WAITING_CHECKPOINT); - - /* - * Force a checkpoint to get everything out to disk. The use of immediate - * checkpoints is for running tests, as they would otherwise not execute - * in such a way that they can reliably be placed under timeout control. - */ - flags = CHECKPOINT_FORCE | CHECKPOINT_WAIT; - if (immediate_checkpoint) - flags = flags | CHECKPOINT_FAST; - RequestCheckpoint(flags); - pgstat_progress_update_param(PROGRESS_DATACHECKSUMS_PHASE, PROGRESS_DATACHECKSUMS_PHASE_WAITING_BARRIER); return true; @@ -1178,12 +1181,11 @@ DataChecksumsWorkerShmemInit(void) /* * Even if this is a redundant assignment, we want to be explicit - * about our intent for readability, since we want to be able to query - * this state in case of restartability. + * about our intent for readability. */ DataChecksumsWorkerShmem->launch_operation = false; DataChecksumsWorkerShmem->launcher_running = false; - DataChecksumsWorkerShmem->launch_fast = false; + DataChecksumsWorkerShmem->worker_running = false; } } @@ -1364,7 +1366,11 @@ DataChecksumsWorkerMain(Datum arg) InitialTempTableList = BuildRelationList(true, false); /* - * Enable vacuum cost delay, if any. + * Enable vacuum cost delay, if any. While this process isn't doing any + * vacuuming, we are re-using the infrastructure that vacuum cost delay + * provides rather than inventing something bespoke. This is an internal + * implementation detail and care should be taken to avoid it bleeding + * through to the user to avoid confusion. */ Assert(DataChecksumsWorkerShmem->operation == ENABLE_DATACHECKSUMS); VacuumCostDelay = DataChecksumsWorkerShmem->cost_delay; diff --git a/src/backend/postmaster/walsummarizer.c b/src/backend/postmaster/walsummarizer.c index c3d56c866d3..afef118d1a4 100644 --- a/src/backend/postmaster/walsummarizer.c +++ b/src/backend/postmaster/walsummarizer.c @@ -1430,8 +1430,11 @@ SummarizeXlogRecord(XLogReaderState *xlogreader, bool *new_fast_forward) if (info == XLOG_CHECKPOINT_REDO) { + xl_checkpoint_redo xlrec; + /* Payload is wal_level at the time record was written. */ - memcpy(&record_wal_level, XLogRecGetData(xlogreader), sizeof(int)); + memcpy(&xlrec, XLogRecGetData(xlogreader), sizeof(xl_checkpoint_redo)); + record_wal_level = xlrec.wal_level; } else if (info == XLOG_CHECKPOINT_SHUTDOWN) { diff --git a/src/backend/storage/page/bufpage.c b/src/backend/storage/page/bufpage.c index 8b67ba40f0f..72e1c042cf1 100644 --- a/src/backend/storage/page/bufpage.c +++ b/src/backend/storage/page/bufpage.c @@ -107,6 +107,7 @@ PageIsVerified(PageData *page, BlockNumber blkno, int flags, bool *checksum_fail */ if (!PageIsNew(page)) { + HOLD_INTERRUPTS(); if (DataChecksumsNeedVerify()) { checksum = pg_checksum_page(page, blkno); @@ -118,6 +119,7 @@ PageIsVerified(PageData *page, BlockNumber blkno, int flags, bool *checksum_fail *checksum_failure_p = true; } } + RESUME_INTERRUPTS(); /* * The following checks don't prove the header is correct, only that @@ -1510,9 +1512,13 @@ PageSetChecksumCopy(Page page, BlockNumber blkno) { static char *pageCopy = NULL; + HOLD_INTERRUPTS(); /* If we don't need a checksum, just return the passed-in data */ if (PageIsNew(page) || !DataChecksumsNeedWrite()) + { + RESUME_INTERRUPTS(); return page; + } /* * We allocate the copy space once and use it over on each subsequent @@ -1528,6 +1534,7 @@ PageSetChecksumCopy(Page page, BlockNumber blkno) memcpy(pageCopy, page, BLCKSZ); ((PageHeader) pageCopy)->pd_checksum = pg_checksum_page(pageCopy, blkno); + RESUME_INTERRUPTS(); return pageCopy; } @@ -1540,9 +1547,14 @@ PageSetChecksumCopy(Page page, BlockNumber blkno) void PageSetChecksumInplace(Page page, BlockNumber blkno) { + HOLD_INTERRUPTS(); /* If we don't need a checksum, just return */ if (PageIsNew(page) || !DataChecksumsNeedWrite()) + { + RESUME_INTERRUPTS(); return; + } ((PageHeader) page)->pd_checksum = pg_checksum_page(page, blkno); + RESUME_INTERRUPTS(); } diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c index d3dc55e3820..34113a7e8bb 100644 --- a/src/backend/utils/init/postinit.c +++ b/src/backend/utils/init/postinit.c @@ -755,7 +755,7 @@ InitPostgres(const char *in_dbname, Oid dboid, * * This intentionally happens after initializing the procsignal, otherwise * we might miss a state change. This means we can get a barrier for the - * state we've just initialized - but it can happen only once. + * state we've just initialized. * * The postmaster (which is what gets forked into the new child process) * does not handle barriers, therefore it may not have the current value diff --git a/src/bin/pg_controldata/pg_controldata.c b/src/bin/pg_controldata/pg_controldata.c index ca4957b2fa1..47afa82d9c5 100644 --- a/src/bin/pg_controldata/pg_controldata.c +++ b/src/bin/pg_controldata/pg_controldata.c @@ -288,7 +288,7 @@ main(int argc, char *argv[]) printf(_("Latest checkpoint's newestCommitTsXid:%u\n"), ControlFile->checkPointCopy.newestCommitTsXid); printf(_("Latest checkpoint's data_checksum_version:%u\n"), - ControlFile->checkPointCopy.data_checksum_version); + ControlFile->checkPointCopy.dataChecksumVersion); printf(_("Time of latest checkpoint: %s\n"), ckpttime_str); printf(_("Fake LSN counter for unlogged rels: %X/%08X\n"), diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index 49518ea6b2a..352ce0477d4 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -247,9 +247,9 @@ extern bool DataChecksumsNeedWrite(void); extern bool DataChecksumsNeedVerify(void); extern bool DataChecksumsOnInProgress(void); extern bool DataChecksumsOffInProgress(void); -extern void SetDataChecksumsOnInProgress(bool immediate_checkpoint); -extern void SetDataChecksumsOn(bool immediate_checkpoint); -extern void SetDataChecksumsOff(bool immediate_checkpoint); +extern void SetDataChecksumsOnInProgress(void); +extern void SetDataChecksumsOn(void); +extern void SetDataChecksumsOff(void); extern bool AbsorbDataChecksumsBarrier(int target_state); extern const char *show_data_checksums(void); extern void InitLocalDataChecksumVersion(void); diff --git a/src/include/access/xlog_internal.h b/src/include/access/xlog_internal.h index 98a8a4ac384..53d1e2bebfd 100644 --- a/src/include/access/xlog_internal.h +++ b/src/include/access/xlog_internal.h @@ -293,7 +293,7 @@ typedef struct xl_restore_point /* Information logged when data checksum level is changed */ typedef struct xl_checksum_state { - uint32 new_checksumtype; + ChecksumType new_checksumtype; } xl_checksum_state; /* Overwrite of prior contrecord */ @@ -312,6 +312,12 @@ typedef struct xl_end_of_recovery int wal_level; } xl_end_of_recovery; +typedef struct xl_checkpoint_redo +{ + int wal_level; + uint32 data_checksum_version; +} xl_checkpoint_redo; + /* * The functions in xloginsert.c construct a chain of XLogRecData structs * to represent the final WAL record. diff --git a/src/include/catalog/pg_control.h b/src/include/catalog/pg_control.h index 4201003717b..7290ece5205 100644 --- a/src/include/catalog/pg_control.h +++ b/src/include/catalog/pg_control.h @@ -64,8 +64,8 @@ typedef struct CheckPoint */ TransactionId oldestActiveXid; - /* data checksums at the time of the checkpoint */ - uint32 data_checksum_version; + /* data checksums state at the time of the checkpoint */ + uint32 dataChecksumVersion; } CheckPoint; /* XLOG info values for XLOG rmgr */ @@ -85,7 +85,7 @@ typedef struct CheckPoint #define XLOG_OVERWRITE_CONTRECORD 0xD0 #define XLOG_CHECKPOINT_REDO 0xE0 #define XLOG_LOGICAL_DECODING_STATUS_CHANGE 0xF0 -#define XLOG_CHECKSUMS 0xF1 +#define XLOG_CHECKSUMS 0xC0 /* diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index 92d56e60719..d1e8d3cd1e3 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -12408,20 +12408,13 @@ # data checksum management functions { oid => '9258', descr => 'disable data checksums', - proname => 'pg_disable_data_checksums', provolatile => 'v', prorettype => 'void', - proparallel => 'r', - proargtypes => 'bool', proallargtypes => '{bool}', - proargmodes => '{i}', - proargnames => '{fast}', - prosrc => 'disable_data_checksums' }, - + proname => 'pg_disable_data_checksums', provolatile => 's', prorettype => 'void', + proparallel => 'r', prosrc => 'disable_data_checksums', proargtypes => '' }, { oid => '9257', descr => 'enable data checksums', proname => 'pg_enable_data_checksums', provolatile => 'v', prorettype => 'void', - proparallel => 'r', - proargtypes => 'int4 int4 bool', proallargtypes => '{int4,int4,bool}', - proargmodes => '{i,i,i}', - proargnames => '{cost_delay,cost_limit,fast}', + proparallel => 'r', proargtypes => 'int4 int4', proallargtypes => '{int4,int4}', + proargmodes => '{i,i}', proargnames => '{cost_delay,cost_limit}', prosrc => 'enable_data_checksums' }, # collation management functions diff --git a/src/include/commands/progress.h b/src/include/commands/progress.h index 6abe7456d3c..cdaf20a9243 100644 --- a/src/include/commands/progress.h +++ b/src/include/commands/progress.h @@ -192,8 +192,7 @@ #define PROGRESS_DATACHECKSUMS_PHASE_ENABLING 0 #define PROGRESS_DATACHECKSUMS_PHASE_DISABLING 1 #define PROGRESS_DATACHECKSUMS_PHASE_WAITING_TEMPREL 2 -#define PROGRESS_DATACHECKSUMS_PHASE_WAITING_CHECKPOINT 3 -#define PROGRESS_DATACHECKSUMS_PHASE_WAITING_BARRIER 4 -#define PROGRESS_DATACHECKSUMS_PHASE_DONE 5 +#define PROGRESS_DATACHECKSUMS_PHASE_WAITING_BARRIER 3 +#define PROGRESS_DATACHECKSUMS_PHASE_DONE 4 #endif diff --git a/src/include/postmaster/datachecksumsworker.h b/src/include/postmaster/datachecksumsworker.h index 0daef709ec8..dc4bf24be7c 100644 --- a/src/include/postmaster/datachecksumsworker.h +++ b/src/include/postmaster/datachecksumsworker.h @@ -4,7 +4,7 @@ * header file for data checksum helper background worker * * - * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * src/include/postmaster/datachecksumsworker.h @@ -41,8 +41,7 @@ typedef enum /* Start the background processes for enabling or disabling checksums */ void StartDataChecksumsWorkerLauncher(DataChecksumsWorkerOperation op, int cost_delay, - int cost_limit, - bool fast); + int cost_limit); /* Background worker entrypoints */ void DataChecksumsWorkerLauncherMain(Datum arg); diff --git a/src/include/storage/checksum.h b/src/include/storage/checksum.h index d22687e2e4c..b6bbba82949 100644 --- a/src/include/storage/checksum.h +++ b/src/include/storage/checksum.h @@ -26,8 +26,7 @@ typedef enum ChecksumType PG_DATA_CHECKSUM_OFF = 0, PG_DATA_CHECKSUM_VERSION, PG_DATA_CHECKSUM_INPROGRESS_ON_VERSION, - PG_DATA_CHECKSUM_INPROGRESS_OFF_VERSION, - PG_DATA_CHECKSUM_ANY_VERSION + PG_DATA_CHECKSUM_INPROGRESS_OFF_VERSION } ChecksumType; /* diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h index bd2af5753a2..45b4b312ab9 100644 --- a/src/include/storage/proc.h +++ b/src/include/storage/proc.h @@ -462,11 +462,10 @@ extern PGDLLIMPORT PGPROC *PreparedXactProcs; * Background writer, checkpointer, WAL writer, WAL summarizer, and archiver * run during normal operation. Startup process and WAL receiver also consume * 2 slots, but WAL writer is launched only after startup has exited, so we - * only need 6 slots to cover these. The DataChecksums worker and launcher - * can consume 2 slots when data checksums are enabled or disabled. + * only need 6 slots to cover these. */ #define MAX_IO_WORKERS 32 -#define NUM_AUXILIARY_PROCS (8 + MAX_IO_WORKERS) +#define NUM_AUXILIARY_PROCS (6 + MAX_IO_WORKERS) /* configurable options */ extern PGDLLIMPORT int DeadlockTimeout; diff --git a/src/test/modules/test_checksums/Makefile b/src/test/modules/test_checksums/Makefile index a5b6259a728..fe054cfb88e 100644 --- a/src/test/modules/test_checksums/Makefile +++ b/src/test/modules/test_checksums/Makefile @@ -2,7 +2,7 @@ # # Makefile for src/test/checksum # -# Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group +# Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group # Portions Copyright (c) 1994, Regents of the University of California # # src/test/checksum/Makefile diff --git a/src/test/modules/test_checksums/meson.build b/src/test/modules/test_checksums/meson.build index ffc737ca87a..3e6a155f35f 100644 --- a/src/test/modules/test_checksums/meson.build +++ b/src/test/modules/test_checksums/meson.build @@ -1,4 +1,4 @@ -# Copyright (c) 2025, PostgreSQL Global Development Group +# Copyright (c) 2026, PostgreSQL Global Development Group test_checksums_sources = files( 'test_checksums.c', diff --git a/src/test/modules/test_checksums/t/001_basic.pl b/src/test/modules/test_checksums/t/001_basic.pl index 728a5c4510c..397f6411cb5 100644 --- a/src/test/modules/test_checksums/t/001_basic.pl +++ b/src/test/modules/test_checksums/t/001_basic.pl @@ -1,5 +1,5 @@ -# Copyright (c) 2025, PostgreSQL Global Development Group +# Copyright (c) 2026, PostgreSQL Global Development Group # Test suite for testing enabling data checksums in an online cluster use strict; diff --git a/src/test/modules/test_checksums/t/002_restarts.pl b/src/test/modules/test_checksums/t/002_restarts.pl index 6c17f304eac..a4d35e9f95c 100644 --- a/src/test/modules/test_checksums/t/002_restarts.pl +++ b/src/test/modules/test_checksums/t/002_restarts.pl @@ -1,5 +1,5 @@ -# Copyright (c) 2024, PostgreSQL Global Development Group +# Copyright (c) 2026, PostgreSQL Global Development Group # Test suite for testing enabling data checksums in an online cluster with a # restart which breaks processing. diff --git a/src/test/modules/test_checksums/t/003_standby_restarts.pl b/src/test/modules/test_checksums/t/003_standby_restarts.pl index f724d4ea74c..e9bf21fe5d5 100644 --- a/src/test/modules/test_checksums/t/003_standby_restarts.pl +++ b/src/test/modules/test_checksums/t/003_standby_restarts.pl @@ -1,5 +1,5 @@ -# Copyright (c) 2024, PostgreSQL Global Development Group +# Copyright (c) 2026, PostgreSQL Global Development Group # Test suite for testing enabling data checksums in an online cluster with # streaming replication diff --git a/src/test/modules/test_checksums/t/004_offline.pl b/src/test/modules/test_checksums/t/004_offline.pl index e9fbcf77eab..f33e278de28 100644 --- a/src/test/modules/test_checksums/t/004_offline.pl +++ b/src/test/modules/test_checksums/t/004_offline.pl @@ -1,5 +1,5 @@ -# Copyright (c) 2024, PostgreSQL Global Development Group +# Copyright (c) 2026, PostgreSQL Global Development Group # Test suite for testing enabling data checksums offline from various states # of checksum processing diff --git a/src/test/modules/test_checksums/t/005_injection.pl b/src/test/modules/test_checksums/t/005_injection.pl index ae801cd336f..6d374793eec 100644 --- a/src/test/modules/test_checksums/t/005_injection.pl +++ b/src/test/modules/test_checksums/t/005_injection.pl @@ -1,5 +1,5 @@ -# Copyright (c) 2025, PostgreSQL Global Development Group +# Copyright (c) 2026, PostgreSQL Global Development Group # Test suite for testing enabling data checksums in an online cluster with # injection point tests injecting failures into the processing @@ -43,53 +43,8 @@ $node->safe_psql('postgres', 'SELECT dcw_inject_fail_database(true);'); enable_data_checksums($node, wait => 'off'); $node->safe_psql('postgres', 'SELECT dcw_inject_fail_database(false);'); -# Force the server to crash after enabling data checksums but before issuing -# the checkpoint. Since the switch has been WAL logged the server should come -# up with checksums enabled after replay. -test_checksum_state($node, 'off'); -$node->safe_psql('postgres', 'SELECT dc_crash_before_checkpoint();'); -enable_data_checksums($node, fast => 'true'); -my $ret = wait_for_cluster_crash($node); -ok($ret == 1, "Cluster crash detection timeout"); -ok(!$node->is_alive, "Cluster crashed due to abort() before checkpointing"); -$node->_update_pid(-1); -$node->start; -test_checksum_state($node, 'on'); - -# Another test just like the previous, but for disabling data checksums (and -# crashing just before checkpointing). The previous injection points were all -# detached from through the crash so they need to be reattached. -$node->safe_psql('postgres', 'SELECT dc_crash_before_checkpoint();'); +# Make sure that disabling after a failure works disable_data_checksums($node); -$ret = wait_for_cluster_crash($node); -ok($ret == 1, "Cluster crash detection timeout"); -ok(!$node->is_alive, "Cluster crashed due to abort() before checkpointing"); -$node->_update_pid(-1); -$node->start; -test_checksum_state($node, 'off'); - -# Now inject a crash before inserting the WAL record for data checksum state -# change, when the server comes back up again the state should not have been -# set to the new value since the process didn't succeed. -$node->safe_psql('postgres', 'SELECT dc_crash_before_xlog();'); -enable_data_checksums($node); -$ret = wait_for_cluster_crash($node); -ok($ret == 1, "Cluster crash detection timeout"); -ok(!$node->is_alive, "Cluster crashed"); -$node->_update_pid(-1); -$node->start; -test_checksum_state($node, 'off'); - -# This re-runs the same test again but with first disabling data checksums and -# then enabling again, crashing right before inserting the WAL record. When -# it comes back up the checksums must not be enabled. -$node->safe_psql('postgres', 'SELECT dc_crash_before_xlog();'); -enable_data_checksums($node); -$ret = wait_for_cluster_crash($node); -ok($ret == 1, "Cluster crash detection timeout"); -ok(!$node->is_alive, "Cluster crashed"); -$node->_update_pid(-1); -$node->start; test_checksum_state($node, 'off'); # --------------------------------------------------------------------------- @@ -122,5 +77,33 @@ SKIP: enable_data_checksums($node, wait => 'on'); } +# Test starting the data checksums launcher from two sessions concurrently. In +# order reliably run things concurrently we use background psql's which block +# on various fault injected delays. +enable_data_checksums($node, wait => 'on'); + +my $A = $node->background_psql('postgres'); +$A->query_safe('SELECT dcw_inject_launcher_delay(true);'); +$A->query_safe('SELECT pg_enable_data_checksums();'); +$node->safe_psql('postgres', 'SELECT dcw_inject_launcher_delay(false);'); +my ($ret, $stdout, $stderr) = + $node->psql('postgres', 'SELECT pg_disable_data_checksums();'); +wait_for_checksum_state($node, 'off'); + + +my $B = $node->background_psql('postgres'); +$B->query_safe('SELECT dcw_inject_startup_delay(true);'); +$B->query_safe('SELECT pg_enable_data_checksums();'); +$node->safe_psql('postgres', 'SELECT dcw_inject_startup_delay(false);'); +($ret, $stdout, $stderr) = + $node->psql('postgres', 'SELECT pg_enable_data_checksums();'); +like($stderr, qr/data checksum processing already running/, + 'ERROR on stderr'); +wait_for_checksum_state($node, 'on'); + +$A->quit; +$B->quit; + + $node->stop; done_testing(); diff --git a/src/test/modules/test_checksums/t/006_pgbench_single.pl b/src/test/modules/test_checksums/t/006_pgbench_single.pl index 96f3b2cd8a6..6bb0e67c7e1 100644 --- a/src/test/modules/test_checksums/t/006_pgbench_single.pl +++ b/src/test/modules/test_checksums/t/006_pgbench_single.pl @@ -1,5 +1,5 @@ -# Copyright (c) 2025, PostgreSQL Global Development Group +# Copyright (c) 2026, PostgreSQL Global Development Group # Test suite for testing enabling data checksums in an online cluster with # concurrent activity via pgbench runs @@ -39,10 +39,6 @@ my $TEST_ITERATIONS = 10; my $data_checksum_state = 'off'; my $pgbench = undef; -# determines whether enable_data_checksums/disable_data_checksums forces an -# immediate checkpoint -my @flip_modes = ('true', 'false'); - # Start a pgbench run in the background against the server specified via the # port passed as parameter. sub background_rw_pgbench @@ -50,10 +46,7 @@ sub background_rw_pgbench my $port = shift; # If a previous pgbench is still running, start by shutting it down. - if ($pgbench) - { - $pgbench->finish; - } + $pgbench->finish if $pgbench; # Randomize the number of pgbench clients a bit (range 1-16) my $clients = 1 + int(rand(15)); @@ -92,13 +85,8 @@ sub flip_data_checksums $node->safe_psql('postgres', "SELECT pg_current_wal_lsn()"); note("LSN before enabling: " . $result . "\n"); - my $mode = $flip_modes[ int(rand(@flip_modes)) ]; - # Ensure that the primary switches to "inprogress-on" - enable_data_checksums( - $node, - wait => 'inprogress-on', - 'fast' => $mode); + enable_data_checksums($node, wait => 'inprogress-on'); random_sleep(); @@ -123,9 +111,7 @@ sub flip_data_checksums $node->safe_psql('postgres', "SELECT pg_current_wal_lsn()"); note("LSN before disabling: " . $result . "\n"); - my $mode = $flip_modes[ int(rand(@flip_modes)) ]; - - disable_data_checksums($node, 'fast' => $mode); + disable_data_checksums($node); # Wait for checksums disabled on the primary wait_for_checksum_state($node, 'off'); @@ -141,8 +127,7 @@ sub flip_data_checksums else { # This should only happen due to programmer error when hacking on the - # test code, but since that might pass subtly by let's ensure it gets - # caught with a test error if so. + # test code, but since that might pass subtly we error out. bail('data_checksum_state variable has invalid state:' . $data_checksum_state); } @@ -172,7 +157,7 @@ background_rw_pgbench($node->port); # Main test suite. This loop will start a pgbench run on the cluster and while # that's running flip the state of data checksums concurrently. It will then -# randomly restart thec cluster (in fast or immediate) mode and then check for +# randomly restart the cluster and then check for # the desired state. The idea behind doing things randomly is to stress out # any timing related issues by subjecting the cluster for varied workloads. # A TODO is to generate a trace such that any test failure can be traced to @@ -183,8 +168,6 @@ for (my $i = 0; $i < $TEST_ITERATIONS; $i++) if (!$node->is_alive) { - random_sleep(); - # Start, to do recovery, and stop $node->start; $node->stop('fast'); @@ -203,6 +186,7 @@ for (my $i = 0; $i < $TEST_ITERATIONS; $i++) # Randomize the WAL size, to trigger checkpoints less/more often my $sb = 64 + int(rand(1024)); $node->append_conf('postgresql.conf', qq[max_wal_size = $sb]); + note("changing max_wal_size to " . $sb); $node->start; diff --git a/src/test/modules/test_checksums/t/007_pgbench_standby.pl b/src/test/modules/test_checksums/t/007_pgbench_standby.pl index 8b8e031cbf6..a7365496853 100644 --- a/src/test/modules/test_checksums/t/007_pgbench_standby.pl +++ b/src/test/modules/test_checksums/t/007_pgbench_standby.pl @@ -1,5 +1,5 @@ -# Copyright (c) 2025, PostgreSQL Global Development Group +# Copyright (c) 2026, PostgreSQL Global Development Group # Test suite for testing enabling data checksums in an online cluster, # comprising of a primary and a replicated standby, with concurrent activity @@ -17,6 +17,16 @@ use lib $FindBin::RealBin; use DataChecksums::Utils; +if (!$ENV{PG_TEST_EXTRA} || $ENV{PG_TEST_EXTRA} !~ /\bchecksum_extended\b/) +{ + plan skip_all => 'Extended checksum tests not enabled'; +} + +if ($ENV{enable_injection_points} ne 'yes') +{ + plan skip_all => 'Injection points not supported by this build'; +} + my $node_primary_slot = 'physical_slot'; my $node_primary_backup = 'primary_backup'; my $node_primary; @@ -36,27 +46,6 @@ my $data_checksum_state = 'off'; my $pgbench_primary = undef; my $pgbench_standby = undef; -# Variables holding state for managing the cluster and aux processes in -# various ways -my ($pgb_primary_stdin, $pgb_primary_stdout, $pgb_primary_stderr) = - ('', '', ''); -my ($pgb_standby_1_stdin, $pgb_standby_1_stdout, $pgb_standby_1_stderr) = - ('', '', ''); - -if (!$ENV{PG_TEST_EXTRA} || $ENV{PG_TEST_EXTRA} !~ /\bchecksum_extended\b/) -{ - plan skip_all => 'Extended tests not enabled'; -} - -if ($ENV{enable_injection_points} ne 'yes') -{ - plan skip_all => 'Injection points not supported by this build'; -} - -# determines whether enable_data_checksums/disable_data_checksums forces an -# immediate checkpoint -my @flip_modes = ('true', 'false'); - # Start a pgbench run in the background against the server specified via the # port passed as parameter sub background_pgbench @@ -66,6 +55,7 @@ sub background_pgbench # Terminate any currently running pgbench process before continuing $pgbench_primary->finish if $pgbench_primary; + # Randomize the number of pgbench clients a bit (range 1-16) my $clients = 1 + int(rand(15)); my @cmd = ('pgbench', '-p', $port, '-T', '600', '-c', $clients); @@ -77,7 +67,7 @@ sub background_pgbench push(@cmd, 'postgres'); $pgbench_primary = IPC::Run::start( - [ 'pgbench', '-p', $port, '-T', '600', '-c', $clients, 'postgres' ], + \@cmd, '<' => '/dev/null', '>' => '/dev/null', '2>' => '/dev/null', @@ -89,14 +79,14 @@ sub background_pgbench # before and after state. sub flip_data_checksums { + # First, make sure the cluster is in the state we expect it to be test_checksum_state($node_primary, $data_checksum_state); test_checksum_state($node_standby_1, $data_checksum_state); if ($data_checksum_state eq 'off') { # Coin-toss to see if we are injecting a retry due to a temptable - $node_primary->safe_psql('postgres', - 'SELECT dcw_fake_temptable(true);') + $node_primary->safe_psql('postgres', 'SELECT dcw_fake_temptable();') if cointoss(); # log LSN right before we start changing checksums @@ -104,14 +94,11 @@ sub flip_data_checksums $node_primary->safe_psql('postgres', "SELECT pg_current_wal_lsn()"); note("LSN before enabling: " . $result . "\n"); - my $mode = $flip_modes[ int(rand(@flip_modes)) ]; - # Ensure that the primary switches to "inprogress-on" - enable_data_checksums( - $node_primary, - wait => 'inprogress-on', - 'fast' => $mode); + enable_data_checksums($node_primary, wait => 'inprogress-on'); + random_sleep(); + # Wait for checksum enable to be replayed $node_primary->wait_for_catchup($node_standby_1, 'replay'); @@ -127,7 +114,6 @@ sub flip_data_checksums 'f'); is($result, 1, 'ensure standby has absorbed the inprogress-on barrier'); - random_sleep(); $result = $node_standby_1->safe_psql('postgres', "SELECT setting " . "FROM pg_catalog.pg_settings " @@ -160,9 +146,7 @@ sub flip_data_checksums $node_primary->safe_psql('postgres', "SELECT pg_current_wal_lsn()"); note("LSN before disabling: " . $result . "\n"); - my $mode = $flip_modes[ int(rand(@flip_modes)) ]; - - disable_data_checksums($node_primary, 'fast' => $mode); + disable_data_checksums($node_primary); $node_primary->wait_for_catchup($node_standby_1, 'replay'); # Wait for checksums disabled on the primary and standby @@ -182,9 +166,9 @@ sub flip_data_checksums else { # This should only happen due to programmer error when hacking on the - # test code, but since that might pass subtly by let's ensure it gets - # caught with a test error if so. - is(1, 0, 'data_checksum_state variable has invalid state'); + # test code, but since that might pass subtly we error out. + bail('data_checksum_state variable has invalid state:' + . $data_checksum_state); } } @@ -229,7 +213,7 @@ background_pgbench($node_primary->port, 0); # Main test suite. This loop will start a pgbench run on the cluster and while # that's running flip the state of data checksums concurrently. It will then -# randomly restart the cluster (in fast or immediate) mode and then check for +# randomly restart the cluster and then check for # the desired state. The idea behind doing things randomly is to stress out # any timing related issues by subjecting the cluster for varied workloads. # A TODO is to generate a trace such that any test failure can be traced to @@ -240,8 +224,6 @@ for (my $i = 0; $i < $TEST_ITERATIONS; $i++) if (!$node_primary->is_alive) { - random_sleep(); - # start, to do recovery, and stop $node_primary->start; $node_primary->stop('fast'); @@ -271,8 +253,6 @@ for (my $i = 0; $i < $TEST_ITERATIONS; $i++) if (!$node_standby_1->is_alive) { - random_sleep(); - $node_standby_1->start; $node_standby_1->stop('fast'); @@ -297,20 +277,18 @@ for (my $i = 0; $i < $TEST_ITERATIONS; $i++) $node_standby_1->start; - # Start a select-only pgbench in the background on the standby + # Start a read-only pgbench in the background on the standby background_pgbench($node_standby_1->port, 1); } $node_primary->safe_psql('postgres', "UPDATE t SET a = a + 1;"); + $node_primary->wait_for_catchup($node_standby_1, 'write'); flip_data_checksums(); random_sleep(); my $result = $node_primary->safe_psql('postgres', "SELECT count(*) FROM t WHERE a > 1"); is($result, '100000', 'ensure data pages can be read back on primary'); - random_sleep(); - $node_primary->wait_for_catchup($node_standby_1, 'write'); - random_sleep(); # Potentially powercycle the cluster (the nodes independently) diff --git a/src/test/modules/test_checksums/t/DataChecksums/Utils.pm b/src/test/modules/test_checksums/t/DataChecksums/Utils.pm index cf670be944c..d299a1ba01d 100644 --- a/src/test/modules/test_checksums/t/DataChecksums/Utils.pm +++ b/src/test/modules/test_checksums/t/DataChecksums/Utils.pm @@ -1,5 +1,5 @@ -# Copyright (c) 2025, PostgreSQL Global Development Group +# Copyright (c) 2026, PostgreSQL Global Development Group =pod @@ -149,12 +149,6 @@ The B to use when enabling data checksums, default is 0. The B to use when enabling data checksums, default is 100. -=item fast - -If set to C an immediate checkpoint will be issued after data -checksums are enabled. Setting this to false will lead to slower tests. -The default is C. - =item wait If defined, the function will wait for the state defined in this parameter, @@ -173,16 +167,13 @@ sub enable_data_checksums # Set sane defaults for the parameters $params{cost_delay} = 0 unless (defined($params{cost_delay})); $params{cost_limit} = 100 unless (defined($params{cost_limit})); - $params{fast} = 'true' unless (defined($params{fast})); my $query = <<'EOQ'; -SELECT pg_enable_data_checksums(%s, %s, %s); +SELECT pg_enable_data_checksums(%s, %s); EOQ - $postgresnode->safe_psql( - 'postgres', - sprintf($query, - $params{cost_delay}, $params{cost_limit}, $params{fast})); + $postgresnode->safe_psql('postgres', + sprintf($query, $params{cost_delay}, $params{cost_limit})); wait_for_checksum_state($postgresnode, $params{wait}) if (defined($params{wait})); @@ -201,13 +192,6 @@ waiting timing out, before returning. The function will wait for $PostgreSQL::Test::Utils::timeout_default seconds before timing out. Unlike in C the value of the parameter is discarded. -=over - -=item fast - -If set to C the checkpoint after disabling will be set to immediate, else -it will be deferred. The default if no value is set is B. - =back =cut @@ -217,14 +201,8 @@ sub disable_data_checksums my $postgresnode = shift; my %params = @_; - # Set sane defaults for the parameters - $params{fast} = 'true' unless (defined($params{fast})); - - my $query = <<'EOQ'; -SELECT pg_disable_data_checksums(%s); -EOQ - - $postgresnode->safe_psql('postgres', sprintf($query, $params{fast})); + $postgresnode->safe_psql('postgres', + 'SELECT pg_disable_data_checksums();'); wait_for_checksum_state($postgresnode, 'off') if (defined($params{wait})); } diff --git a/src/test/modules/test_checksums/test_checksums--1.0.sql b/src/test/modules/test_checksums/test_checksums--1.0.sql index aa086d5c430..8322b64a6ae 100644 --- a/src/test/modules/test_checksums/test_checksums--1.0.sql +++ b/src/test/modules/test_checksums/test_checksums--1.0.sql @@ -7,22 +7,26 @@ CREATE FUNCTION dcw_inject_delay_barrier(attach boolean DEFAULT true) RETURNS pg_catalog.void AS 'MODULE_PATHNAME' LANGUAGE C; -CREATE FUNCTION dcw_inject_fail_database(attach boolean DEFAULT true) +CREATE FUNCTION dcw_inject_launcher_delay(attach boolean DEFAULT true) RETURNS pg_catalog.void AS 'MODULE_PATHNAME' LANGUAGE C; -CREATE FUNCTION dcw_prune_dblist(attach boolean DEFAULT true) +CREATE FUNCTION dcw_inject_startup_delay(attach boolean DEFAULT true) RETURNS pg_catalog.void AS 'MODULE_PATHNAME' LANGUAGE C; -CREATE FUNCTION dcw_fake_temptable(attach boolean DEFAULT true) +CREATE FUNCTION dcw_inject_fail_database(attach boolean DEFAULT true) RETURNS pg_catalog.void AS 'MODULE_PATHNAME' LANGUAGE C; -CREATE FUNCTION dc_crash_before_checkpoint(attach boolean DEFAULT true) +CREATE FUNCTION dcw_prune_dblist(attach boolean DEFAULT true) + RETURNS pg_catalog.void + AS 'MODULE_PATHNAME' LANGUAGE C; + +CREATE FUNCTION dcw_fake_temptable(attach boolean DEFAULT true) RETURNS pg_catalog.void AS 'MODULE_PATHNAME' LANGUAGE C; -CREATE FUNCTION dc_crash_before_xlog(attach boolean DEFAULT true) +CREATE FUNCTION dc_crash_after_checksum_enable(attach boolean DEFAULT true) RETURNS pg_catalog.void AS 'MODULE_PATHNAME' LANGUAGE C; diff --git a/src/test/modules/test_checksums/test_checksums.c b/src/test/modules/test_checksums/test_checksums.c index c182f2c868b..8e4c676df76 100644 --- a/src/test/modules/test_checksums/test_checksums.c +++ b/src/test/modules/test_checksums/test_checksums.c @@ -3,7 +3,7 @@ * test_checksums.c * Test data checksums * - * Copyright (c) 2025, PostgreSQL Global Development Group + * Copyright (c) 2026, PostgreSQL Global Development Group * * IDENTIFICATION * src/test/modules/test_checksums/test_checksums.c @@ -67,6 +67,50 @@ dcw_inject_delay_barrier(PG_FUNCTION_ARGS) PG_RETURN_VOID(); } +PG_FUNCTION_INFO_V1(dcw_inject_launcher_delay); +Datum +dcw_inject_launcher_delay(PG_FUNCTION_ARGS) +{ +#ifdef USE_INJECTION_POINTS + bool attach = PG_GETARG_BOOL(0); + + if (attach) + InjectionPointAttach("datachecksumsworker-launcher-delay", + "test_checksums", + "dc_delay_barrier", + NULL, + 0); + else + InjectionPointDetach("datachecksumsworker-launcher-delay"); +#else + elog(ERROR, + "test is not working as intended when injection points are disabled"); +#endif + PG_RETURN_VOID(); +} + +PG_FUNCTION_INFO_V1(dcw_inject_startup_delay); +Datum +dcw_inject_startup_delay(PG_FUNCTION_ARGS) +{ +#ifdef USE_INJECTION_POINTS + bool attach = PG_GETARG_BOOL(0); + + if (attach) + InjectionPointAttach("datachecksumsworker-startup-delay", + "test_checksums", + "dc_delay_barrier", + NULL, + 0); + else + InjectionPointDetach("datachecksumsworker-startup-delay"); +#else + elog(ERROR, + "test is not working as intended when injection points are disabled"); +#endif + PG_RETURN_VOID(); +} + void dc_fail_database(const char *name, const void *private_data, void *arg) { @@ -180,19 +224,19 @@ crash(const char *name, const void *private_data, void *arg) } /* - * dc_crash_before_checkpoint + * dc_crash_after_checksum_enable * * Ensure that the server crashes just before the checkpoint is issued after * enabling or disabling checksums. */ -PG_FUNCTION_INFO_V1(dc_crash_before_checkpoint); +PG_FUNCTION_INFO_V1(dc_crash_after_checksum_enable); Datum -dc_crash_before_checkpoint(PG_FUNCTION_ARGS) +dc_crash_after_checksum_enable(PG_FUNCTION_ARGS) { #ifdef USE_INJECTION_POINTS - InjectionPointAttach("datachecksums-enable-checksums-pre-checkpoint", + InjectionPointAttach("datachecksums-enable-checksums-done", "test_checksums", "crash", NULL, 0); - InjectionPointAttach("datachecksums-disable-checksums-pre-checkpoint", + InjectionPointAttach("datachecksums-disable-checksums-done", "test_checksums", "crash", NULL, 0); #else elog(ERROR, @@ -200,26 +244,3 @@ dc_crash_before_checkpoint(PG_FUNCTION_ARGS) #endif PG_RETURN_VOID(); } - -/* - * dc_crash_before_xlog - * - * Ensure that the server crashes right before it is about insert the xlog - * record XLOG_CHECKSUMS. - */ -PG_FUNCTION_INFO_V1(dc_crash_before_xlog); -Datum -dc_crash_before_xlog(PG_FUNCTION_ARGS) -{ -#ifdef USE_INJECTION_POINTS - bool attach = PG_GETARG_BOOL(0); - - if (attach) - InjectionPointAttach("datachecksums-xlogchecksums-pre-xloginsert", - "test_checksums", "crash", NULL, 0); -#else - elog(ERROR, - "test is not working as intended when injection points are disabled"); -#endif - PG_RETURN_VOID(); -} diff --git a/src/test/perl/PostgreSQL/Test/Cluster.pm b/src/test/perl/PostgreSQL/Test/Cluster.pm index 1b510a294d5..9da65f2a55a 100644 --- a/src/test/perl/PostgreSQL/Test/Cluster.pm +++ b/src/test/perl/PostgreSQL/Test/Cluster.pm @@ -3899,15 +3899,6 @@ sub checksum_disable_offline return; } -sub checksum_verify_offline -{ - my ($self) = @_; - - PostgreSQL::Test::Utils::system_or_bail('pg_checksums', '-D', - $self->data_dir, '-c'); - return; -} - =pod =back diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out index 3ef1d5916d9..697e0329da4 100644 --- a/src/test/regress/expected/rules.out +++ b/src/test/regress/expected/rules.out @@ -2095,10 +2095,9 @@ pg_stat_progress_data_checksums| SELECT s.pid, CASE s.param1 WHEN 0 THEN 'enabling'::text WHEN 1 THEN 'disabling'::text - WHEN 2 THEN 'waiting'::text - WHEN 3 THEN 'waiting on temporary tables'::text - WHEN 4 THEN 'waiting on checkpoint'::text - WHEN 5 THEN 'done'::text + WHEN 2 THEN 'waiting on temporary tables'::text + WHEN 3 THEN 'waiting on barrier'::text + WHEN 4 THEN 'done'::text ELSE NULL::text END AS phase, CASE s.param2 diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 2b87e079aa9..f4cce543113 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -4321,6 +4321,7 @@ xl_btree_split xl_btree_unlink_page xl_btree_update xl_btree_vacuum +xl_checkpoint_redo xl_checksum_state xl_clog_truncate xl_commit_ts_truncate -- 2.39.3 (Apple Git-146)