From 40d26a0406a213207101a837999e8b57df25fc1d Mon Sep 17 00:00:00 2001 From: Tomas Vondra Date: Wed, 5 Nov 2025 13:26:41 +0100 Subject: [PATCH v20251105 2/3] review --- doc/src/sgml/func/func-admin.sgml | 20 ++++++++++++----- doc/src/sgml/glossary.sgml | 2 +- doc/src/sgml/monitoring.sgml | 23 ++++++++++++++++++-- doc/src/sgml/wal.sgml | 18 +++++++++++++++ src/backend/access/transam/xlog.c | 17 +++++++++------ src/backend/postmaster/datachecksumsworker.c | 14 ++++++++++++ src/backend/storage/ipc/ipci.c | 1 - src/include/postmaster/datachecksumsworker.h | 2 +- src/tools/pgindent/typedefs.list | 2 ++ 9 files changed, 82 insertions(+), 17 deletions(-) diff --git a/doc/src/sgml/func/func-admin.sgml b/doc/src/sgml/func/func-admin.sgml index f3a8782ede0..4d6f6a7e486 100644 --- a/doc/src/sgml/func/func-admin.sgml +++ b/doc/src/sgml/func/func-admin.sgml @@ -3008,11 +3008,11 @@ SELECT convert_from(pg_read_binary_file('file_in_utf8.txt'), 'UTF8'); pg_enable_data_checksums - pg_enable_data_checksums ( cost_delay int, cost_limit int ) + pg_enable_data_checksums ( cost_delay int, cost_limit int, fast bool ) void - Initiates data checksums for the cluster. This will switch the data + Initiates the process of enabling data checksums for the cluster. This will switch the data checksums mode to inprogress-on as well as start a background worker that will process all pages in the database and enable checksums on them. When all data pages have had checksums @@ -3023,7 +3023,9 @@ SELECT convert_from(pg_read_binary_file('file_in_utf8.txt'), 'UTF8'); If cost_delay and cost_limit are specified, the speed of the process is throttled using the same principles as Cost-based Vacuum Delay. - + + FIXME missing documentation of the "fast" parameter + @@ -3031,7 +3033,7 @@ SELECT convert_from(pg_read_binary_file('file_in_utf8.txt'), 'UTF8'); pg_disable_data_checksums - pg_disable_data_checksums () + pg_disable_data_checksums ( fast bool ) void @@ -3042,12 +3044,20 @@ SELECT convert_from(pg_read_binary_file('file_in_utf8.txt'), 'UTF8'); changed to off. At this point the data pages will still have checksums recorded but they are not updated when pages are modified. - + + FIXME missing documentation of the "fast" parameter + + + FIXME I think this should briefly explain how this interacts with checkpoints (through + the fast parameters). It should probably also discuss how this affects streaming standby + due to forcing a restart point, etc. And maybe comment on possible mitigations? + + diff --git a/doc/src/sgml/glossary.sgml b/doc/src/sgml/glossary.sgml index 9bac0c96348..3ba0e8c6c5c 100644 --- a/doc/src/sgml/glossary.sgml +++ b/doc/src/sgml/glossary.sgml @@ -580,7 +580,7 @@ An auxiliary process - which enables or disables data checksums in a specific database. + which enables data checksums in a specific database. diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml index b56e220f3d8..7efa1af746a 100644 --- a/doc/src/sgml/monitoring.sgml +++ b/doc/src/sgml/monitoring.sgml @@ -3554,7 +3554,10 @@ description | Waiting for a newly initialized WAL file to reach durable storage database (or on a shared object). Detected failures are reported regardless of the setting. - + + XXX I'm not sure what "regardless" means in this context. I guess + it means we don't reset the counters when disabling checksums? + @@ -6959,6 +6962,9 @@ FROM pg_stat_get_backend_idset() AS backendid; pg_stat_progress_data_checksums view will contain a row for the launcher process, and one row for each worker process which is currently calculating checksums for the data pages in one database. + The launcher provides overview of the overall progress (how many database + have been processed, how many remain), while the workers track progress for + currently processed databases. @@ -6984,7 +6990,8 @@ FROM pg_stat_get_backend_idset() AS backendid; pidinteger - Process ID of a datachecksumworker process. + Process ID of a datachecksumsworker process. + FIXME Is datachecksumsworker defined somewhere? Does it refer to the launcher too? @@ -7127,6 +7134,12 @@ FROM pg_stat_get_backend_idset() AS backendid; The command is currently disabling data checksums on the cluster. + + waiting + + FIXME + + waiting on temporary tables @@ -7141,6 +7154,12 @@ FROM pg_stat_get_backend_idset() AS backendid; state before finishing. + + done + + FIXME + +
diff --git a/doc/src/sgml/wal.sgml b/doc/src/sgml/wal.sgml index 0ada90ca0b1..8ef16d7769f 100644 --- a/doc/src/sgml/wal.sgml +++ b/doc/src/sgml/wal.sgml @@ -284,6 +284,11 @@ functions. + + Both enabling and disabling checksums happens in two phases, separated by + a checkpoint to ensure durability. + + Enabling checksums will put the cluster checksum mode in inprogress-on mode. During this time, checksums will be @@ -314,6 +319,14 @@ no support for resuming work from where it was interrupted. + + Disabling checksums will put the cluster checksum mode in + inprogress-off mode. During this time, checksums will be + written but not verified. After all processes acknowledge the change, + the mode will automatically switch to off. In this case + rewriting all blocks is not needed, but checkpoints are still required. + + Enabling checksums can cause significant I/O to the system, as most of the @@ -324,6 +337,11 @@ + + XXX Maybe this is the place that should mention checkpoints/restartpoints, + how it impacts systems/replication and how to mitigate that? + + diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index f7633f47551..807b82eed4f 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -607,7 +607,7 @@ typedef struct ChecksumBarrierCondition int barrier_ne[MAX_BARRIER_CONDITIONS]; /* The number of elements in the barrier_ne set */ int barrier_ne_sz; -} ChecksumBarrierCondition; +} ChecksumBarrierCondition; static const ChecksumBarrierCondition checksum_barriers[] = { @@ -618,7 +618,6 @@ static const ChecksumBarrierCondition checksum_barriers[] = {-1} }; - /* * Calculate the amount of space left on the page after 'endptr'. Beware * multiple evaluation! @@ -694,10 +693,10 @@ static TimeLineID LocalMinRecoveryPointTLI; static bool updateMinRecoveryPoint = true; /* - * Local state fror Controlfile data_checksum_version. After initialization + * Local state for Controlfile data_checksum_version. After initialization * this is only updated when absorbing a procsignal barrier during interrupt * processing. The reason for keeping a copy in backend-private memory is to - * avoid locking for interrogating checksum state. Possible values are the + * avoid locking for interrogating the checksum state. Possible values are the * checksum versions defined in storage/bufpage.h as well as zero when data * checksums are disabled. */ @@ -712,6 +711,10 @@ static uint32 LocalDataChecksumVersion = 0; * processing the barrier. This may happen if the process is spawned between * the update of XLogCtl->data_checksum_version and the barrier being emitted. * This can only happen on the very first barrier so mark that with this flag. + * + * XXX Is PG_DATA_CHECKSUM_ON_VERSION still the only transition with an assert? + * I think it was replaced by checking checksum_barriers for every transition, + * with elog(ERROR), no? */ static bool InitialDataChecksumTransition = true; @@ -4335,7 +4338,7 @@ InitControlFile(uint64 sysidentifier, uint32 data_checksum_version) /* * Set the data_checksum_version value into XLogCtl, which is where all - * processes get the current value from. (Maybe it should go just there?) + * processes get the current value from. */ XLogCtl->data_checksum_version = data_checksum_version; } @@ -4921,7 +4924,7 @@ SetDataChecksumsOff(bool immediate_checkpoint) uint64 barrier; int flags; - Assert(ControlFile); + Assert(ControlFile != NULL); SpinLockAcquire(&XLogCtl->info_lck); @@ -5030,7 +5033,7 @@ SetDataChecksumsOff(bool immediate_checkpoint) bool AbsorbDataChecksumsBarrier(int target_state) { - const ChecksumBarrierCondition *condition = checksum_barriers; + const ChecksumBarrierCondition *condition = checksum_barriers; int current = LocalDataChecksumVersion; bool found = false; diff --git a/src/backend/postmaster/datachecksumsworker.c b/src/backend/postmaster/datachecksumsworker.c index 3deb57a96de..67045b9014d 100644 --- a/src/backend/postmaster/datachecksumsworker.c +++ b/src/backend/postmaster/datachecksumsworker.c @@ -52,6 +52,9 @@ * - Data checksums SHALL NOT be considered enabled cluster-wide until all * currently connected backends have the local state "enabled" * + * FIXME I'm not 100% sure I understand what the two above points say. What does + * "violate local data_checksums state" means"? + * * There are two levels of synchronization required for enabling data checksums * in an online cluster: (i) changing state in the active backends ("on", * "off", "inprogress-on" and "inprogress-off"), and (ii) ensuring no @@ -60,6 +63,10 @@ * latter with ensuring that any concurrent activity cannot break the data * checksum contract during processing. * + * FIXME This para talks about "enabling" but then mentions all four states, + * including "inprogress-off" and "off". Maybe it should talk about "changing + * data_checksums" instead? + * * Synchronizing the state change is done with procsignal barriers, where the * WAL logging backend updating the global state in the controlfile will wait * for all other backends to absorb the barrier. Barrier absorption will happen @@ -88,6 +95,10 @@ * enables data checksums cluster-wide, there are four sets of backends where * Bd shall be an empty set: * + * FIXME Maybe mention which process initiates the procsignalbarrier? + * FIXME Don't we actually wait or the barrier before we start rewriting data? + * I think Bd has to be empty prior to that, otherwise it might break checksums. + * * Bg: Backend updating the global state and emitting the procsignalbarrier * Bd: Backends in "off" state * Be: Backends in "on" state @@ -124,6 +135,8 @@ * stop writing data checksums as no backend is enforcing data checksum * validation any longer. * + * XXX Maybe it'd make sense to "visualize" the progress between the states + * and barriers in some way. Say, by doing * * Potential optimizations * ----------------------- @@ -148,6 +161,7 @@ * to enable checksums on a cluster which is in inprogress-on state and * may have checksummed pages (make pg_checksums be able to resume an * online operation). + * * Restartability (not necessarily with page granularity). * * * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group diff --git a/src/backend/storage/ipc/ipci.c b/src/backend/storage/ipc/ipci.c index 44213d140ae..9014e90f1c7 100644 --- a/src/backend/storage/ipc/ipci.c +++ b/src/backend/storage/ipc/ipci.c @@ -31,7 +31,6 @@ #include "postmaster/bgworker_internals.h" #include "postmaster/bgwriter.h" #include "postmaster/datachecksumsworker.h" -#include "postmaster/postmaster.h" #include "postmaster/walsummarizer.h" #include "replication/logicallauncher.h" #include "replication/origin.h" diff --git a/src/include/postmaster/datachecksumsworker.h b/src/include/postmaster/datachecksumsworker.h index 2cd066fd0fe..0daef709ec8 100644 --- a/src/include/postmaster/datachecksumsworker.h +++ b/src/include/postmaster/datachecksumsworker.h @@ -24,7 +24,7 @@ typedef enum DataChecksumsWorkerOperation ENABLE_DATACHECKSUMS, DISABLE_DATACHECKSUMS, /* TODO: VERIFY_DATACHECKSUMS, */ -} DataChecksumsWorkerOperation; +} DataChecksumsWorkerOperation; /* * Possible states for a database entry which has been processed. Exported diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 9d6b4e57cf3..3049e6018b3 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -417,6 +417,7 @@ CheckPointStmt CheckpointStatsData CheckpointerRequest CheckpointerShmemStruct +ChecksumBarrierCondition ChecksumType Chromosome CkptSortItem @@ -587,6 +588,7 @@ CustomScan CustomScanMethods CustomScanState CycleCtr +DataChecksumsWorkerOperation DBState DCHCacheEntry DEADLOCK_INFO -- 2.51.0