From 015b9095011b3e606a5a7e8fbb70936cd7c2c747 Mon Sep 17 00:00:00 2001 From: Daniel Gustafsson Date: Tue, 28 Apr 2026 23:49:03 +0200 Subject: [PATCH v2 3/8] Handle data_checksum state changes during launcher_exit When erroring out from the datachecksums launcher during data checksum enabling, before state has transitioned to "on", we revert back to the "off" state. Since checksums weren't enabled, there is no use staying in an inprogress state since the checksum launcher currently doesn't support restarting from where it left off. Should restartability get added in the future, this would need to be revisited. This state transition was, however, missing from the allowed transitions in the state machine, causing an error. Author: Daniel Gustafsson Discussion: https://postgr.es/m/xxx --- src/backend/access/transam/xlog.c | 15 +++--- src/backend/postmaster/datachecksum_state.c | 51 +++++++++++++++++++-- 2 files changed, 54 insertions(+), 12 deletions(-) diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index e39af79c03b..f74d7a2ab1a 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -4871,13 +4871,14 @@ SetDataChecksumsOff(void) } /* - * If data checksums are currently enabled we first transition to the - * "inprogress-off" state during which backends continue to write - * checksums without verifying them. When all backends are in - * "inprogress-off" the next transition to "off" can be performed, after - * which all data checksum processing is disabled. - */ - if (XLogCtl->data_checksum_version == PG_DATA_CHECKSUM_VERSION) + * If data checksums are currently enabled, or in the process of being + * enabled, we first transition to the "inprogress-off" state during which + * backends continue to write checksums without verifying them. When all + * backends are in "inprogress-off" the next transition to "off" can be + * performed, after which all data checksum processing is disabled.
+ */ + if (XLogCtl->data_checksum_version == PG_DATA_CHECKSUM_VERSION || + XLogCtl->data_checksum_version == PG_DATA_CHECKSUM_INPROGRESS_ON) { SpinLockRelease(&XLogCtl->info_lck); diff --git a/src/backend/postmaster/datachecksum_state.c b/src/backend/postmaster/datachecksum_state.c index ea102086144..e26803bc501 100644 --- a/src/backend/postmaster/datachecksum_state.c +++ b/src/backend/postmaster/datachecksum_state.c @@ -235,7 +235,7 @@ typedef struct ChecksumBarrierCondition int to; } ChecksumBarrierCondition; -static const ChecksumBarrierCondition checksum_barriers[6] = +static const ChecksumBarrierCondition checksum_barriers[7] = { /* * Disabling checksums: If checksums are currently enabled, disabling must @@ -261,6 +261,12 @@ static const ChecksumBarrierCondition checksum_barriers[6] = * checksums, we can go straight back to 'on' */ {PG_DATA_CHECKSUM_INPROGRESS_OFF, PG_DATA_CHECKSUM_VERSION}, + + /* + * If checksums are being enabled when launcher_exit is executed, the + * state is first set to inprogress-off since on cannot be reached then. + */ + {PG_DATA_CHECKSUM_INPROGRESS_ON, PG_DATA_CHECKSUM_INPROGRESS_OFF}, }; /* @@ -323,6 +329,13 @@ typedef struct DataChecksumsStateStruct * catalogs */ bool process_shared_catalogs; + + /* + * List of PIDs for which the launcher_exit should avoid doing any abort + * cleanup, as they are exiting gracefully due to being launched while + * another launcher is already running.
+ */ + List *no_abort; } DataChecksumsStateStruct; /* Shared memory segment for datachecksumsworker */ @@ -348,6 +361,7 @@ static DataChecksumsWorkerOperation operation; /* Prototypes */ static void DataChecksumsShmemRequest(void *arg); +static void DataChecksumsShmemInit(void *arg); static bool DatabaseExists(Oid dboid); static List *BuildDatabaseList(void); static List *BuildRelationList(bool temp_relations, bool include_shared); @@ -360,6 +374,7 @@ static void WaitForAllTransactionsToFinish(void); const ShmemCallbacks DataChecksumsShmemCallbacks = { .request_fn = DataChecksumsShmemRequest, + .init_fn = DataChecksumsShmemInit, }; /***************************************************************************** @@ -771,7 +786,9 @@ ProcessDatabase(DataChecksumsWorkerDatabase *db) pid_t pid; char activity[NAMEDATALEN + 64]; + LWLockAcquire(DataChecksumsWorkerLock, LW_EXCLUSIVE); DataChecksumState->success = DATACHECKSUMSWORKER_FAILED; + LWLockRelease(DataChecksumsWorkerLock); memset(&bgw, 0, sizeof(bgw)); bgw.bgw_flags = BGWORKER_SHMEM_ACCESS | BGWORKER_BACKEND_DATABASE_CONNECTION; @@ -881,14 +898,29 @@ ProcessDatabase(DataChecksumsWorkerDatabase *db) /* * launcher_exit * - * Internal routine for cleaning up state when the launcher process exits. We - * need to clean up the abort flag to ensure that processing started again if - * it was previously aborted (note: started again, *not* restarted from where - * it left off). + * Internal routine for cleaning up state when the launcher process exits. If + * the process is exiting because it is a duplicate launcher, cleanup should + * not be done as that would interfere with the running launcher. Otherwise, + * we need to clean up the abort flag to ensure that processing can be started + * again if it was previously aborted (note: started again, *not* restarted + * from where it left off).
*/ static void launcher_exit(int code, Datum arg) { + /* Check for processes which are exiting gracefully */ + LWLockAcquire(DataChecksumsWorkerLock, LW_EXCLUSIVE); + foreach_int(dup, DataChecksumState->no_abort) + { + if (dup == MyProcPid) + { + DataChecksumState->no_abort = list_delete_int(DataChecksumState->no_abort, MyProcPid); + LWLockRelease(DataChecksumsWorkerLock); + return; + } + } + LWLockRelease(DataChecksumsWorkerLock); + abort_requested = false; if (launcher_running) @@ -1040,6 +1072,7 @@ DataChecksumsWorkerLauncherMain(Datum arg) ereport(LOG, errmsg("background worker \"datachecksums launcher\" already running, exiting")); /* Launcher was already running, let it finish */ + DataChecksumState->no_abort = lappend_int(DataChecksumState->no_abort, MyProcPid); LWLockRelease(DataChecksumsWorkerLock); return; } @@ -1278,6 +1311,14 @@ DataChecksumsShmemRequest(void *arg) ); } +static void +DataChecksumsShmemInit(void *arg) +{ + DataChecksumState->no_abort = NIL; + DataChecksumState->launcher_running = false; + DataChecksumState->worker_pid = InvalidPid; +} + /* * DatabaseExists * -- 2.39.3 (Apple Git-146)