From aa8a3867b3316a1fa379b1e70553057d5d96fedc Mon Sep 17 00:00:00 2001
From: Tomas Vondra <tomas@vondra.me>
Date: Sat, 8 Mar 2025 19:34:50 +0100
Subject: [PATCH v20250309 4/5] make progress reporting work

- Splits the progress status by worker type - one row for launcher,
  one row for checksum worker (might be more with parallel workers).

- The launcher only updates database counters, the workers only set
  relation/block counters.

- Also reworks the columns in the system view a bit, discards the
  "current" fields (we still know the database for each worker).

- Issue: Not sure what to do about relation forks, at this point it
  tracks only blocks for MAIN fork.
---
 src/backend/catalog/system_views.sql         | 36 ++++----
 src/backend/postmaster/datachecksumsworker.c | 91 +++++++++++++++++---
 src/include/commands/progress.h              | 27 +++---
 3 files changed, 112 insertions(+), 42 deletions(-)

diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index 0d62976dc1f..4330d0ad656 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -1335,25 +1335,25 @@ CREATE VIEW pg_stat_progress_copy AS
         LEFT JOIN pg_database D ON S.datid = D.oid;
 
 CREATE VIEW pg_stat_progress_data_checksums AS
-	SELECT
-		S.pid AS pid, S.datid AS datid, D.datname AS datname,
-		CASE S.param1 WHEN 0 THEN 'enabling'
+    SELECT
+        S.pid AS pid, S.datid, D.datname AS datname,
+        CASE S.param1 WHEN 0 THEN 'enabling'
                       WHEN 1 THEN 'disabling'
-					  WHEN 2 THEN 'waiting'
-					  WHEN 3 THEN 'waiting on backends'
-					  WHEN 4 THEN 'waiting on temporary tables'
-					  WHEN 5 THEN 'done'
-					  END AS phase,
-		CASE S.param2 WHEN -1 THEN NULL ELSE S.param2 END AS databases_total,
-		CASE S.param3 WHEN -1 THEN NULL ELSE S.param3 END AS relations_total,
-		S.param4 AS databases_processed,
-		S.param5 AS relations_processed,
-		S.param6 AS databases_current,
-		S.param7 AS relation_current,
-		S.param8 AS relation_current_blocks,
-		S.param9 AS relation_current_blocks_processed
-	FROM pg_stat_get_progress_info('DATACHECKSUMS') AS S
-		LEFT JOIN pg_database D ON S.datid = D.oid;
+                      WHEN 2 THEN 'waiting'
+                      WHEN 3 THEN 'waiting on backends'
+                      WHEN 4 THEN 'waiting on temporary tables'
+                      WHEN 5 THEN 'waiting on checkpoint'
+                      WHEN 6 THEN 'done'
+                      END AS phase,
+        CASE S.param2 WHEN -1 THEN NULL ELSE S.param2 END AS databases_total,
+        S.param3 AS databases_done,
+        CASE S.param4 WHEN -1 THEN NULL ELSE S.param4 END AS relations_total,
+        CASE S.param5 WHEN -1 THEN NULL ELSE S.param5 END AS relations_done,
+        CASE S.param6 WHEN -1 THEN NULL ELSE S.param6 END AS blocks_total,
+        CASE S.param7 WHEN -1 THEN NULL ELSE S.param7 END AS blocks_done
+    FROM pg_stat_get_progress_info('DATACHECKSUMS') AS S
+        LEFT JOIN pg_database D ON S.datid = D.oid
+    ORDER BY S.datid; -- return the launcher process first
 
 CREATE VIEW pg_user_mappings AS
     SELECT
diff --git a/src/backend/postmaster/datachecksumsworker.c b/src/backend/postmaster/datachecksumsworker.c
index b9d003423c0..f79dc290b2b 100644
--- a/src/backend/postmaster/datachecksumsworker.c
+++ b/src/backend/postmaster/datachecksumsworker.c
@@ -409,6 +409,11 @@ ProcessSingleRelationFork(Relation reln, ForkNumber forkNum, BufferAccessStrateg
 			 relns, RelationGetRelationName(reln), forkNames[forkNum], numblocks);
 	pgstat_report_activity(STATE_RUNNING, activity);
 
+	/* XXX only do this for main forks, maybe we should do it for all? */
+	if (forkNum == MAIN_FORKNUM)
+		pgstat_progress_update_param(PROGRESS_DATACHECKSUMS_BLOCKS_TOTAL,
+									 numblocks);
+
 	/*
 	 * We are looping over the blocks which existed at the time of process
 	 * start, which is safe since new blocks are created with checksums set
@@ -450,6 +455,11 @@ ProcessSingleRelationFork(Relation reln, ForkNumber forkNum, BufferAccessStrateg
 		if (abort_requested)
 			return false;
 
+		/* XXX only do this for main forks, maybe we should do it for all? */
+		if (forkNum == MAIN_FORKNUM)
+			pgstat_progress_update_param(PROGRESS_DATACHECKSUMS_BLOCKS_DONE,
+										 (blknum + 1));
+
 		vacuum_delay_point(false);
 	}
 
@@ -798,6 +808,8 @@ again:
 		pgstat_progress_update_param(PROGRESS_DATACHECKSUMS_PHASE,
 									 PROGRESS_DATACHECKSUMS_PHASE_WAITING_BACKENDS);
 
+		/* XXX isn't it weird there's no wait between the phase updates? */
+
 		/*
 		 * Set the state to inprogress-on and wait on the procsignal barrier.
 		 */
@@ -908,8 +920,29 @@ ProcessAllDatabases(bool immediate_checkpoint)
 	 * columns for processed databases is instead increased such that it can
 	 * be compared against the total.
 	 */
-	pgstat_progress_update_param(PROGRESS_DATACHECKSUMS_TOTAL_DB,
-								 list_length(DatabaseList));
+	{
+		const int	index[] = {
+			PROGRESS_DATACHECKSUMS_DBS_TOTAL,
+			PROGRESS_DATACHECKSUMS_DBS_DONE,
+			PROGRESS_DATACHECKSUMS_RELS_TOTAL,
+			PROGRESS_DATACHECKSUMS_RELS_DONE,
+			PROGRESS_DATACHECKSUMS_BLOCKS_TOTAL,
+			PROGRESS_DATACHECKSUMS_BLOCKS_DONE,
+		};
+
+		int64	vals[6];
+
+		vals[0] = list_length(DatabaseList);
+		vals[1] = 0;
+
+		/* translated to NULL */
+		vals[2] = -1;
+		vals[3] = -1;
+		vals[4] = -1;
+		vals[5] = -1;
+
+		pgstat_progress_update_multi_param(6, index, vals);
+	}
 
 	while (true)
 	{
@@ -921,14 +954,6 @@ ProcessAllDatabases(bool immediate_checkpoint)
 			DataChecksumsWorkerResultEntry *entry;
 			bool		found;
 
-			/*
-			 * Indicate which database is being processed set the number of
-			 * relations to -1 to clear field from previous values. -1 will
-			 * translate to NULL in the progress view.
-			 */
-			pgstat_progress_update_param(PROGRESS_DATACHECKSUMS_CUR_DB, db->dboid);
-			pgstat_progress_update_param(PROGRESS_DATACHECKSUMS_TOTAL_REL, -1);
-
 			/*
 			 * Check if this database has been processed already, and if so
 			 * whether it should be retried or skipped.
@@ -957,6 +982,12 @@ ProcessAllDatabases(bool immediate_checkpoint)
 			result = ProcessDatabase(db);
 			processed_databases++;
 
+			/*
+			 * Update the number of processed databases in the progress report.
+			 */
+			pgstat_progress_update_param(PROGRESS_DATACHECKSUMS_DBS_DONE,
+										 processed_databases);
+
 			if (result == DATACHECKSUMSWORKER_SUCCESSFUL)
 			{
 				/*
@@ -1050,6 +1081,13 @@ ProcessAllDatabases(bool immediate_checkpoint)
 				 errhint("The server log might have more information on the cause of the error.")));
 	}
 
+	/*
+	 * When enabling checksums, we have to wait for a checkpoint for the
+	 * checksums to e.
+	 */
+	pgstat_progress_update_param(PROGRESS_DATACHECKSUMS_PHASE,
+								 PROGRESS_DATACHECKSUMS_PHASE_WAITING_CHECKPOINT);
+
 	/*
 	 * Force a checkpoint to get everything out to disk. The use of immediate
 	 * checkpoints is for running tests, as they would otherwise not execute
@@ -1255,6 +1293,7 @@ DataChecksumsWorkerMain(Datum arg)
 	List	   *InitialTempTableList = NIL;
 	BufferAccessStrategy strategy;
 	bool		aborted = false;
+	int64		rels_done;
 
 	enabling_checksums = true;
 
@@ -1268,6 +1307,10 @@ DataChecksumsWorkerMain(Datum arg)
 	BackgroundWorkerInitializeConnectionByOid(dboid, InvalidOid,
 											  BGWORKER_BYPASS_ALLOWCONN);
 
+	/* worker will have a separate entry in pg_stat_progress_data_checksums */
+	pgstat_progress_start_command(PROGRESS_COMMAND_DATACHECKSUMS,
+								  InvalidOid);
+
 	/*
 	 * Get a list of all temp tables present as we start in this database. We
 	 * need to wait until they are all gone until we are done, since we cannot
@@ -1294,6 +1337,24 @@ DataChecksumsWorkerMain(Datum arg)
 
 	RelationList = BuildRelationList(false,
 									 DataChecksumsWorkerShmem->process_shared_catalogs);
+
+	/* Update the total number of relations to be processed in this DB. */
+	{
+		const int	index[] = {
+			PROGRESS_DATACHECKSUMS_RELS_TOTAL,
+			PROGRESS_DATACHECKSUMS_RELS_DONE
+		};
+
+		int64	vals[2];
+
+		vals[0] = list_length(RelationList);
+		vals[1] = 0;
+
+		pgstat_progress_update_multi_param(2, index, vals);
+	}
+
+	/* process the relations */
+	rels_done = 0;
 	foreach_oid(reloid, RelationList)
 	{
 		if (!ProcessSingleRelationByOid(reloid, strategy))
@@ -1301,6 +1362,9 @@ DataChecksumsWorkerMain(Datum arg)
 			aborted = true;
 			break;
 		}
+
+		pgstat_progress_update_param(PROGRESS_DATACHECKSUMS_RELS_DONE,
+									 ++rels_done);
 	}
 	list_free(RelationList);
 
@@ -1313,6 +1377,10 @@ DataChecksumsWorkerMain(Datum arg)
 		return;
 	}
 
+	/* The worker is about to wait for temporary tables to go away. */
+	pgstat_progress_update_param(PROGRESS_DATACHECKSUMS_PHASE,
+								 PROGRESS_DATACHECKSUMS_PHASE_WAITING_TEMPREL);
+
 	/*
 	 * Wait for all temp tables that existed when we started to go away. This
 	 * is necessary since we cannot "reach" them to enable checksums. Any temp
@@ -1369,5 +1437,8 @@ DataChecksumsWorkerMain(Datum arg)
 
 	list_free(InitialTempTableList);
 
+	/* worker done */
+	pgstat_progress_end_command();
+
 	DataChecksumsWorkerShmem->success = DATACHECKSUMSWORKER_SUCCESSFUL;
 }
diff --git a/src/include/commands/progress.h b/src/include/commands/progress.h
index 1ebd0c792b4..94b478a6cc9 100644
--- a/src/include/commands/progress.h
+++ b/src/include/commands/progress.h
@@ -158,21 +158,20 @@
 #define PROGRESS_COPY_TYPE_CALLBACK 4
 
 /* Progress parameters for PROGRESS_DATACHECKSUMS */
-#define PROGRESS_DATACHECKSUMS_PHASE 0
-#define PROGRESS_DATACHECKSUMS_TOTAL_DB 1
-#define PROGRESS_DATACHECKSUMS_TOTAL_REL 2
-#define PROGRESS_DATACHECKSUMS_PROCESSED_DB 3
-#define PROGRESS_DATACHECKSUMS_PROCESSED_REL 4
-#define PROGRESS_DATACHECKSUMS_CUR_DB 5
-#define PROGRESS_DATACHECKSUMS_CUR_REL 6
-#define PROGRESS_DATACHECKSUMS_CUR_REL_TOTAL_BLOCKS 7
-#define PROGRESS_DATACHECKSUMS_CUR_REL_PROCESSED_BLOCKS 8
+#define PROGRESS_DATACHECKSUMS_PHASE		0
+#define PROGRESS_DATACHECKSUMS_DBS_TOTAL	1
+#define PROGRESS_DATACHECKSUMS_DBS_DONE		2
+#define PROGRESS_DATACHECKSUMS_RELS_TOTAL	3
+#define PROGRESS_DATACHECKSUMS_RELS_DONE	4
+#define PROGRESS_DATACHECKSUMS_BLOCKS_TOTAL	5
+#define PROGRESS_DATACHECKSUMS_BLOCKS_DONE	6
 
 /* Phases of datachecksumsworker operation */
-#define PROGRESS_DATACHECKSUMS_PHASE_ENABLING 0
-#define PROGRESS_DATACHECKSUMS_PHASE_DISABLING 1
-#define PROGRESS_DATACHECKSUMS_PHASE_WAITING_BACKENDS 2
-#define PROGRESS_DATACHECKSUMS_PHASE_WAITING_TEMPREL 3
-#define PROGRESS_DATACHECKSUMS_DONE 4
+#define PROGRESS_DATACHECKSUMS_PHASE_ENABLING			0
+#define PROGRESS_DATACHECKSUMS_PHASE_DISABLING			1
+#define PROGRESS_DATACHECKSUMS_PHASE_WAITING			2
+#define PROGRESS_DATACHECKSUMS_PHASE_WAITING_BACKENDS	3
+#define PROGRESS_DATACHECKSUMS_PHASE_WAITING_TEMPREL	4
+#define PROGRESS_DATACHECKSUMS_PHASE_WAITING_CHECKPOINT 5
 
 #endif
-- 
2.48.1

