From 2223da7a9b2ef8c8d71780ad72b24eaf6d6c1141 Mon Sep 17 00:00:00 2001
From: Daniil Davidov <d.davydov@postgrespro.ru>
Date: Fri, 16 May 2025 11:58:40 +0700
Subject: [PATCH v3 1/2] Parallel index autovacuum with bgworkers

---
 src/backend/access/common/reloptions.c        | 11 ++++
 src/backend/commands/vacuum.c                 | 55 +++++++++++++++++++
 src/backend/commands/vacuumparallel.c         | 46 ++++++++++------
 src/backend/postmaster/autovacuum.c           | 14 ++++-
 src/backend/postmaster/bgworker.c             | 33 ++++++++++-
 src/backend/utils/init/globals.c              |  1 +
 src/backend/utils/misc/guc_tables.c           | 12 ++++
 src/backend/utils/misc/postgresql.conf.sample |  1 +
 src/include/miscadmin.h                       |  1 +
 src/include/utils/guc_hooks.h                 |  2 +
 src/include/utils/rel.h                       | 10 ++++
 11 files changed, 166 insertions(+), 20 deletions(-)

diff --git a/src/backend/access/common/reloptions.c b/src/backend/access/common/reloptions.c
index 46c1dce222d..730096002b1 100644
--- a/src/backend/access/common/reloptions.c
+++ b/src/backend/access/common/reloptions.c
@@ -166,6 +166,15 @@ static relopt_bool boolRelOpts[] =
 		},
 		true
 	},
+	{
+		{
+			"parallel_index_autovacuum_enabled",
+			"Allows autovacuum to process indexes of this table in parallel mode",
+			RELOPT_KIND_HEAP,
+			ShareUpdateExclusiveLock
+		},
+		false
+	},
 	/* list terminator */
 	{{NULL}}
 };
@@ -1863,6 +1872,8 @@ default_reloptions(Datum reloptions, bool validate, relopt_kind kind)
 		{"fillfactor", RELOPT_TYPE_INT, offsetof(StdRdOptions, fillfactor)},
 		{"autovacuum_enabled", RELOPT_TYPE_BOOL,
 		offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, enabled)},
+		{"parallel_index_autovacuum_enabled", RELOPT_TYPE_BOOL,
+		offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, parallel_index_autovacuum_enabled)},
 		{"autovacuum_vacuum_threshold", RELOPT_TYPE_INT,
 		offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, vacuum_threshold)},
 		{"autovacuum_vacuum_max_threshold", RELOPT_TYPE_INT,
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index 33a33bf6b1c..6c2f49f203f 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -57,9 +57,21 @@
 #include "utils/guc.h"
 #include "utils/guc_hooks.h"
 #include "utils/memutils.h"
+#include "utils/rel.h"
 #include "utils/snapmgr.h"
 #include "utils/syscache.h"
 
+/*
+ * Minimum number of dead tuples required for the table's indexes to be
+ * processed in parallel during autovacuum.
+ */
+#define AV_PARALLEL_DEADTUP_THRESHOLD	1024
+
+/*
+ * How many indexes should process each parallel worker during autovacuum.
+ */
+#define NUM_INDEXES_PER_PARALLEL_WORKER 30
+
 /*
  * Minimum interval for cost-based vacuum delay reports from a parallel worker.
  * This aims to avoid sending too many messages and waking up the leader too
@@ -2234,6 +2246,49 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params,
 	else
 		toast_relid = InvalidOid;
 
+	/*
+	 * If we are running autovacuum - decide whether we need to process indexes
+	 * of table with given oid in parallel.
+	 */
+	if (AmAutoVacuumWorkerProcess() &&
+		params->index_cleanup != VACOPTVALUE_DISABLED &&
+		RelationAllowsParallelIdxAutovac(rel))
+	{
+		PgStat_StatTabEntry *tabentry;
+
+		/* fetch the pgstat table entry */
+		tabentry = pgstat_fetch_stat_tabentry_ext(rel->rd_rel->relisshared,
+												  rel->rd_id);
+		if (tabentry && tabentry->dead_tuples >= AV_PARALLEL_DEADTUP_THRESHOLD)
+		{
+			List   *indexes = RelationGetIndexList(rel);
+			int		num_indexes = list_length(indexes);
+
+			list_free(indexes);
+
+			if (pia_reserved_workers > 0)
+			{
+				/*
+				 * We request at least one parallel worker, if user set
+				 * 'parallel_idx_autovac_enabled' option. The total number of
+				 * additional parallel workers depends on how many indexes the
+				 * table has. For now we assume that each parallel worker should
+				 * process NUM_INDEXES_PER_PARALLEL_WORKER indexes.
+				 */
+				params->nworkers =
+					Min((num_indexes / NUM_INDEXES_PER_PARALLEL_WORKER) + 1,
+						pia_reserved_workers);
+			}
+			else
+				ereport(WARNING,
+					(errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
+					 errmsg("Cannot launch any supportive workers for parallel index cleanup of rel %s",
+							RelationGetRelationName(rel)),
+					errhint("You might need to set parameter \"pia_reserved_workers\" to a value > 0")));
+
+		}
+	}
+
 	/*
 	 * Switch to the table owner's userid, so that any index functions are run
 	 * as that user.  Also lock down security-restricted operations and
diff --git a/src/backend/commands/vacuumparallel.c b/src/backend/commands/vacuumparallel.c
index 2b9d548cdeb..5c48a1e740e 100644
--- a/src/backend/commands/vacuumparallel.c
+++ b/src/backend/commands/vacuumparallel.c
@@ -1,15 +1,15 @@
 /*-------------------------------------------------------------------------
  *
  * vacuumparallel.c
- *	  Support routines for parallel vacuum execution.
+ *	  Support routines for parallel [auto]vacuum execution.
  *
  * This file contains routines that are intended to support setting up, using,
  * and tearing down a ParallelVacuumState.
  *
- * In a parallel vacuum, we perform both index bulk deletion and index cleanup
- * with parallel worker processes.  Individual indexes are processed by one
- * vacuum process.  ParallelVacuumState contains shared information as well as
- * the memory space for storing dead items allocated in the DSA area.  We
+ * In a parallel [auto]vacuum, we perform both index bulk deletion and index
+ * cleanup with parallel worker processes.  Individual indexes are processed by
+ * one vacuum process.  ParallelVacuumState contains shared information as well
+ * as the memory space for storing dead items allocated in the DSA area.  We
  * launch parallel worker processes at the start of parallel index
  * bulk-deletion and index cleanup and once all indexes are processed, the
  * parallel worker processes exit.  Each time we process indexes in parallel,
@@ -34,6 +34,7 @@
 #include "executor/instrument.h"
 #include "optimizer/paths.h"
 #include "pgstat.h"
+#include "postmaster/autovacuum.h"
 #include "storage/bufmgr.h"
 #include "tcop/tcopprot.h"
 #include "utils/lsyscache.h"
@@ -157,7 +158,8 @@ typedef struct PVIndStats
 } PVIndStats;
 
 /*
- * Struct for maintaining a parallel vacuum state. typedef appears in vacuum.h.
+ * Struct for maintaining a parallel [auto]vacuum state. typedef appears in
+ * vacuum.h.
  */
 struct ParallelVacuumState
 {
@@ -371,10 +373,18 @@ parallel_vacuum_init(Relation rel, Relation *indrels, int nindexes,
 	shared->relid = RelationGetRelid(rel);
 	shared->elevel = elevel;
 	shared->queryid = pgstat_get_my_query_id();
-	shared->maintenance_work_mem_worker =
-		(nindexes_mwm > 0) ?
-		maintenance_work_mem / Min(parallel_workers, nindexes_mwm) :
-		maintenance_work_mem;
+
+	if (AmAutoVacuumWorkerProcess())
+		shared->maintenance_work_mem_worker =
+			(nindexes_mwm > 0) ?
+			autovacuum_work_mem / Min(parallel_workers, nindexes_mwm) :
+			autovacuum_work_mem;
+	else
+		shared->maintenance_work_mem_worker =
+			(nindexes_mwm > 0) ?
+			maintenance_work_mem / Min(parallel_workers, nindexes_mwm) :
+			maintenance_work_mem;
+
 	shared->dead_items_info.max_bytes = vac_work_mem * (size_t) 1024;
 
 	/* Prepare DSA space for dead items */
@@ -558,7 +568,9 @@ parallel_vacuum_compute_workers(Relation *indrels, int nindexes, int nrequested,
 	 * We don't allow performing parallel operation in standalone backend or
 	 * when parallelism is disabled.
 	 */
-	if (!IsUnderPostmaster || max_parallel_maintenance_workers == 0)
+	if (!IsUnderPostmaster ||
+		(pia_reserved_workers == 0 && AmAutoVacuumWorkerProcess()) ||
+		(max_parallel_maintenance_workers == 0 && !AmAutoVacuumWorkerProcess()))
 		return 0;
 
 	/*
@@ -597,15 +609,17 @@ parallel_vacuum_compute_workers(Relation *indrels, int nindexes, int nrequested,
 	parallel_workers = (nrequested > 0) ?
 		Min(nrequested, nindexes_parallel) : nindexes_parallel;
 
-	/* Cap by max_parallel_maintenance_workers */
-	parallel_workers = Min(parallel_workers, max_parallel_maintenance_workers);
+	/* Cap by GUC variable */
+	parallel_workers = AmAutoVacuumWorkerProcess() ?
+		Min(parallel_workers, pia_reserved_workers) :
+		Min(parallel_workers, max_parallel_maintenance_workers);
 
 	return parallel_workers;
 }
 
 /*
  * Perform index vacuum or index cleanup with parallel workers.  This function
- * must be used by the parallel vacuum leader process.
+ * must be used by the parallel [auto]vacuum leader process.
  */
 static void
 parallel_vacuum_process_all_indexes(ParallelVacuumState *pvs, int num_index_scans,
@@ -982,8 +996,8 @@ parallel_vacuum_index_is_parallel_safe(Relation indrel, int num_index_scans,
 /*
  * Perform work within a launched parallel process.
  *
- * Since parallel vacuum workers perform only index vacuum or index cleanup,
- * we don't need to report progress information.
+ * Since parallel [auto]vacuum workers perform only index vacuum or index
+ * cleanup, we don't need to report progress information.
  */
 void
 parallel_vacuum_main(dsm_segment *seg, shm_toc *toc)
diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c
index 4d4a1a3197e..59fb52aa443 100644
--- a/src/backend/postmaster/autovacuum.c
+++ b/src/backend/postmaster/autovacuum.c
@@ -2824,7 +2824,11 @@ table_recheck_autovac(Oid relid, HTAB *table_toast_map,
 		 */
 		tab->at_params.index_cleanup = VACOPTVALUE_UNSPECIFIED;
 		tab->at_params.truncate = VACOPTVALUE_UNSPECIFIED;
-		/* As of now, we don't support parallel vacuum for autovacuum */
+		/*
+		 * Don't request parallel mode by now. nworkers might be set to
+		 * positive value if we will meet appropriate for parallel index
+		 * processing table.
+		 */
 		tab->at_params.nworkers = -1;
 		tab->at_params.freeze_min_age = freeze_min_age;
 		tab->at_params.freeze_table_age = freeze_table_age;
@@ -3406,6 +3410,14 @@ check_autovacuum_work_mem(int *newval, void **extra, GucSource source)
 	return true;
 }
 
+bool
+check_pia_reserved_workers(int *newval, void **extra, GucSource source)
+{
+	if (*newval > (max_worker_processes - 8))
+		return false;
+	return true;
+}
+
 /*
  * Returns whether there is a free autovacuum worker slot available.
  */
diff --git a/src/backend/postmaster/bgworker.c b/src/backend/postmaster/bgworker.c
index 116ddf7b835..e62076939ec 100644
--- a/src/backend/postmaster/bgworker.c
+++ b/src/backend/postmaster/bgworker.c
@@ -1046,6 +1046,8 @@ RegisterDynamicBackgroundWorker(BackgroundWorker *worker,
 								BackgroundWorkerHandle **handle)
 {
 	int			slotno;
+	int			from;
+	int			upto;
 	bool		success = false;
 	bool		parallel;
 	uint64		generation = 0;
@@ -1088,10 +1090,23 @@ RegisterDynamicBackgroundWorker(BackgroundWorker *worker,
 		return false;
 	}
 
+	/*
+	 * Determine range of workers in pool, that we can use (last
+	 * 'pia_reserved_workers' is reserved for autovacuum workers).
+	 */
+
+	from = AmAutoVacuumWorkerProcess() ?
+		BackgroundWorkerData->total_slots - pia_reserved_workers :
+		0;
+
+	upto = AmAutoVacuumWorkerProcess() ?
+		BackgroundWorkerData->total_slots :
+		BackgroundWorkerData->total_slots - pia_reserved_workers;
+
 	/*
 	 * Look for an unused slot.  If we find one, grab it.
 	 */
-	for (slotno = 0; slotno < BackgroundWorkerData->total_slots; ++slotno)
+	for (slotno = from; slotno < upto; ++slotno)
 	{
 		BackgroundWorkerSlot *slot = &BackgroundWorkerData->slot[slotno];
 
@@ -1159,7 +1174,13 @@ GetBackgroundWorkerPid(BackgroundWorkerHandle *handle, pid_t *pidp)
 	BackgroundWorkerSlot *slot;
 	pid_t		pid;
 
-	Assert(handle->slot < max_worker_processes);
+	/* Only autovacuum can use last 'pia_reserved_workers' workers in pool. */
+	if (!AmAutoVacuumWorkerProcess())
+		Assert(handle->slot < max_worker_processes - pia_reserved_workers);
+	else
+		Assert(handle->slot < max_worker_processes &&
+			   handle->slot >= max_worker_processes - pia_reserved_workers);
+
 	slot = &BackgroundWorkerData->slot[handle->slot];
 
 	/*
@@ -1298,7 +1319,13 @@ TerminateBackgroundWorker(BackgroundWorkerHandle *handle)
 	BackgroundWorkerSlot *slot;
 	bool		signal_postmaster = false;
 
-	Assert(handle->slot < max_worker_processes);
+	/* Only autovacuum can use last 'pia_reserved_workers' workers in pool. */
+	if (!AmAutoVacuumWorkerProcess())
+		Assert(handle->slot < max_worker_processes - pia_reserved_workers);
+	else
+		Assert(handle->slot < max_worker_processes &&
+			   handle->slot >= max_worker_processes - pia_reserved_workers);
+
 	slot = &BackgroundWorkerData->slot[handle->slot];
 
 	/* Set terminate flag in shared memory, unless slot has been reused. */
diff --git a/src/backend/utils/init/globals.c b/src/backend/utils/init/globals.c
index 92b0446b80c..a6fdcd2de5b 100644
--- a/src/backend/utils/init/globals.c
+++ b/src/backend/utils/init/globals.c
@@ -144,6 +144,7 @@ int			NBuffers = 16384;
 int			MaxConnections = 100;
 int			max_worker_processes = 8;
 int			max_parallel_workers = 8;
+int         pia_reserved_workers = 0;
 int			MaxBackends = 0;
 
 /* GUC parameters for vacuum */
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index 2f8cbd86759..dfc18095d7b 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -3604,6 +3604,18 @@ struct config_int ConfigureNamesInt[] =
 		NULL, NULL, NULL
 	},
 
+	{
+		{"parallel_index_autovacuum_reserved_workers", PGC_USERSET, RESOURCES_WORKER_PROCESSES,
+			gettext_noop("Maximum number of worker processes (from bgworkers pool), reserved for participation in parallel index autovacuum."),
+			gettext_noop("This parameter is depending on \"max_worker_processes\" (not on \"autovacuum_max_workers\"). "
+						 "*Only* autovacuum workers can use these supportive processes. "
+						 "Also, these processes are taken into account in \"max_parallel_workers\"."),
+		},
+		&pia_reserved_workers,
+		0, 0, MAX_BACKENDS,
+		check_pia_reserved_workers, NULL, NULL
+	},
+
 	{
 		{"max_parallel_maintenance_workers", PGC_USERSET, RESOURCES_WORKER_PROCESSES,
 			gettext_noop("Sets the maximum number of parallel processes per maintenance operation."),
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index 34826d01380..3d96af1547f 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -223,6 +223,7 @@
 #max_parallel_maintenance_workers = 2	# limited by max_parallel_workers
 #max_parallel_workers = 8		# number of max_worker_processes that
 					# can be used in parallel operations
+#parallel_index_autovacuum_reserved_workers = 0	# disabled by default and limited by max_parallel_workers
 #parallel_leader_participation = on
 
 
diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h
index 1e59a7f910f..465dfe25009 100644
--- a/src/include/miscadmin.h
+++ b/src/include/miscadmin.h
@@ -177,6 +177,7 @@ extern PGDLLIMPORT int NBuffers;
 extern PGDLLIMPORT int MaxBackends;
 extern PGDLLIMPORT int MaxConnections;
 extern PGDLLIMPORT int max_worker_processes;
+extern PGDLLIMPORT int pia_reserved_workers;
 extern PGDLLIMPORT int max_parallel_workers;
 
 extern PGDLLIMPORT int commit_timestamp_buffers;
diff --git a/src/include/utils/guc_hooks.h b/src/include/utils/guc_hooks.h
index 799fa7ace68..8507f95b2ea 100644
--- a/src/include/utils/guc_hooks.h
+++ b/src/include/utils/guc_hooks.h
@@ -31,6 +31,8 @@ extern void assign_application_name(const char *newval, void *extra);
 extern const char *show_archive_command(void);
 extern bool check_autovacuum_work_mem(int *newval, void **extra,
 									  GucSource source);
+extern bool check_pia_reserved_workers(int *newval, void **extra,
+									   GucSource source);
 extern bool check_vacuum_buffer_usage_limit(int *newval, void **extra,
 											GucSource source);
 extern bool check_backtrace_functions(char **newval, void **extra,
diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h
index b552359915f..980c3459469 100644
--- a/src/include/utils/rel.h
+++ b/src/include/utils/rel.h
@@ -311,6 +311,7 @@ typedef struct ForeignKeyCacheInfo
 typedef struct AutoVacOpts
 {
 	bool		enabled;
+	bool		parallel_index_autovacuum_enabled;
 	int			vacuum_threshold;
 	int			vacuum_max_threshold;
 	int			vacuum_ins_threshold;
@@ -409,6 +410,15 @@ typedef struct StdRdOptions
 	((relation)->rd_options ? \
 	 ((StdRdOptions *) (relation)->rd_options)->parallel_workers : (defaultpw))
 
+/*
+ * RelationAllowsParallelIdxAutovac
+ *		Returns whether the relation's indexes can be processed in parallel
+ *		during autovacuum. Note multiple eval of argument!
+ */
+#define RelationAllowsParallelIdxAutovac(relation) \
+	((relation)->rd_options ? \
+	 ((StdRdOptions *) (relation)->rd_options)->autovacuum.parallel_index_autovacuum_enabled : false)
+
 /* ViewOptions->check_option values */
 typedef enum ViewOptCheckOption
 {
-- 
2.43.0

