From db6f070e73f89c38f8a2bfafa10b0718ea51a57d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=81lvaro=20Herrera?= <alvherre@kurilemu.de>
Date: Mon, 18 May 2026 10:13:24 -0700
Subject: [PATCH] Restructure repack worker teardown
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The original code would leave a shared memory segment unreleased if we
fail partway through initialization.  Change the shutdown order so that
we already free it.

Author: Álvaro Herrera <alvherre@kurilemu.de>
Discussion: https://postgr.es/m/agtNn6ZCmdI2KJFn@alvherre.pgsql
---
 src/backend/commands/repack.c | 67 ++++++++++++++++-------------------
 1 file changed, 31 insertions(+), 36 deletions(-)

diff --git a/src/backend/commands/repack.c b/src/backend/commands/repack.c
index bfc62c8f752..c9064d8fd13 100644
--- a/src/backend/commands/repack.c
+++ b/src/backend/commands/repack.c
@@ -3411,10 +3411,14 @@ start_repack_decoding_worker(Oid relid)
 	shm_mq_handle *mqh;
 	BackgroundWorker bgw;
 
+	decoding_worker = palloc0_object(DecodingWorker);
+
 	/* Setup shared memory. */
 	size = BUFFERALIGN(offsetof(DecodingWorkerShared, error_queue)) +
 		BUFFERALIGN(REPACK_ERROR_QUEUE_SIZE);
 	seg = dsm_create(size, 0);
+	decoding_worker->seg = seg;
+
 	shared = (DecodingWorkerShared *) dsm_segment_address(seg);
 	shared->initialized = false;
 	shared->lsn_upto = InvalidXLogRecPtr;
@@ -3454,14 +3458,12 @@ start_repack_decoding_worker(Oid relid)
 	bgw.bgw_main_arg = UInt32GetDatum(dsm_segment_handle(seg));
 	bgw.bgw_notify_pid = MyProcPid;
 
-	decoding_worker = palloc0_object(DecodingWorker);
 	if (!RegisterDynamicBackgroundWorker(&bgw, &decoding_worker->handle))
 		ereport(ERROR,
 				errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
 				errmsg("out of background worker slots"),
 				errhint("You might need to increase \"%s\".", "max_worker_processes"));
 
-	decoding_worker->seg = seg;
 	decoding_worker->error_mqh = mqh;
 
 	/*
@@ -3487,17 +3489,6 @@ start_repack_decoding_worker(Oid relid)
 	ConditionVariableCancelSleep();
 }
 
-/*
- * PG_ENSURE_ERROR_CLEANUP callback to stop the decoding worker.
- * This ensures the worker is terminated on both ERROR and FATAL exits,
- * unlike PG_FINALLY which only handles ERROR.
- */
-static void
-repack_decoding_worker_cleanup_cb(int code, Datum arg)
-{
-	stop_repack_decoding_worker();
-}
-
 /*
  * Stop the decoding worker and cleanup the related resources.
  *
@@ -3508,39 +3499,43 @@ static void
 stop_repack_decoding_worker(void)
 {
 	BgwHandleStatus status;
+	dsm_segment	   *dsmseg;
 
-	/* Haven't reached the worker startup? */
+	/* Nothing to do if no worker was set up. */
 	if (decoding_worker == NULL)
 		return;
 
-	/* Could not register the worker? */
-	if (decoding_worker->handle == NULL)
-		return;
+	/* Terminate the worker process, if one is running. */
+	if (decoding_worker->handle != NULL)
+	{
+		TerminateBackgroundWorker(decoding_worker->handle);
+		/* The worker should really exit before the REPACK command does. */
+		HOLD_INTERRUPTS();
+		status = WaitForBackgroundWorkerShutdown(decoding_worker->handle);
+		RESUME_INTERRUPTS();
 
-	TerminateBackgroundWorker(decoding_worker->handle);
-	/* The worker should really exit before the REPACK command does. */
-	HOLD_INTERRUPTS();
-	status = WaitForBackgroundWorkerShutdown(decoding_worker->handle);
-	RESUME_INTERRUPTS();
-
-	if (status == BGWH_POSTMASTER_DIED)
-		ereport(FATAL,
-				errcode(ERRCODE_ADMIN_SHUTDOWN),
-				errmsg("postmaster exited during REPACK command"));
-
-	shm_mq_detach(decoding_worker->error_mqh);
+		if (status == BGWH_POSTMASTER_DIED)
+			ereport(FATAL,
+					errcode(ERRCODE_ADMIN_SHUTDOWN),
+					errmsg("postmaster exited during REPACK command"));
+	}
 
 	/*
-	 * If we could not cancel the current sleep due to ERROR, do that before
-	 * we detach from the shared memory the condition variable is located in.
-	 * If we did not, the bgworker ERROR handling code would try and fail
-	 * badly.
+	 * Now detach from our shared memory segment.  In error cases there might
+	 * still be messages from the worker in the queue, which ProcessInterrupts
+	 * would try to read; this is pointless (and causes an assertion failure),
+	 * so set the global pointer to NULL to have ProcessRepackMessages ignore
+	 * them.
 	 */
-	ConditionVariableCancelSleep();
-
-	dsm_detach(decoding_worker->seg);
+	dsmseg = decoding_worker->seg;
 	pfree(decoding_worker);
 	decoding_worker = NULL;
+
+	/* We must also cancel the current sleep, if one is still set up */
+	ConditionVariableCancelSleep();
+
+	if (dsmseg != NULL)
+		dsm_detach(dsmseg);
 }
 
 /* stop_repack_decoding_worker, wrapped as a before_shmem_exit callback */
-- 
2.47.3

