From b5ffcb057da6914d7d5ac61e343d73b5102be8db Mon Sep 17 00:00:00 2001 From: Baji Shaik Date: Tue, 12 May 2026 01:27:46 +0000 Subject: [PATCH] Fix REPACK decoding worker not cleaned up on FATAL exit When the launching backend of REPACK (CONCURRENTLY) is terminated via pg_terminate_backend(), ProcDiePending causes ereport(FATAL), which bypasses PG_FINALLY blocks. As a result, stop_repack_decoding_worker() is never called, leaving the decoding worker running indefinitely and holding its temporary replication slot. Fix by registering an on_proc_exit callback when the decoding worker is started. The callback signals the worker to terminate via TerminateBackgroundWorker(). We do not wait for the worker to exit in the callback, as WaitLatch is not safe during proc_exit; instead we rely on the worker's temporary replication slot (RS_TEMPORARY) being dropped automatically when the worker process exits after receiving the termination signal. --- src/backend/commands/repack.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/src/backend/commands/repack.c b/src/backend/commands/repack.c index 860e2aecbe9..f4ba2d02348 100644 --- a/src/backend/commands/repack.c +++ b/src/backend/commands/repack.c @@ -64,6 +64,7 @@ #include "pgstat.h" #include "replication/logicalrelation.h" #include "storage/bufmgr.h" +#include "storage/ipc.h" #include "storage/lmgr.h" #include "storage/predicate.h" #include "storage/proc.h" @@ -211,6 +212,7 @@ static Oid determine_clustered_index(Relation rel, bool usingindex, static void start_repack_decoding_worker(Oid relid); static void stop_repack_decoding_worker(void); +static void repack_decoding_worker_exit_cleanup(int code, Datum arg); static Snapshot get_initial_snapshot(DecodingWorker *worker); static void ProcessRepackMessage(StringInfo msg); @@ -3454,6 +3456,8 @@ start_repack_decoding_worker(Oid relid) decoding_worker->seg = seg; decoding_worker->error_mqh = mqh; + on_proc_exit(repack_decoding_worker_exit_cleanup, (Datum) 
0); + /* * The decoding setup must be done before the caller can have XID assigned * for any reason, otherwise the worker might end up in a deadlock, @@ -3477,6 +3481,25 @@ start_repack_decoding_worker(Oid relid) ConditionVariableCancelSleep(); } +/* + * Exit callback that asks for the decoding worker to be terminated. + * PG_FINALLY does not run on FATAL, so without this the worker would + * outlive a launching backend that is killed. We only signal; waiting + * via WaitLatch is not safe during proc_exit, and the worker's temporary + * replication slot is expected to go away when the worker itself exits. + * NOTE(review): registered on every worker start and run after + * shmem_exit(); confirm the on_proc_exit list cannot fill up and that + * TerminateBackgroundWorker() is safe this late (else before_shmem_exit). + */ +static void +repack_decoding_worker_exit_cleanup(int code, Datum arg) +{ + if (decoding_worker == NULL) + return; + if (decoding_worker->handle != NULL) + TerminateBackgroundWorker(decoding_worker->handle); +} + /* * Stop the decoding worker and cleanup the related resources. * -- 2.50.1