From 162c6f6ff1c3ba22af7385f8dc7c53915b8ff5a2 Mon Sep 17 00:00:00 2001
From: Vlad Lesin <vladlesin@gmail.com>
Date: Fri, 24 Apr 2026 15:58:39 +0300
Subject: [PATCH] Add regression test for ProcKill lock-group procLatch recycle
 race

When a lock-group leader and a follower run ProcKill() concurrently,
the last process out is responsible for returning the leader's PGPROC
to the freelist.  If that push races with the leader's own
DisownLatch(), a newly-forked backend that picks up the recycled slot
PANICs in OwnLatch() with "latch already owned by PID ...".
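
Schematically (a sketch of the losing interleaving; not the exact code
order):

    leader: ProcKill()              follower: ProcKill(), last one out
      lock-group cleanup              lock-group cleanup
      ...                             pushes leader's PGPROC onto the
                                      freelist
                                    a new backend forks, pops that
                                    PGPROC, and PANICs in OwnLatch():
                                    the latch is still owned by the
                                    exiting leader
      DisownLatch()  <- too late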

To reproduce the race deterministically, add two INJECTION_POINT
hooks in ProcKill() immediately after LWLockRelease(leader_lwlock):
prockill-after-lockgroup-leader and prockill-after-lockgroup-follower.
Two distinct names are required because injection points have a single
global attachment per name; each is scoped to one PID via
INJ_CONDITION_PID, so unrelated backends that traverse the same code
path run the callback and return without sleeping.

Add src/test/modules/prockill_race, a new TAP-only test module whose
SQL helpers expose the pieces the test cannot assemble from existing
APIs:

  - prockill_become_lock_group_leader() and
    prockill_become_lock_group_member(leader_pid) form a lock group
    without spawning parallel workers, so both backends stay under
    test control.

  - prockill_attach_injection_wait(point_name, target_pid) attaches
    injection_points' injection_wait callback from a throwaway
    controller session and deliberately bypasses
    injection_points_set_local().  Attaching via set_local() from the
    victim would register injection_points_cleanup as a
    before_shmem_exit hook; that cleanup fires ahead of the
    on_shmem_exit callbacks (so ahead of ProcKill itself) and would
    detach the point before the victim reaches it, turning
    INJECTION_POINT(prockill-after-lockgroup-*) into a no-op.

  - prockill_backend_in_injection(target_pid, point_name) inspects
    ProcGlobal->allProcs[].wait_event_info directly.  By the time
    ProcKill() reaches the injection points, pgstat_beshutdown_hook
    has already cleared the target's pg_stat_activity row and
    RemoveProcFromArray has removed the target from the ProcArray, so
    the standard observability surfaces cannot see the wait; the
    PGPROC slot itself stays intact until the end of ProcKill().
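
Taken together, the helpers compose like this (illustrative sketch;
<leader pid> stands for pg_backend_pid() of the victim leader
session):

    -- session L (victim leader)
    SELECT prockill_become_lock_group_leader();

    -- session F (victim follower)
    SELECT prockill_become_lock_group_member(<leader pid>);

    -- session C (controller)
    SELECT prockill_attach_injection_wait(
        'prockill-after-lockgroup-leader', <leader pid>);
    SELECT pg_terminate_backend(<leader pid>);
    -- poll until true:
    SELECT prockill_backend_in_injection(
        <leader pid>, 'prockill-after-lockgroup-leader');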

Move InjectionPointCondition and its enum from
src/test/modules/injection_points/injection_points.c into a new
header injection_point_condition.h so prockill_race can build the
same PID-scoped private_data payload that injection_wait consumes,
without duplicating the type.

The TAP test forms a two-backend lock group, attaches PID-scoped
injection_wait to both names from a separate controller session,
terminates the leader, polls until its PGPROC reports the leader
wait event, terminates the follower, polls until its PGPROC reports
the follower wait event, wakes the follower, then runs SELECT 1 in
a new session to force a newly-forked backend through OwnLatch() on
a procLatch that an unfixed tree has not yet disowned.  The outcome
is classified via the postmaster log ("latch already owned by PID")
and via the error text of the final SELECT 1.

The test expects a fixed server: no PANIC, both victims reach their
respective injection points, and the controller's SELECT 1 succeeds.
It fails on an unfixed tree by design, so it must land together
with the matching ProcKill fix.

Requires --enable-injection-points (-Dinjection_points=true for Meson
builds).
---
 src/backend/storage/lmgr/proc.c               |   7 +
 src/test/modules/Makefile                     |   4 ++--
 .../injection_point_condition.h               |  25 ++
 .../injection_points/injection_points.c       |  26 +-
 src/test/modules/meson.build                  |   1 +
 src/test/modules/prockill_race/Makefile       |  47 ++++
 src/test/modules/prockill_race/meson.build    |  38 +++
 .../prockill_race/prockill_race--1.0.sql      |  33 ++
 .../modules/prockill_race/prockill_race.c     | 266 ++++++++++++++++++
 .../prockill_race/prockill_race.control       |   7 +
 .../t/001_prockill_lockgroup_injection.pl     | 243 +++++++++++++++++
 11 files changed, 671 insertions(+), 26 deletions(-)
 create mode 100644 src/test/modules/injection_points/injection_point_condition.h
 create mode 100644 src/test/modules/prockill_race/Makefile
 create mode 100644 src/test/modules/prockill_race/meson.build
 create mode 100644 src/test/modules/prockill_race/prockill_race--1.0.sql
 create mode 100644 src/test/modules/prockill_race/prockill_race.c
 create mode 100644 src/test/modules/prockill_race/prockill_race.control
 create mode 100644 src/test/modules/prockill_race/t/001_prockill_lockgroup_injection.pl

diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c
index 1ac25068d62..c6b490af186 100644
--- a/src/backend/storage/lmgr/proc.c
+++ b/src/backend/storage/lmgr/proc.c
@@ -990,6 +990,13 @@ ProcKill(int code, Datum arg)
 		else if (leader != MyProc)
 			MyProc->lockGroupLeader = NULL;
 		LWLockRelease(leader_lwlock);
+		/*
+		 * Test hooks for src/test/modules/prockill_race.  Synchronize
+		 * concurrent ProcKill paths in a lock group; two names are used so
+		 * a controller can attach a PID-scoped "wait" action per name.
+		 */
+		INJECTION_POINT("prockill-after-lockgroup-leader", NULL);
+		INJECTION_POINT("prockill-after-lockgroup-follower", NULL);
 	}
 
 	/*
diff --git a/src/test/modules/Makefile b/src/test/modules/Makefile
index 0a74ab5c86f..ba80651e009 100644
--- a/src/test/modules/Makefile
+++ b/src/test/modules/Makefile
@@ -59,7 +59,7 @@ SUBDIRS = \
 
 
 ifeq ($(enable_injection_points),yes)
-SUBDIRS += injection_points gin typcache
+SUBDIRS += injection_points gin typcache prockill_race
 else
-ALWAYS_SUBDIRS += injection_points gin typcache
+ALWAYS_SUBDIRS += injection_points gin typcache prockill_race
 endif
diff --git a/src/test/modules/injection_points/injection_point_condition.h b/src/test/modules/injection_points/injection_point_condition.h
new file mode 100644
index 00000000000..fc926196c65
--- /dev/null
+++ b/src/test/modules/injection_points/injection_point_condition.h
@@ -0,0 +1,25 @@
+/*-------------------------------------------------------------------------
+ * injection_point_condition.h
+ *		Shared condition payload for injection_points module callbacks
+ *
+ * Copyright (c) 2025-2026, PostgreSQL Global Development Group
+ *
+ * src/test/modules/injection_points/injection_point_condition.h
+ *-------------------------------------------------------------------------
+ */
+#ifndef INJECTION_POINT_CONDITION_H
+#define INJECTION_POINT_CONDITION_H
+
+typedef enum InjectionPointConditionType
+{
+	INJ_CONDITION_ALWAYS = 0,	/* always run */
+	INJ_CONDITION_PID,			/* PID restriction */
+} InjectionPointConditionType;
+
+typedef struct InjectionPointCondition
+{
+	InjectionPointConditionType type;	/* type of the condition */
+	int			pid;			/* PID where the point may run */
+} InjectionPointCondition;
+
+#endif							/* INJECTION_POINT_CONDITION_H */
diff --git a/src/test/modules/injection_points/injection_points.c b/src/test/modules/injection_points/injection_points.c
index 0f1af513673..fb7a7d477cc 100644
--- a/src/test/modules/injection_points/injection_points.c
+++ b/src/test/modules/injection_points/injection_points.c
@@ -34,36 +34,14 @@
 #include "utils/tuplestore.h"
 #include "utils/wait_event.h"
 
+#include "injection_point_condition.h"
+
 PG_MODULE_MAGIC;
 
 /* Maximum number of waits usable in injection points at once */
 #define INJ_MAX_WAIT	8
 #define INJ_NAME_MAXLEN	64
 
-/*
- * Conditions related to injection points.  This tracks in shared memory the
- * runtime conditions under which an injection point is allowed to run,
- * stored as private_data when an injection point is attached, and passed as
- * argument to the callback.
- *
- * If more types of runtime conditions need to be tracked, this structure
- * should be expanded.
- */
-typedef enum InjectionPointConditionType
-{
-	INJ_CONDITION_ALWAYS = 0,	/* always run */
-	INJ_CONDITION_PID,			/* PID restriction */
-} InjectionPointConditionType;
-
-typedef struct InjectionPointCondition
-{
-	/* Type of the condition */
-	InjectionPointConditionType type;
-
-	/* ID of the process where the injection point is allowed to run */
-	int			pid;
-} InjectionPointCondition;
-
 /*
  * List of injection points stored in TopMemoryContext attached
  * locally to this process.
diff --git a/src/test/modules/meson.build b/src/test/modules/meson.build
index 4bca42bb370..9e013fd05af 100644
--- a/src/test/modules/meson.build
+++ b/src/test/modules/meson.build
@@ -13,6 +13,7 @@ subdir('libpq_pipeline')
 subdir('nbtree')
 subdir('oauth_validator')
 subdir('plsample')
+subdir('prockill_race')
 subdir('spgist_name_ops')
 subdir('ssl_passphrase_callback')
 subdir('test_aio')
diff --git a/src/test/modules/prockill_race/Makefile b/src/test/modules/prockill_race/Makefile
new file mode 100644
index 00000000000..738e3314653
--- /dev/null
+++ b/src/test/modules/prockill_race/Makefile
@@ -0,0 +1,47 @@
+# src/test/modules/prockill_race/Makefile
+#
+# TAP test: concurrent ProcKill in a lock group must not put a PGPROC on the
+# freelist while procLatch is still owned.  With the server fix, this test passes.
+
+MODULE_big = prockill_race
+OBJS = $(WIN32RES) prockill_race.o
+PGFILEDESC = "prockill_race - ProcKill lock-group race test helpers"
+
+EXTENSION = prockill_race
+DATA = prockill_race--1.0.sql
+
+TAP_TESTS = 1
+NO_INSTALLCHECK = 1
+
+# Temp-install: ensure injection_points and this module are installed.
+REGRESS_PREP = prockill_race_tap_install
+
+.PHONY: prockill_race_tap_install
+prockill_race_tap_install:
+	$(MAKE) -C '$(top_builddir)'/src/test/modules/injection_points DESTDIR='$(abs_top_builddir)'/tmp_install install
+	$(MAKE) -C '$(top_builddir)'/src/test/modules/prockill_race DESTDIR='$(abs_top_builddir)'/tmp_install install
+
+ifdef USE_PGXS
+PG_CONFIG = pg_config
+PGXS := $(shell $(PG_CONFIG) --pgxs)
+include $(PGXS)
+else
+subdir = src/test/modules/prockill_race
+top_builddir = ../../../..
+include $(top_builddir)/src/Makefile.global
+
+ifeq ($(enable_injection_points),yes)
+ifndef NO_TEMP_INSTALL
+prockill_race_tap_install: temp-install
+endif
+include $(top_srcdir)/contrib/contrib-global.mk
+else
+all install installdirs uninstall distprep:
+	@echo "prockill_race TAP test requires --enable-injection-points"
+check:
+	@echo "prockill_race TAP test requires --enable-injection-points"
+clean distclean maintainer-clean:
+	@:
+endif
+
+endif
diff --git a/src/test/modules/prockill_race/meson.build b/src/test/modules/prockill_race/meson.build
new file mode 100644
index 00000000000..98bee360dc1
--- /dev/null
+++ b/src/test/modules/prockill_race/meson.build
@@ -0,0 +1,38 @@
+# Copyright (c) 2025-2026, PostgreSQL Global Development Group
+
+if not get_option('injection_points')
+  subdir_done()
+endif
+
+prockill_race_sources = files('prockill_race.c')
+
+if host_system == 'windows'
+  prockill_race_sources += rc_lib_gen.process(win32ver_rc, extra_args: [
+    '--NAME', 'prockill_race',
+    '--FILEDESC', 'prockill_race - ProcKill lock-group test helpers',])
+endif
+
+prockill_race = shared_module('prockill_race',
+  prockill_race_sources,
+  kwargs: pg_test_mod_args,
+)
+test_install_libs += prockill_race
+
+test_install_data += files(
+  'prockill_race.control',
+  'prockill_race--1.0.sql',
+)
+
+tests += {
+  'name': 'prockill_race',
+  'sd': meson.current_source_dir(),
+  'bd': meson.current_build_dir(),
+  'tap': {
+    'env': {
+      'enable_injection_points': get_option('injection_points') ? 'yes' : 'no',
+    },
+    'tests': [
+      't/001_prockill_lockgroup_injection.pl',
+    ],
+  },
+}
diff --git a/src/test/modules/prockill_race/prockill_race--1.0.sql b/src/test/modules/prockill_race/prockill_race--1.0.sql
new file mode 100644
index 00000000000..b9a43e70c91
--- /dev/null
+++ b/src/test/modules/prockill_race/prockill_race--1.0.sql
@@ -0,0 +1,33 @@
+/* prockill_race--1.0.sql */
+
+\echo Use "CREATE EXTENSION prockill_race" to load this file. \quit
+
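+-- Lock-group formation without parallel workers: the victim leader session
+-- calls the first function, then each victim member session calls the second
+-- with the leader's PID.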
+CREATE FUNCTION prockill_become_lock_group_leader()
+RETURNS void
+AS 'MODULE_PATHNAME', 'prockill_become_lock_group_leader'
+LANGUAGE C STRICT PARALLEL UNSAFE;
+
+CREATE FUNCTION prockill_become_lock_group_member(leader_pid integer)
+RETURNS void
+AS 'MODULE_PATHNAME', 'prockill_become_lock_group_member'
+LANGUAGE C STRICT PARALLEL UNSAFE;
+
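+-- Attach injection_points' injection_wait callback to point_name, restricted
+-- to target_pid.  Must be run from a controller session, not from a victim;
+-- see the comment in prockill_race.c for why.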
+CREATE FUNCTION prockill_attach_injection_wait(point_name text, target_pid integer)
+RETURNS void
+AS 'MODULE_PATHNAME', 'prockill_attach_injection_wait'
+LANGUAGE C STRICT PARALLEL UNSAFE;
+
+-- Test-only probe: is the backend with the given PID currently waiting on the
+-- named injection point?  Looks directly at ProcGlobal->allProcs so it keeps
+-- working while the target is blocked inside ProcKill() (after pgstat and
+-- ProcArray teardown have already run).
+CREATE FUNCTION prockill_backend_in_injection(target_pid integer, point_name text)
+RETURNS boolean
+AS 'MODULE_PATHNAME', 'prockill_backend_in_injection'
+LANGUAGE C STRICT PARALLEL UNSAFE;
diff --git a/src/test/modules/prockill_race/prockill_race.c b/src/test/modules/prockill_race/prockill_race.c
new file mode 100644
index 00000000000..260cb2c75ae
--- /dev/null
+++ b/src/test/modules/prockill_race/prockill_race.c
@@ -0,0 +1,266 @@
+/*-------------------------------------------------------------------------
+ *
+ * prockill_race.c
+ *		SQL helpers for TAP test t/001_prockill_lockgroup_injection.pl
+ *
+ * Exposes lock-group formation without parallel query so ProcKill lock-group
+ * teardown can be stress-tested with injection points.
+ *
+ * Copyright (c) 2025-2026, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *		src/test/modules/prockill_race/prockill_race.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "fmgr.h"
+#include "miscadmin.h"
+#include "storage/proc.h"
+#include "storage/procarray.h"
+#include "utils/builtins.h"
+#include "utils/injection_point.h"
+#include "utils/wait_event.h"
+
+#include "../injection_points/injection_point_condition.h"
+
+PG_MODULE_MAGIC;
+
+/*
+ * Read a uint32 field exactly once.  Matches the idiom used elsewhere (see
+ * src/backend/utils/adt/waitfuncs.c) for sampling PGPROC->wait_event_info
+ * without locking.
+ */
+#define UINT32_ACCESS_ONCE(var)  ((uint32) (*((volatile uint32 *)&(var))))
+
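+/*
+ * prockill_become_lock_group_leader
+ *		Make the calling backend a lock-group leader, as a parallel-query
+ *		leader would, but without launching any workers.
+ */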
+PG_FUNCTION_INFO_V1(prockill_become_lock_group_leader);
+
+Datum
+prockill_become_lock_group_leader(PG_FUNCTION_ARGS)
+{
+	BecomeLockGroupLeader();
+	PG_RETURN_VOID();
+}
+
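+/*
+ * prockill_become_lock_group_member
+ *		Join the lock group led by leader_pid, as a parallel worker would.
+ *		Errors out if the leader cannot be found or joined.
+ */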
+PG_FUNCTION_INFO_V1(prockill_become_lock_group_member);
+
+Datum
+prockill_become_lock_group_member(PG_FUNCTION_ARGS)
+{
+	int			leader_pid = PG_GETARG_INT32(0);
+	PGPROC	   *leader;
+
+	leader = BackendPidGetProc(leader_pid);
+	if (leader == NULL)
+		elog(ERROR, "backend with PID %d not found", leader_pid);
+
+	if (!BecomeLockGroupMember(leader, leader_pid))
+		elog(ERROR, "could not join lock group of backend %d", leader_pid);
+
+	PG_RETURN_VOID();
+}
+
+/*
+ * prockill_attach_injection_wait
+ *		Attach injection_wait() to a named INJECTION_POINT, scoped to a
+ *		single victim PID, from a controller backend.
+ *
+ * Registers the injection_wait callback from the sibling injection_points
+ * module against point_name, carrying an InjectionPointCondition of type
+ * INJ_CONDITION_PID and pid = target_pid as private data.  Every backend
+ * that later reaches INJECTION_POINT(point_name, NULL) runs the callback,
+ * but only the one whose MyProcPid equals target_pid actually sleeps on
+ * the point's condition variable until injection_points_wakeup(point_name)
+ * is called from some other session.
+ *
+ * Why this helper exists (the important part):
+ *
+ * The TAP scenario needs to pause the victim backends (leader and follower)
+ * inside ProcKill() at the prockill-after-lockgroup-* sites in proc.c.  The
+ * naive approach would be for the victim itself to run
+ * injection_points_set_local() + injection_points_attach(name, 'wait')
+ * before it is terminated.  That does not work — and the failure mode is
+ * not obvious at first:
+ *
+ *   - injection_points_set_local() flips injection_point_local = true and
+ *     registers before_shmem_exit(injection_points_cleanup, 0).
+ *   - injection_points_attach() in local mode appends the attached point
+ *     name to inj_list_local so that injection_points_cleanup() knows to
+ *     detach it on process exit.
+ *   - On pg_terminate_backend() the victim goes into proc_exit().
+ *     shmem_exit() runs before_shmem_exit callbacks ahead of on_shmem_exit
+ *     callbacks, and ProcKill is registered via on_shmem_exit.  So
+ *     injection_points_cleanup() fires first and calls
+ *     InjectionPointDetach() for every locally-tracked point.
+ *   - By the time ProcKill() actually runs in the victim, the injection
+ *     point is gone; INJECTION_POINT(prockill-after-lockgroup-*) resolves
+ *     to a no-op, the victim never waits, and the TAP test has no way to
+ *     control the order in which the two victims traverse ProcKill,
+ *     defeating the whole point of the reproducer.
+ *
+ * Attaching via this function from a controller backend (a third session
+ * that is not being terminated) avoids the trap: the controller never
+ * enters ProcKill during the scenario, its own eventual shutdown is
+ * unrelated to the victims', and, crucially, we call
+ * InjectionPointAttach() directly, without going through
+ * injection_points_set_local().  No before_shmem_exit cleanup is registered
+ * on the victims, so the injection point remains live in shared memory
+ * throughout each victim's exit, including while they are executing
+ * ProcKill().  The PID condition keeps the wait surgical: only the intended
+ * victim blocks; every other backend that happens to traverse the same code
+ * path runs the callback but returns immediately.
+ *
+ * Additional properties worth noting:
+ *
+ *   - The injection_points extension must be preloaded
+ *     (shared_preload_libraries = 'injection_points') so that
+ *     injection_wait and its condition handling are resolvable from every
+ *     backend, including the victim at the moment it runs ProcKill().
+ *   - The callback metadata lives in shared memory via
+ *     InjectionPointAttach(), so it is visible to all backends regardless
+ *     of which session performed the attach.
+ *   - The action is hard-coded to "injection_wait".  This module has no
+ *     use for the "error" / "notice" actions, and the narrower API keeps
+ *     the TAP call sites short and hard to misuse.
+ *   - Detaching is the caller's responsibility (the TAP test calls
+ *     injection_points_detach(name) during cleanup).  If the maximum
+ *     number of attached points is reached, InjectionPointAttach() raises
+ *     ERROR; this function does not try to paper over that.
+ *
+ * Arguments:
+ *	point_name: name of the injection point as it appears in the backend
+ *		source (the first argument of INJECTION_POINT()).  Must be
+ *		non-NULL; the function is declared STRICT.
+ *	target_pid: OS PID of the victim backend that should block when it
+ *		executes point_name.  Other backends that reach the same
+ *		point run the callback but return immediately, without
+ *		sleeping.
+ *
+ * Returns void; the function is called for its side effect (registering
+ * the callback in shared memory).  On any failure inside
+ * InjectionPointAttach(), control does not return normally; an ERROR is
+ * raised instead.
+ *
+ * Must be called from a backend that is not one of the victims.  Intended
+ * for use only from TAP tests of this module.
+ */
+PG_FUNCTION_INFO_V1(prockill_attach_injection_wait);
+
+Datum
+prockill_attach_injection_wait(PG_FUNCTION_ARGS)
+{
+	char	   *name = text_to_cstring(PG_GETARG_TEXT_PP(0));
+	int			target_pid = PG_GETARG_INT32(1);
+	InjectionPointCondition cond;
+
+	cond.type = INJ_CONDITION_PID;
+	cond.pid = target_pid;
+
+	InjectionPointAttach(name, "injection_points", "injection_wait",
+						 &cond, sizeof(cond));
+	PG_RETURN_VOID();
+}
+
+/*
+ * prockill_backend_in_injection
+ *		Test-only probe: is the backend with the given PID currently
+ *		waiting on the named injection point, read directly from its
+ *		PGPROC slot?
+ *
+ * Scans ProcGlobal->allProcs for the slot whose proc->pid equals target_pid,
+ * atomically reads proc->wait_event_info (see UINT32_ACCESS_ONCE), resolves
+ * it to a wait-event name via pgstat_get_wait_event(), and returns whether
+ * that name equals point_name.
+ *
+ * Why this helper exists (the important part):
+ *
+ * The TAP scenario that drives this module needs to observe a leader /
+ * follower backend exactly while it is suspended at an INJECTION_POINT
+ * placed inside ProcKill() during lock-group teardown.  None of the standard
+ * observability surfaces work at that point in the exit sequence:
+ *
+ *   - pg_stat_activity returns nothing for the target because the pgstat
+ *     BackendStatusArray slot has already been cleared by
+ *     pgstat_beshutdown_hook (it sets st_procpid = 0, and
+ *     pgstat_read_current_status filters on st_procpid > 0).
+ *   - BackendPidGetProc() returns NULL because the target has already been
+ *     removed from the ProcArray by RemoveProcFromArray().
+ *
+ * Both of those cleanups are registered as on_shmem_exit callbacks and
+ * therefore run, in LIFO order, before ProcKill()'s own body, that is,
+ * before our injection point fires.  Attempts to use pg_stat_activity or
+ * BackendPidGetProc() to detect the wait therefore spin until they time
+ * out.
+ *
+ * What is still valid at the injection point is the PGPROC slot itself,
+ * because ProcKill() only zeroes proc->pid and proc->wait_event_info near
+ * the end of its body, after all INJECTION_POINT macros inside it.
+ * Scanning ProcGlobal->allProcs directly therefore yields a truthful,
+ * non-racy answer at exactly the moment we care about.
+ *
+ * The read of wait_event_info is deliberately lock-free and single-word
+ * atomic (mirroring pg_stat_get_backend_wait_event_type /
+ * pg_stat_get_backend_wait_event in waitfuncs.c).  A torn read is not
+ * possible because the field is a 4-byte aligned uint32.
+ *
+ * Limitations:
+ *
+ *   - If two PGPROC slots claim the same PID (e.g. briefly, while a slot is
+ *     being recycled) the first match wins.  This is not a concern for the
+ *     test scenario because the target PIDs are known-live and unique.
+ *   - point_name must be the name as returned by pgstat_get_wait_event()
+ *     for the PG_WAIT_INJECTIONPOINT class, which is exactly the name
+ *     passed to INJECTION_POINT() in the backend code under test.
+ *
+ * Arguments:
+ *	target_pid: OS PID of the backend to inspect.
+ *	point_name: injection-point / wait-event name to match.  Must be
+ *		non-NULL; the function is declared STRICT.
+ *
+ * Returns true if a PGPROC with proc->pid equal to target_pid exists and
+ * its current wait-event name equals point_name.  Returns false if no
+ * PGPROC has proc->pid == target_pid, or it exists but is not currently
+ * reporting any wait, or it is waiting on a different event.
+ *
+ * Intended for use only from TAP tests of this module.  The lookup is
+ * O(MaxBackends + NUM_AUXILIARY_PROCS) per call, which is fine for a
+ * test helper.
+ */
+PG_FUNCTION_INFO_V1(prockill_backend_in_injection);
+
+Datum
+prockill_backend_in_injection(PG_FUNCTION_ARGS)
+{
+	int			target_pid = PG_GETARG_INT32(0);
+	char	   *want_name = text_to_cstring(PG_GETARG_TEXT_PP(1));
+	uint32		n = ProcGlobal->allProcCount;
+
+	for (uint32 i = 0; i < n; i++)
+	{
+		PGPROC	   *proc = &ProcGlobal->allProcs[i];
+		uint32		wei;
+		const char *ev;
+
+		if (proc->pid != target_pid)
+			continue;
+
+		wei = UINT32_ACCESS_ONCE(proc->wait_event_info);
+		ev = pgstat_get_wait_event(wei);
+
+		PG_RETURN_BOOL(ev != NULL && strcmp(ev, want_name) == 0);
+	}
+
+	PG_RETURN_BOOL(false);
+}
diff --git a/src/test/modules/prockill_race/prockill_race.control b/src/test/modules/prockill_race/prockill_race.control
new file mode 100644
index 00000000000..3f8cfc8c995
--- /dev/null
+++ b/src/test/modules/prockill_race/prockill_race.control
@@ -0,0 +1,7 @@
+# prockill_race extension
+comment = 'TAP test helpers for ProcKill lock-group / injection point race'
+default_version = '1.0'
+module_pathname = '$libdir/prockill_race'
+relocatable = true
+superuser = true
+trusted = true
diff --git a/src/test/modules/prockill_race/t/001_prockill_lockgroup_injection.pl b/src/test/modules/prockill_race/t/001_prockill_lockgroup_injection.pl
new file mode 100644
index 00000000000..70808608005
--- /dev/null
+++ b/src/test/modules/prockill_race/t/001_prockill_lockgroup_injection.pl
@@ -0,0 +1,243 @@
+# Copyright (c) 2025-2026, PostgreSQL Global Development Group
+#
+# Deterministic regression test for ProcKill lock-group teardown vs. shared
+# procLatch ownership (bug: PGPROC recycled while its procLatch is still owned).
+# Requires a build with injection points enabled, TAP tests, and a temp install.
+#
+# Injection point names: prockill-after-lockgroup-* in proc.c (INJECTION_POINT).
+#
+# The test expects a fixed server: no "latch already owned" PANIC, both
+# victims reach their respective ProcKill injection points, and the
+# controller's final SELECT 1 succeeds.  Any of those failing indicates a
+# regression of the lock-group teardown fix.
+#
+# Why eval around some calls: safe_psql dies on connection errors.  If the
+# lock-group fix regresses, the postmaster may PANIC mid-scenario and later
+# psql invocations will fail to connect; we catch those failures so we can
+# still classify the outcome via the server log and shut the node down
+# cleanly.
+
+use strict;
+use warnings FATAL => 'all';
+
+use PostgreSQL::Test::Utils qw(slurp_file);
+use Test::More;
+use PostgreSQL::Test::Cluster;
+
+use constant PANIC_RE => qr/PANIC:.*latch already owned by PID/s;
+
+##################
+# Initialization #
+##################
+if ($ENV{enable_injection_points} ne 'yes')
+{
+	plan skip_all => 'Injection points not supported by this build';
+}
+
+my $node = PostgreSQL::Test::Cluster->new('prockill_race');
+$node->init;
+$node->append_conf('postgresql.conf',
+	q{shared_preload_libraries = 'injection_points'});
+$node->start;
+
+plan skip_all => 'Extension injection_points not installed'
+  unless $node->check_extension('injection_points');
+plan skip_all => 'Extension prockill_race not installed'
+  unless $node->check_extension('prockill_race');
+
+$node->safe_psql('postgres',
+	q{CREATE EXTENSION IF NOT EXISTS injection_points; CREATE EXTENSION IF NOT EXISTS prockill_race;});
+
+my $leader = $node->background_psql('postgres');
+my $follower = $node->background_psql('postgres');
+
+####################################################
+# Part 1. Form a lock group: leader and one member #
+####################################################
+$leader->query_safe('SELECT prockill_become_lock_group_leader();');
+my $leader_pid = $leader->query_safe('SELECT pg_backend_pid()');
+$leader_pid =~ s/\s+//g;
+
+my $follower_pid = $follower->query_safe('SELECT pg_backend_pid()');
+$follower_pid =~ s/\s+//g;
+
+$follower->query_safe(
+	"SELECT prockill_become_lock_group_member($leader_pid);");
+
+# Attach the injection-point waits from a *separate*, one-shot controller
+# session -- never from $leader or $follower, and never from a session that
+# has run injection_points_set_local().
+#
+# Why a controller session:
+#
+#   * The ProcKill injection points must still be attached when the victim
+#     backends (leader/follower) tear themselves down.  If the attach were
+#     done via injection_points_set_local()+injection_points_attach() from
+#     the victim itself, the before_shmem_exit(injection_points_cleanup)
+#     callback registered by set_local() would fire ahead of shmem_exit
+#     (and therefore ahead of ProcKill, which is on_shmem_exit), detach
+#     the point mid-exit, and turn INJECTION_POINT(prockill-after-
+#     lockgroup-*) into a no-op.  The victim would never wait, and the
+#     race-ordering handshake this test relies on would collapse.
+#
+#   * prockill_attach_injection_wait() calls InjectionPointAttach() directly,
+#     without touching injection_points_set_local().  Running it from a
+#     non-victim backend therefore leaves no before_shmem_exit cleanup hook
+#     on the leader or follower: the injection points stay live in shared
+#     memory across the victims' entire exit path, including across
+#     ProcKill().
+#
+#   * safe_psql() spawns a fresh psql (a "throwaway controller") for this
+#     one call.  That backend exits cleanly right after attaching; its own
+#     eventual proc_exit is unrelated to the leader/follower teardown we
+#     are about to drive.  Using $leader or $follower here instead would
+#     both (a) reintroduce the set_local()/before_shmem_exit trap above
+#     and (b) tie the attach's lifetime to the session we are about to
+#     terminate.
+#
+# The PID arguments pick which victim each injection point matches:
+# INJ_CONDITION_PID in the callback's private data keys on MyProcPid, so
+# only the named leader_pid / follower_pid actually sleep; any other
+# backend that happens to hit the same INJECTION_POINT runs the callback
+# and returns immediately.
+$node->safe_psql(
+	'postgres', qq(
+	SELECT prockill_attach_injection_wait('prockill-after-lockgroup-leader', $leader_pid);
+	SELECT prockill_attach_injection_wait('prockill-after-lockgroup-follower', $follower_pid);
+));
+
+#######################################################################
+# Part 2. Terminate the lock group leader and the member concurrently #
+#######################################################################
+$node->safe_psql('postgres',
+	"SELECT pg_terminate_backend($leader_pid)");
+
+my $leader_wait_ok = wait_for_injection_event(
+	$node, $leader_pid, 'prockill-after-lockgroup-leader');
+
+eval {
+	$node->safe_psql('postgres',
+		"SELECT pg_terminate_backend($follower_pid)");
+};
+
+my $follower_wait_ok = wait_for_injection_event(
+	$node, $follower_pid, 'prockill-after-lockgroup-follower');
+
+eval {
+	$node->safe_psql('postgres',
+		q{SELECT injection_points_wakeup('prockill-after-lockgroup-follower');});
+};
+
+############################################################
+# Part 3. Check for the "latch already owned by PID" PANIC #
+############################################################
+my $survived = eval {
+	$node->safe_psql('postgres', 'SELECT 1');
+	1;
+};
+my $select_err = $@;
+
+# Was the latch-recycle PANIC reached?  Either the postmaster log already
+# recorded it, or our last SELECT died with that specific error text.
+my $log = eval { slurp_file($node->logfile) } // '';
+my $panic = ($log =~ PANIC_RE)
+  || (defined $select_err
+	&& $select_err ne ''
+	&& $select_err =~ /latch already owned by PID/);
+
+my ($outcome_ok, $outcome_desc);
+if ($panic)
+{
+	$outcome_ok   = 0;
+	$outcome_desc = 'latch recycle PANIC (regression of ProcKill lock-group fix)';
+}
+elsif ($survived)
+{
+	$outcome_ok   = 1;
+	$outcome_desc =
+		'ProcKill lock-group: no latch recycle PANIC and session survived';
+}
+else
+{
+	$outcome_ok   = 0;
+	$outcome_desc =
+		'neither PANIC nor successful SELECT 1 (harness or environment)';
+}
+
+ok($outcome_ok, $outcome_desc);
+
+ok(
+	!$panic && ($leader_wait_ok && $follower_wait_ok),
+	'leader and follower reached ProcKill injection waits without latch PANIC (via PGPROC scan)'
+);
+
+eval {
+	$node->safe_psql('postgres',
+		q{SELECT injection_points_wakeup('prockill-after-lockgroup-leader');}
+	);
+};
+
+eval {
+	$node->safe_psql('postgres',
+		q{SELECT injection_points_detach('prockill-after-lockgroup-leader');});
+	$node->safe_psql('postgres',
+		q{SELECT injection_points_detach('prockill-after-lockgroup-follower');});
+};
+
+############
+# Clean up #
+############
+eval { $leader->quit; };
+eval { $follower->quit; };
+
+my $stop_fail_ok =
+	 $panic
+	|| !-e ($node->data_dir . '/postmaster.pid');
+if ($stop_fail_ok)
+{
+	$node->stop('fast', fail_ok => 1);
+}
+else
+{
+	$node->stop('fast');
+}
+
+done_testing();
+
+
+# Wait until the given backend (pid) reports the expected injection-point
+# wait event.  Uses prockill_backend_in_injection() from the prockill_race
+# extension, which reads PGPROC->wait_event_info directly via
+# ProcGlobal->allProcs and therefore keeps working while the target backend
+# is blocked inside ProcKill() -- after pgstat_beshutdown_hook and
+# RemoveProcFromArray have already torn down pg_stat_activity / BackendPidGetProc.
+#
+# Bounded loop.  We stop polling early if safe_psql dies with a signature of
+# "server is gone" (connection refused, closed mid-query, or the latch-recycle
+# PANIC itself): if the lock-group fix regresses the postmaster may crash and
+# shut down before we get here, in which case every subsequent poll would
+# just be a wasted psql fork/exec.  Classification of that outcome is left to
+# the caller (via the postmaster log / $@ on later statements).
+sub wait_for_injection_event
+{
+	my ($node, $pid, $injection_name) = @_;
+	my $n = $injection_name;
+	$n =~ s/'/''/g;
+	# 200 * 50ms ~= 10s per phase.
+	for my $i (1 .. 200)
+	{
+		my $saw = eval
+		{
+			$node->safe_psql('postgres',
+							 "SELECT prockill_backend_in_injection($pid, '$n')")
+		};
+		my $err = $@;
+		return 1 if defined $saw && $saw eq 't';
+		last
+		  if $err
+		  && $err =~
+		  /could not connect|server closed the connection|latch already owned/;
+		select undef, undef, undef, 0.05;
+	}
+	return 0;
+}
-- 
2.43.0

