From 10684fed3937d60ccd3738b712c848c7c39a41fc Mon Sep 17 00:00:00 2001
From: Vlad Lesin <vladlesin@gmail.com>
Date: Thu, 14 May 2026 16:27:27 +0300
Subject: [PATCH 3/5] Add regression test for ProcKill lock-group procLatch
 recycle race

Two backends form a lock group via new SQL helpers and are then
terminated concurrently.  Two injection points placed inside
ProcKill() let the test pause the victims so that a freshly forked
backend's attempt to claim the recycled PGPROC slot can be observed:
if the leader's procLatch is still owned at the moment the slot is
published on the freelist, the new backend's OwnLatch() PANICs with
"latch already owned by PID ...".

The first injection point, "prockill-after-lockgroup", sits inside
the enclosing MyProc->lockGroupLeader != NULL branch in ProcKill --
so only backends that are part of a lock group (leader or follower)
reach it.  Both victims park there once they have entered their
lock-group teardown.  The new prockill_backend_in_injection() helper
reads PGPROC->wait_event_info from ProcGlobal->allProcs directly to
detect when a victim has parked.

The injection_wait must be attached from a controller session rather
than from a victim, and the attachment must outlive the controller's
own backend exit.  injection_points_attach() registers a
before_shmem_exit cleanup that detaches the point when its caller's
backend exits (and, if called from a victim, before ProcKill ever
runs).  The new prockill_attach_injection_wait() helper calls
InjectionPointAttach() directly, leaving no such hook.

The race is timing-dependent in practice, so a second injection
point is placed in proc.c immediately after the lock-group block,
"prockill-pre-disown-latch".  It is unconditional at the macro level
(any exiting backend traverses it), but only backends matching a
PID-scoped attach actually wait.  The new
prockill_attach_injection_wait_pid() helper attaches a wait scoped to
a specific backend's PID using
InjectionPointCondition{INJ_CONDITION_PID, pid} as private_data; like
the unconditional variant, it bypasses injection_points_attach()'s
before_shmem_exit cleanup hook.

The test attaches the unconditional first point and the PID-scoped
second point (scoped to the leader's PID), then terminates both
victims concurrently.  After both wake from
"prockill-after-lockgroup":

  - the leader reaches "prockill-pre-disown-latch" and parks
    (PID-scoped condition matches MyProcPid);

  - the follower reaches the same point but skips the wait (PID
    does not match), continues through SwitchBackToLocalLatch,
    pgstat_reset_wait_event_storage, MyProc = NULL, the DisownLatch
    of its own latch, and the consolidated freelist push --
    publishing the leader's PGPROC while the leader is still parked.

If the leader's procLatch is still owned at that moment, the next
backend fork PANICs in OwnLatch() -- which the test detects via the
server log.

Meson propagates enable_injection_points into the new TAP test's
environment (matching test_misc / test_aio / test_checksums), so the
test can check that setting directly and does not need an explicit
environment-variable assignment at run time.  The test expects a
server that already contains the ProcKill repair, so it must land
together with that fix.  Requires --enable-injection-points.
---
 src/backend/storage/lmgr/proc.c               |  17 ++
 src/test/modules/injection_points/Makefile    |   5 +-
 .../injection_points--1.0.sql                 |  41 +++++
 src/test/modules/injection_points/meson.build |   9 ++
 .../injection_points/regress_prockill.c       | 133 ++++++++++++++++
 .../t/001_prockill_lockgroup.pl               | 146 ++++++++++++++++++
 6 files changed, 350 insertions(+), 1 deletion(-)
 create mode 100644 src/test/modules/injection_points/regress_prockill.c
 create mode 100644 src/test/modules/injection_points/t/001_prockill_lockgroup.pl

diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c
index 1ac25068d62..9ba0d4790fd 100644
--- a/src/backend/storage/lmgr/proc.c
+++ b/src/backend/storage/lmgr/proc.c
@@ -990,8 +990,25 @@ ProcKill(int code, Datum arg)
 		else if (leader != MyProc)
 			MyProc->lockGroupLeader = NULL;
 		LWLockRelease(leader_lwlock);
+
+		/*
+		 * Test hook for src/test/modules/injection_points.  Gated by the
+		 * enclosing lockGroupLeader != NULL branch, so only backends that are
+		 * part of a lock group (leader or follower) reach it.
+		 */
+		INJECTION_POINT("prockill-after-lockgroup", NULL);
 	}
 
+	/*
+	 * Test hook for src/test/modules/injection_points.  Unconditional: any
+	 * exiting backend reaches it.  Combined with a PID-scoped wait attach, it
+	 * lets a test stall a specific victim between the lock-group block and
+	 * the (post-block, pre-fix) DisownLatch so that the consolidated freelist
+	 * push of the leader's slot by the follower can race against the leader's
+	 * still-owned procLatch.
+	 */
+	INJECTION_POINT("prockill-pre-disown-latch", NULL);
+
 	/*
 	 * Reset MyLatch to the process local one.  This is so that signal
 	 * handlers et al can continue using the latch after the shared latch
diff --git a/src/test/modules/injection_points/Makefile b/src/test/modules/injection_points/Makefile
index c01d2fb095c..8a2bdebbbbb 100644
--- a/src/test/modules/injection_points/Makefile
+++ b/src/test/modules/injection_points/Makefile
@@ -4,7 +4,8 @@ MODULE_big = injection_points
 OBJS = \
 	$(WIN32RES) \
 	injection_points.o \
-	regress_injection.o
+	regress_injection.o \
+	regress_prockill.o
 EXTENSION = injection_points
 DATA = injection_points--1.0.sql
 PGFILEDESC = "injection_points - facility for injection points"
@@ -12,6 +13,8 @@ PGFILEDESC = "injection_points - facility for injection points"
 REGRESS = injection_points hashagg reindex_conc vacuum
 REGRESS_OPTS = --dlpath=$(top_builddir)/src/test/regress
 
+TAP_TESTS = 1
+
 ISOLATION = basic \
 	    inplace \
 	    repack \
diff --git a/src/test/modules/injection_points/injection_points--1.0.sql b/src/test/modules/injection_points/injection_points--1.0.sql
index 861c7355d4e..1169e018eb6 100644
--- a/src/test/modules/injection_points/injection_points--1.0.sql
+++ b/src/test/modules/injection_points/injection_points--1.0.sql
@@ -108,3 +108,44 @@ CREATE FUNCTION removable_cutoff(rel regclass)
 RETURNS xid8
 AS 'MODULE_PATHNAME'
 LANGUAGE C CALLED ON NULL INPUT;
+
+--
+-- regress_prockill.c functions
+--
+-- Form a lock group without parallel query so ProcKill lock-group teardown
+-- can be tested with injection points.
+--
+CREATE FUNCTION prockill_become_lock_group_leader()
+RETURNS void
+AS 'MODULE_PATHNAME'
+LANGUAGE C STRICT;
+
+CREATE FUNCTION prockill_become_lock_group_member(leader_pid int)
+RETURNS void
+AS 'MODULE_PATHNAME'
+LANGUAGE C STRICT;
+
+-- Attach injection_wait to point_name with no condition.  Must be called from
+-- a controller session, not from a backend that will itself fire the point
+-- (see source).
+CREATE FUNCTION prockill_attach_injection_wait(point_name text)
+RETURNS void
+AS 'MODULE_PATHNAME'
+LANGUAGE C STRICT;
+
+-- Attach injection_wait to point_name with INJ_CONDITION_PID scoping it to a
+-- specific backend's PID (the callback returns immediately for any other
+-- backend that fires the point).  Like the unconditional variant above, must
+-- be called from a controller session.
+CREATE FUNCTION prockill_attach_injection_wait_pid(point_name text, target_pid int)
+RETURNS void
+AS 'MODULE_PATHNAME'
+LANGUAGE C STRICT;
+
+-- Return true if target_pid is currently waiting at the named injection point,
+-- read directly from its PGPROC slot (works inside ProcKill where
+-- pg_stat_activity and ProcArray are already torn down).
+CREATE FUNCTION prockill_backend_in_injection(target_pid int, point_name text)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE C STRICT;
diff --git a/src/test/modules/injection_points/meson.build b/src/test/modules/injection_points/meson.build
index 59dba1cb023..626a1a8641c 100644
--- a/src/test/modules/injection_points/meson.build
+++ b/src/test/modules/injection_points/meson.build
@@ -7,6 +7,7 @@ endif
 injection_points_sources = files(
   'injection_points.c',
   'regress_injection.c',
+  'regress_prockill.c',
 )
 
 if host_system == 'windows'
@@ -41,6 +42,14 @@ tests += {
     # The injection points are cluster-wide, so disable installcheck
     'runningcheck': false,
   },
+  'tap': {
+    'env': {
+      'enable_injection_points': get_option('injection_points') ? 'yes' : 'no',
+    },
+    'tests': [
+      't/001_prockill_lockgroup.pl',
+    ],
+  },
   'isolation': {
     'specs': [
       'basic',
diff --git a/src/test/modules/injection_points/regress_prockill.c b/src/test/modules/injection_points/regress_prockill.c
new file mode 100644
index 00000000000..69a73adcc49
--- /dev/null
+++ b/src/test/modules/injection_points/regress_prockill.c
@@ -0,0 +1,133 @@
+/*-------------------------------------------------------------------------
+ *
+ * regress_prockill.c
+ *		SQL helpers for the ProcKill lock-group TAP test
+ *
+ * Exposes lock-group formation without parallel query and helpers that
+ * observe victim backends while they are inside ProcKill().
+ *
+ * Copyright (c) 2025-2026, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *		src/test/modules/injection_points/regress_prockill.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "fmgr.h"
+#include "storage/proc.h"
+#include "storage/procarray.h"
+#include "utils/builtins.h"
+#include "utils/injection_point.h"
+#include "utils/wait_event.h"
+
+#include "injection_point_condition.h"
+
+/* Read a uint32 field exactly once (matches waitfuncs.c idiom). */
+#define UINT32_ACCESS_ONCE(var)	((uint32) (*((volatile uint32 *) &(var))))
+
+PG_FUNCTION_INFO_V1(prockill_become_lock_group_leader);
+
+Datum
+prockill_become_lock_group_leader(PG_FUNCTION_ARGS)
+{
+	BecomeLockGroupLeader();
+	PG_RETURN_VOID();
+}
+
+PG_FUNCTION_INFO_V1(prockill_become_lock_group_member);
+
+Datum
+prockill_become_lock_group_member(PG_FUNCTION_ARGS)
+{
+	int			leader_pid = PG_GETARG_INT32(0);
+	PGPROC	   *leader;
+
+	leader = BackendPidGetProc(leader_pid);
+	if (leader == NULL)
+		elog(ERROR, "backend with PID %d not found", leader_pid);
+
+	if (!BecomeLockGroupMember(leader, leader_pid))
+		elog(ERROR, "could not join lock group of backend %d", leader_pid);
+
+	PG_RETURN_VOID();
+}
+
+/*
+ * Attach injection_wait to point_name with no condition.  The caller is
+ * responsible for ensuring the injection point fires only in the contexts of
+ * interest -- in the lock-group teardown test, the point is placed inside a
+ * MyProc->lockGroupLeader != NULL branch in ProcKill().
+ *
+ * Must be called from a controller session, not from a backend that is itself
+ * about to fire the point.  injection_points_attach() registers a
+ * before_shmem_exit(injection_points_cleanup) hook that would detach the
+ * point as soon as the controller's session ends (and, if called from a
+ * victim, before ProcKill ever runs).  Calling InjectionPointAttach()
+ * directly leaves no such hook.
+ */
+PG_FUNCTION_INFO_V1(prockill_attach_injection_wait);
+
+Datum
+prockill_attach_injection_wait(PG_FUNCTION_ARGS)
+{
+	char	   *name = text_to_cstring(PG_GETARG_TEXT_PP(0));
+
+	InjectionPointAttach(name, "injection_points", "injection_wait", NULL, 0);
+	PG_RETURN_VOID();
+}
+
+/*
+ * Attach injection_wait to point_name with INJ_CONDITION_PID scoping it to a
+ * specific backend's PID.  See injection_point_condition.h for the layout
+ * of the private_data blob.
+ */
+PG_FUNCTION_INFO_V1(prockill_attach_injection_wait_pid);
+
+Datum
+prockill_attach_injection_wait_pid(PG_FUNCTION_ARGS)
+{
+	char	   *name = text_to_cstring(PG_GETARG_TEXT_PP(0));
+	int			target_pid = PG_GETARG_INT32(1);
+	InjectionPointCondition cond = {INJ_CONDITION_PID, target_pid};
+
+	InjectionPointAttach(name, "injection_points", "injection_wait",
+						 &cond, sizeof(cond));
+	PG_RETURN_VOID();
+}
+
+/*
+ * Return true if the backend with target_pid is currently waiting at the
+ * named injection point, by reading PGPROC->wait_event_info directly.
+ *
+ * At the injection points in ProcKill(), the standard observability surfaces
+ * (pg_stat_activity, BackendPidGetProc) are already unavailable: both
+ * pgstat_beshutdown_hook and RemoveProcFromArray run as on_shmem_exit
+ * callbacks before ProcKill.  The PGPROC slot itself remains intact until
+ * ProcKill zeroes proc->pid near its end, so a direct allProcs scan works.
+ */
+PG_FUNCTION_INFO_V1(prockill_backend_in_injection);
+
+Datum
+prockill_backend_in_injection(PG_FUNCTION_ARGS)
+{
+	int			target_pid = PG_GETARG_INT32(0);
+	char	   *want_name = text_to_cstring(PG_GETARG_TEXT_PP(1));
+	uint32		n = ProcGlobal->allProcCount;
+
+	for (uint32 i = 0; i < n; i++)
+	{
+		PGPROC	   *proc = &ProcGlobal->allProcs[i];
+		const char *ev;
+
+		if (proc->pid != target_pid)
+			continue;
+
+		ev = pgstat_get_wait_event(UINT32_ACCESS_ONCE(proc->wait_event_info));
+		PG_RETURN_BOOL(ev != NULL && strcmp(ev, want_name) == 0);
+	}
+
+	PG_RETURN_BOOL(false);
+}
diff --git a/src/test/modules/injection_points/t/001_prockill_lockgroup.pl b/src/test/modules/injection_points/t/001_prockill_lockgroup.pl
new file mode 100644
index 00000000000..425899ac986
--- /dev/null
+++ b/src/test/modules/injection_points/t/001_prockill_lockgroup.pl
@@ -0,0 +1,146 @@
+# Copyright (c) 2025-2026, PostgreSQL Global Development Group
+#
+# Regression test for the ProcKill lock-group vs. procLatch recycle race.
+#
+# Two backends form a lock group, then are terminated concurrently.  The test
+# uses injection points placed inside ProcKill() to pause both victims there
+# and verify that a freshly forked backend can claim the recycled PGPROC slot
+# without hitting "latch already owned by PID ..." PANIC.
+#
+# Requires --enable-injection-points.
+
+use strict;
+use warnings FATAL => 'all';
+
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils qw(slurp_file);
+use Test::More;
+use Time::HiRes qw(usleep);
+
+use constant PANIC_RE => qr/PANIC:\s+latch already owned by PID/;
+
+if (($ENV{enable_injection_points} // '') ne 'yes')
+{
+	plan skip_all => 'Injection points not supported by this build';
+}
+
+my $node = PostgreSQL::Test::Cluster->new('prockill_race');
+$node->init;
+$node->append_conf('postgresql.conf',
+	q{shared_preload_libraries = 'injection_points'});
+$node->start;
+
+$node->safe_psql('postgres', 'CREATE EXTENSION injection_points;');
+
+# Form a two-backend lock group.
+my $leader   = $node->background_psql('postgres');
+my $follower = $node->background_psql('postgres');
+
+$leader->query_safe('SELECT prockill_become_lock_group_leader()');
+my $leader_pid   = $leader->query_safe('SELECT pg_backend_pid()');
+my $follower_pid = $follower->query_safe('SELECT pg_backend_pid()');
+$leader_pid   =~ s/\s+//g;
+$follower_pid =~ s/\s+//g;
+
+$follower->query_safe("SELECT prockill_become_lock_group_member($leader_pid)");
+
+# Attach an injection wait from a throwaway controller session, not from
+# $leader/$follower.  injection_points_attach() registers a before_shmem_exit
+# cleanup that would fire when the controller's session ends (and, if called
+# from a victim, before ProcKill ever runs).  prockill_attach_injection_wait()
+# calls InjectionPointAttach() directly, leaving no such hook.
+$node->safe_psql('postgres',
+	"SELECT prockill_attach_injection_wait('prockill-after-lockgroup')");
+
+# Second injection point, scoped to the leader's PID.  This sits between the
+# lock-group block and the post-block DisownLatch in proc.c.  After both
+# victims wake from 'prockill-after-lockgroup', the leader will stall here
+# while its procLatch is still owned, and the follower (whose PID does not
+# match the condition) will run through to the consolidated freelist push,
+# publishing the leader's PGPROC with the latch still in the owned state.
+# This is what turns the otherwise empirical race into a deterministic one
+# on branches where DisownLatch is not yet hoisted to the top of ProcKill.
+$node->safe_psql('postgres',
+	"SELECT prockill_attach_injection_wait_pid('prockill-pre-disown-latch', $leader_pid)");
+
+# Cap the default poll_query_until timeout: if the lock-group fix has
+# regressed the postmaster will PANIC mid-cleanup, and we don't want to
+# spend 180s polling against a dead server.
+local $PostgreSQL::Test::Utils::timeout_default = 5;
+
+# Kill the leader and wait for it to pause inside ProcKill.
+# prockill_backend_in_injection() reads PGPROC->wait_event_info directly
+# because pg_stat_activity and ProcArray are already torn down before ProcKill.
+$node->safe_psql('postgres', "SELECT pg_terminate_backend($leader_pid)");
+my $leader_ok = $node->poll_query_until(
+	'postgres',
+	"SELECT prockill_backend_in_injection($leader_pid, 'prockill-after-lockgroup')"
+);
+
+# Kill the follower.  Once the follower starts cleanup, one of two things
+# happens: the follower parks at the injection point (fix in place), or a
+# probe backend grabs the recycled PGPROC slot and PANICs (fix regressed).
+# On the PANIC path the postmaster crashes, so subsequent SQL probes return
+# nothing but connection errors -- the live query side has no way to signal
+# "the race fired".  Scanning the server log for PANIC_RE is the only
+# positive evidence of the regression; the dual-condition loop below exits
+# as soon as either condition is observable.
+eval {
+	$node->safe_psql('postgres',
+		"SELECT pg_terminate_backend($follower_pid)");
+};
+my $follower_ok = 0;
+my $deadline = time() + 2;
+while (time() < $deadline)
+{
+	last if slurp_file($node->logfile) =~ PANIC_RE;
+	$follower_ok = eval {
+		$node->safe_psql('postgres',
+			"SELECT prockill_backend_in_injection($follower_pid, 'prockill-after-lockgroup')")
+		  eq 't';
+	} // 0;
+	last if $follower_ok;
+	usleep(50_000);
+}
+
+# Release both victims (no-op if the postmaster has already crashed).
+eval {
+	$node->safe_psql('postgres',
+		"SELECT injection_points_wakeup('prockill-after-lockgroup')");
+};
+
+# Release the leader from the second injection point too (the follower never
+# parked there because of the PID-scoped condition).  No-op on crash.
+eval {
+	$node->safe_psql('postgres',
+		"SELECT injection_points_wakeup('prockill-pre-disown-latch')");
+};
+
+# A new backend must be able to claim the recycled PGPROC slot without PANIC.
+my $select_ok = eval { $node->safe_psql('postgres', 'SELECT 1'); 1 };
+
+# Decisive check: scan the server log for the exact PANIC the fix prevents.
+# This turns a flaky/slow failure mode into a deterministic one-line diagnosis.
+my $log_contents = slurp_file($node->logfile);
+ok($log_contents !~ PANIC_RE,
+	'no "latch already owned" PANIC in server log (regression of ProcKill lock-group fix)');
+
+ok($leader_ok && $follower_ok,
+	'leader and follower reached ProcKill injection points');
+ok($select_ok,
+	'new backend claimed recycled PGPROC without latch-recycle PANIC');
+
+eval {
+	$node->safe_psql('postgres',
+		"SELECT injection_points_detach('prockill-after-lockgroup')");
+};
+eval {
+	$node->safe_psql('postgres',
+		"SELECT injection_points_detach('prockill-pre-disown-latch')");
+};
+
+eval { $leader->quit };
+eval { $follower->quit };
+$node->stop('fast', fail_ok => 1);
+
+done_testing();
-- 
2.43.0

