From 51d4335998d2bca24a3cb0a16c3655baaadf0f78 Mon Sep 17 00:00:00 2001 From: Andrey Borodin Date: Thu, 7 May 2026 11:29:09 +0500 Subject: [PATCH vAB3 1/2] Add regression test for ProcKill lock-group procLatch recycle race Two backends form a lock group via prockill_become_lock_group_leader / prockill_become_lock_group_member (a new test module), then are terminated concurrently. Injection points placed in ProcKill() right after LWLockRelease(leader_lwlock) pause both victims there, letting the test verify that a freshly forked backend can claim the recycled PGPROC slot without hitting "latch already owned by PID ..." PANIC. Standard observability (pg_stat_activity, BackendPidGetProc) is unavailable at that point in ProcKill: pgstat_beshutdown_hook and RemoveProcFromArray, both registered as on_shmem_exit callbacks, run before ProcKill. The new prockill_backend_in_injection() helper reads PGPROC->wait_event_info from ProcGlobal->allProcs directly, which remains valid until ProcKill zeroes proc->pid near its end. Injection waits must be attached from a controller session rather than from the victims themselves. injection_points_attach() from the victim would register a before_shmem_exit cleanup that detaches the point before ProcKill runs. The new prockill_attach_injection_wait() calls InjectionPointAttach() directly, leaving no such hook on the victims. The test expects a fixed server and must land together with the matching ProcKill fix. --- src/test/modules/injection_points/Makefile | 5 +- .../injection_point_condition.h | 25 ++++ .../injection_points--1.0.sql | 31 +++++ .../injection_points/injection_points.c | 26 +--- src/test/modules/injection_points/meson.build | 6 + .../injection_points/regress_prockill.c | 115 ++++++++++++++++++ .../t/001_prockill_lockgroup.pl | 99 +++++++++++++++ 7 files changed, 282 insertions(+), 25 deletions(-) create mode 100644 src/test/modules/injection_points/injection_point_condition.h create mode 100644 src/test/modules/injection_points/regress_prockill.c create mode 100644 src/test/modules/injection_points/t/001_prockill_lockgroup.pl diff --git a/src/test/modules/injection_points/Makefile b/src/test/modules/injection_points/Makefile index f057d143d1a..9b15adf7e60 100644 --- a/src/test/modules/injection_points/Makefile +++ b/src/test/modules/injection_points/Makefile @@ -4,7 +4,8 @@ MODULE_big = injection_points OBJS = \ $(WIN32RES) \ injection_points.o \ - regress_injection.o + regress_injection.o \ + regress_prockill.o EXTENSION = injection_points DATA = injection_points--1.0.sql PGFILEDESC = "injection_points - facility for injection points" @@ -12,6 +13,8 @@ PGFILEDESC = "injection_points - facility for injection points" REGRESS = injection_points hashagg reindex_conc vacuum REGRESS_OPTS = --dlpath=$(top_builddir)/src/test/regress +TAP_TESTS = 1 + ISOLATION = basic \ inplace \ repack \ diff --git a/src/test/modules/injection_points/injection_point_condition.h b/src/test/modules/injection_points/injection_point_condition.h new file mode 100644 index 00000000000..a3200673eed --- /dev/null +++ b/src/test/modules/injection_points/injection_point_condition.h @@ -0,0 +1,25 @@ +/*------------------------------------------------------------------------- + * injection_point_condition.h + * Condition payload for injection_wait callbacks + * + * Copyright (c) 2025-2026, PostgreSQL Global Development Group + * + * src/test/modules/injection_points/injection_point_condition.h + *------------------------------------------------------------------------- + */ +#ifndef INJECTION_POINT_CONDITION_H +#define INJECTION_POINT_CONDITION_H + +typedef enum InjectionPointConditionType +{ + INJ_CONDITION_ALWAYS = 0, + INJ_CONDITION_PID, +} InjectionPointConditionType; + +typedef struct InjectionPointCondition +{ + InjectionPointConditionType type; + int pid; +} InjectionPointCondition; + +#endif /* INJECTION_POINT_CONDITION_H */ diff --git a/src/test/modules/injection_points/injection_points--1.0.sql b/src/test/modules/injection_points/injection_points--1.0.sql index 861c7355d4e..fa7afa9995e 100644 --- a/src/test/modules/injection_points/injection_points--1.0.sql +++ b/src/test/modules/injection_points/injection_points--1.0.sql @@ -108,3 +108,34 @@ CREATE FUNCTION removable_cutoff(rel regclass) RETURNS xid8 AS 'MODULE_PATHNAME' LANGUAGE C CALLED ON NULL INPUT; + +-- +-- regress_prockill.c functions +-- +-- Form a lock group without parallel query so ProcKill lock-group teardown +-- can be tested with injection points. +-- +CREATE FUNCTION prockill_become_lock_group_leader() +RETURNS void +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT; + +CREATE FUNCTION prockill_become_lock_group_member(leader_pid int) +RETURNS void +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT; + +-- Attach injection_wait to point_name scoped to target_pid. Must be called +-- from a controller session, not from the victim itself (see source). +CREATE FUNCTION prockill_attach_injection_wait(point_name text, target_pid int) +RETURNS void +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT; + +-- Return true if target_pid is currently waiting at the named injection point, +-- read directly from its PGPROC slot (works inside ProcKill where +-- pg_stat_activity and ProcArray are already torn down). +CREATE FUNCTION prockill_backend_in_injection(target_pid int, point_name text) +RETURNS bool +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT; diff --git a/src/test/modules/injection_points/injection_points.c b/src/test/modules/injection_points/injection_points.c index 0f1af513673..fb7a7d477cc 100644 --- a/src/test/modules/injection_points/injection_points.c +++ b/src/test/modules/injection_points/injection_points.c @@ -34,36 +34,14 @@ #include "utils/tuplestore.h" #include "utils/wait_event.h" +#include "injection_point_condition.h" + PG_MODULE_MAGIC; /* Maximum number of waits usable in injection points at once */ #define INJ_MAX_WAIT 8 #define INJ_NAME_MAXLEN 64 -/* - * Conditions related to injection points. This tracks in shared memory the - * runtime conditions under which an injection point is allowed to run, - * stored as private_data when an injection point is attached, and passed as - * argument to the callback. - * - * If more types of runtime conditions need to be tracked, this structure - * should be expanded. - */ -typedef enum InjectionPointConditionType -{ - INJ_CONDITION_ALWAYS = 0, /* always run */ - INJ_CONDITION_PID, /* PID restriction */ -} InjectionPointConditionType; - -typedef struct InjectionPointCondition -{ - /* Type of the condition */ - InjectionPointConditionType type; - - /* ID of the process where the injection point is allowed to run */ - int pid; -} InjectionPointCondition; - /* * List of injection points stored in TopMemoryContext attached * locally to this process. diff --git a/src/test/modules/injection_points/meson.build b/src/test/modules/injection_points/meson.build index fb1418e2caa..4ac9e54adb8 100644 --- a/src/test/modules/injection_points/meson.build +++ b/src/test/modules/injection_points/meson.build @@ -7,6 +7,7 @@ endif injection_points_sources = files( 'injection_points.c', 'regress_injection.c', + 'regress_prockill.c', ) if host_system == 'windows' @@ -41,6 +42,11 @@ tests += { # The injection points are cluster-wide, so disable installcheck 'runningcheck': false, }, + 'tap': { + 'tests': [ + 't/001_prockill_lockgroup.pl', + ], + }, 'isolation': { 'specs': [ 'basic', diff --git a/src/test/modules/injection_points/regress_prockill.c b/src/test/modules/injection_points/regress_prockill.c new file mode 100644 index 00000000000..9720875d9e2 --- /dev/null +++ b/src/test/modules/injection_points/regress_prockill.c @@ -0,0 +1,115 @@ +/*------------------------------------------------------------------------- + * + * regress_prockill.c + * SQL helpers for the ProcKill lock-group TAP test + * + * Exposes lock-group formation without parallel query and helpers that + * observe victim backends while they are inside ProcKill(). + * + * Copyright (c) 2025-2026, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/test/modules/injection_points/regress_prockill.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "fmgr.h" +#include "storage/proc.h" +#include "storage/procarray.h" +#include "utils/builtins.h" +#include "utils/injection_point.h" +#include "utils/wait_event.h" + +#include "injection_point_condition.h" + +/* Read a uint32 field exactly once (matches waitfuncs.c idiom). */ +#define UINT32_ACCESS_ONCE(var) ((uint32) (*((volatile uint32 *) &(var)))) + +PG_FUNCTION_INFO_V1(prockill_become_lock_group_leader); + +Datum +prockill_become_lock_group_leader(PG_FUNCTION_ARGS) +{ + BecomeLockGroupLeader(); + PG_RETURN_VOID(); +} + +PG_FUNCTION_INFO_V1(prockill_become_lock_group_member); + +Datum +prockill_become_lock_group_member(PG_FUNCTION_ARGS) +{ + int leader_pid = PG_GETARG_INT32(0); + PGPROC *leader; + + leader = BackendPidGetProc(leader_pid); + if (leader == NULL) + elog(ERROR, "backend with PID %d not found", leader_pid); + + if (!BecomeLockGroupMember(leader, leader_pid)) + elog(ERROR, "could not join lock group of backend %d", leader_pid); + + PG_RETURN_VOID(); +} + +/* + * Attach injection_wait to point_name, scoped to target_pid via + * INJ_CONDITION_PID. + * + * Must be called from a controller session that is not one of the victims. + * Using injection_points_attach() from the victim itself would register a + * before_shmem_exit(injection_points_cleanup) hook that fires before ProcKill + * (which is on_shmem_exit) and would detach the point before the victim ever + * reaches it. Calling InjectionPointAttach() directly from a separate + * controller session leaves no such hook on the victims. + */ +PG_FUNCTION_INFO_V1(prockill_attach_injection_wait); + +Datum +prockill_attach_injection_wait(PG_FUNCTION_ARGS) +{ + char *name = text_to_cstring(PG_GETARG_TEXT_PP(0)); + int target_pid = PG_GETARG_INT32(1); + InjectionPointCondition cond = {INJ_CONDITION_PID, target_pid}; + + InjectionPointAttach(name, "injection_points", "injection_wait", + &cond, sizeof(cond)); + PG_RETURN_VOID(); +} + +/* + * Return true if the backend with target_pid is currently waiting at the + * named injection point, by reading PGPROC->wait_event_info directly. + * + * At the injection points in ProcKill(), the standard observability surfaces + * (pg_stat_activity, BackendPidGetProc) are already unavailable: both + * pgstat_beshutdown_hook and RemoveProcFromArray run as on_shmem_exit + * callbacks before ProcKill. The PGPROC slot itself remains intact until + * ProcKill zeroes proc->pid near its end, so a direct allProcs scan works. + */ +PG_FUNCTION_INFO_V1(prockill_backend_in_injection); + +Datum +prockill_backend_in_injection(PG_FUNCTION_ARGS) +{ + int target_pid = PG_GETARG_INT32(0); + char *want_name = text_to_cstring(PG_GETARG_TEXT_PP(1)); + uint32 n = ProcGlobal->allProcCount; + + for (uint32 i = 0; i < n; i++) + { + PGPROC *proc = &ProcGlobal->allProcs[i]; + const char *ev; + + if (proc->pid != target_pid) + continue; + + ev = pgstat_get_wait_event(UINT32_ACCESS_ONCE(proc->wait_event_info)); + PG_RETURN_BOOL(ev != NULL && strcmp(ev, want_name) == 0); + } + + PG_RETURN_BOOL(false); +} diff --git a/src/test/modules/injection_points/t/001_prockill_lockgroup.pl b/src/test/modules/injection_points/t/001_prockill_lockgroup.pl new file mode 100644 index 00000000000..a2ead24cf10 --- /dev/null +++ b/src/test/modules/injection_points/t/001_prockill_lockgroup.pl @@ -0,0 +1,99 @@ +# Copyright (c) 2025-2026, PostgreSQL Global Development Group +# +# Regression test for the ProcKill lock-group vs. procLatch recycle race. +# +# Two backends form a lock group, then are terminated concurrently. The test +# uses injection points placed inside ProcKill() to pause both victims there +# and verify that a freshly forked backend can claim the recycled PGPROC slot +# without hitting "latch already owned by PID ..." PANIC. +# +# Requires --enable-injection-points. + +use strict; +use warnings FATAL => 'all'; + +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; + +if (($ENV{enable_injection_points} // '') ne 'yes') +{ + plan skip_all => 'Injection points not supported by this build'; +} + +my $node = PostgreSQL::Test::Cluster->new('prockill_race'); +$node->init; +$node->append_conf('postgresql.conf', + q{shared_preload_libraries = 'injection_points'}); +$node->start; + +$node->safe_psql('postgres', 'CREATE EXTENSION injection_points;'); + +# Form a two-backend lock group. +my $leader = $node->background_psql('postgres'); +my $follower = $node->background_psql('postgres'); + +$leader->query_safe('SELECT prockill_become_lock_group_leader()'); +my $leader_pid = $leader->query_safe('SELECT pg_backend_pid()'); +my $follower_pid = $follower->query_safe('SELECT pg_backend_pid()'); +$leader_pid =~ s/\s+//g; +$follower_pid =~ s/\s+//g; + +$follower->query_safe("SELECT prockill_become_lock_group_member($leader_pid)"); + +# Attach PID-scoped injection waits from a throwaway controller session, not +# from $leader/$follower. injection_points_attach() via the victim would +# register a before_shmem_exit cleanup that fires before ProcKill and detaches +# the point mid-exit. prockill_attach_injection_wait() calls +# InjectionPointAttach() directly, leaving no such hook on the victims. +$node->safe_psql('postgres', qq{ + SELECT prockill_attach_injection_wait('prockill-after-lockgroup-leader', $leader_pid); + SELECT prockill_attach_injection_wait('prockill-after-lockgroup-follower', $follower_pid); +}); + +# Kill the leader and wait for it to pause inside ProcKill. +# prockill_backend_in_injection() reads PGPROC->wait_event_info directly +# because pg_stat_activity and ProcArray are already torn down before ProcKill. +$node->safe_psql('postgres', "SELECT pg_terminate_backend($leader_pid)"); +my $leader_ok = $node->poll_query_until( + 'postgres', + "SELECT prockill_backend_in_injection($leader_pid, 'prockill-after-lockgroup-leader')" +); + +# Kill the follower and wait for it similarly. Use eval: if the fix has +# regressed, the postmaster may have already PANICed. +eval { + $node->safe_psql('postgres', + "SELECT pg_terminate_backend($follower_pid)"); +}; +my $follower_ok = $node->poll_query_until( + 'postgres', + "SELECT prockill_backend_in_injection($follower_pid, 'prockill-after-lockgroup-follower')" +); + +# Release both victims. +eval { + $node->safe_psql('postgres', + q{SELECT injection_points_wakeup('prockill-after-lockgroup-follower'); + SELECT injection_points_wakeup('prockill-after-lockgroup-leader');}); +}; + +# A new backend must be able to claim the recycled PGPROC slot without PANIC. +my $select_ok = eval { $node->safe_psql('postgres', 'SELECT 1'); 1 }; + +ok($leader_ok && $follower_ok, + 'leader and follower reached ProcKill injection points'); +ok($select_ok, + 'new backend claimed recycled PGPROC without latch-recycle PANIC'); + +eval { + $node->safe_psql('postgres', + q{SELECT injection_points_detach('prockill-after-lockgroup-leader'); + SELECT injection_points_detach('prockill-after-lockgroup-follower');}); +}; + +eval { $leader->quit }; +eval { $follower->quit }; +$node->stop('fast', fail_ok => 1); + +done_testing(); -- 2.50.1 (Apple Git-155)