From 6d2aeaffa0150779ab92278fbe6752b23b9288ce Mon Sep 17 00:00:00 2001 From: Josh Curtis Date: Thu, 11 Jun 2026 17:06:37 -0700 Subject: [PATCH v4] Fix race condition when reading `PredXact->SxactGlobalXmin` `DropAllPredicateLocksFromTable`, `PredicateLockPageSplit`, and `CheckTableForSerializableConflictIn` all assume that `!TransactionIdIsValid(PredXact->SxactGlobalXmin)` implies there are no active serializable transactions. This assumption is not true due to a race with `SetNewSxactGlobalXmin`, which first sets `PredXact->SxactGlobalXmin` to `InvalidTransactionId` then iterates over the active serializable transactions to find the new xmin. If `SetNewSxactGlobalXmin` is interrupted before setting a new xmin and other processes check `!TransactionIdIsValid(PredXact->SxactGlobalXmin)` without taking a lock during this time, they will incorrectly assume it is safe to proceed as if there are no concurrent serializable transactions. Author: Josh Curtis Reviewed-by: Mihail Nikalayeu Discussion: https://postgr.es/m/flat/CAEkjoh2X8yL9TKrQKx6SL6gtMNuZsyH8K-%2BrE78EwdOv4oXjSA%40mail.gmail.com --- src/backend/storage/lmgr/predicate.c | 37 +++-- src/test/modules/injection_points/Makefile | 1 + .../expected/predicate-lock-page-split.out | 108 ++++++++++++++ src/test/modules/injection_points/meson.build | 1 + .../specs/predicate-lock-page-split.spec | 133 ++++++++++++++++++ 5 files changed, 267 insertions(+), 13 deletions(-) create mode 100644 src/test/modules/injection_points/expected/predicate-lock-page-split.out create mode 100644 src/test/modules/injection_points/specs/predicate-lock-page-split.spec diff --git a/src/backend/storage/lmgr/predicate.c b/src/backend/storage/lmgr/predicate.c index 0ae85b7d5b4..b2509ac0765 100644 --- a/src/backend/storage/lmgr/predicate.c +++ b/src/backend/storage/lmgr/predicate.c @@ -210,6 +210,7 @@ #include "storage/shmem.h" #include "storage/subsystems.h" #include "utils/guc_hooks.h" +#include "utils/injection_point.h" #include "utils/rel.h" #include "utils/snapmgr.h" #include "utils/wait_event.h" @@ -2878,12 +2879,14 @@ DropAllPredicateLocksFromTable(Relation relation, bool transfer) /* * Bail out quickly if there are no serializable transactions running. - * It's safe to check this without taking locks because the caller is - * holding an ACCESS EXCLUSIVE lock on the relation. No new locks which - * would matter here can be acquired while that is held. */ + LWLockAcquire(SerializableXactHashLock, LW_SHARED); if (!TransactionIdIsValid(PredXact->SxactGlobalXmin)) + { + LWLockRelease(SerializableXactHashLock); return; + } + LWLockRelease(SerializableXactHashLock); if (!PredicateLockingNeededForRelation(relation)) return; @@ -3079,16 +3082,15 @@ PredicateLockPageSplit(Relation relation, BlockNumber oldblkno, /* * Bail out quickly if there are no serializable transactions running. - * - * It's safe to do this check without taking any additional locks. Even if - * a serializable transaction starts concurrently, we know it can't take - * any SIREAD locks on the page being split because the caller is holding - * the associated buffer page lock. Memory reordering isn't an issue; the - * memory barrier in the LWLock acquisition guarantees that this read - * occurs while the buffer page lock is held. */ + INJECTION_POINT("predicate-lock-page-split", NULL); + LWLockAcquire(SerializableXactHashLock, LW_SHARED); if (!TransactionIdIsValid(PredXact->SxactGlobalXmin)) + { + LWLockRelease(SerializableXactHashLock); return; + } + LWLockRelease(SerializableXactHashLock); if (!PredicateLockingNeededForRelation(relation)) return; @@ -3186,6 +3188,10 @@ SetNewSxactGlobalXmin(void) PredXact->SxactGlobalXmin = InvalidTransactionId; PredXact->SxactGlobalXminCount = 0; +#ifdef USE_INJECTION_POINTS + INJECTION_POINT_CACHED("predicate-set-sxact-global-xmin-invalid", NULL); +#endif + dlist_foreach(iter, &PredXact->activeList) { SERIALIZABLEXACT *sxact = @@ -3299,6 +3305,9 @@ ReleasePredicateLocks(bool isCommit, bool isReadOnlySafe) Assert(LocalPredicateLockHash == NULL); return; } +#ifdef USE_INJECTION_POINTS + INJECTION_POINT_LOAD("predicate-set-sxact-global-xmin-invalid"); +#endif LWLockAcquire(SerializableXactHashLock, LW_EXCLUSIVE); @@ -4355,12 +4364,14 @@ CheckTableForSerializableConflictIn(Relation relation) /* * Bail out quickly if there are no serializable transactions running. - * It's safe to check this without taking locks because the caller is - * holding an ACCESS EXCLUSIVE lock on the relation. No new locks which - * would matter here can be acquired while that is held. */ + LWLockAcquire(SerializableXactHashLock, LW_SHARED); if (!TransactionIdIsValid(PredXact->SxactGlobalXmin)) + { + LWLockRelease(SerializableXactHashLock); return; + } + LWLockRelease(SerializableXactHashLock); if (!SerializationNeededForWrite(relation)) return; diff --git a/src/test/modules/injection_points/Makefile b/src/test/modules/injection_points/Makefile index c01d2fb095c..da0ead2b727 100644 --- a/src/test/modules/injection_points/Makefile +++ b/src/test/modules/injection_points/Makefile @@ -14,6 +14,7 @@ REGRESS_OPTS = --dlpath=$(top_builddir)/src/test/regress ISOLATION = basic \ inplace \ + predicate-lock-page-split \ repack \ repack_temporal \ repack_temporal_multirange \ diff --git a/src/test/modules/injection_points/expected/predicate-lock-page-split.out b/src/test/modules/injection_points/expected/predicate-lock-page-split.out new file mode 100644 index 00000000000..8f5613c9d8e --- /dev/null +++ b/src/test/modules/injection_points/expected/predicate-lock-page-split.out @@ -0,0 +1,108 @@ +Parsed test spec with 5 sessions + +starting permutation: s1_begin bump_xmin s2_begin s3_begin s1_insert s2_insert_wait_at_page_split s1_commit_wait_in_SetNewSxactGlobalXmin wakeup_s2_then_s1 s3_insert s3_commit s2_commit verify +injection_points_attach +----------------------- + +(1 row) + +injection_points_attach +----------------------- + +(1 row) + +step s1_begin: + BEGIN ISOLATION LEVEL SERIALIZABLE; + SELECT max(id) > 0 AS locked FROM test_table; + +locked +------ +t +(1 row) + +step bump_xmin: + DO $$ + BEGIN + PERFORM pg_current_xact_id(); + END; + $$; + +step s2_begin: + BEGIN ISOLATION LEVEL SERIALIZABLE; + SELECT max(id) > 0 AS locked FROM test_table; + +locked +------ +t +(1 row) + +step s3_begin: + BEGIN ISOLATION LEVEL SERIALIZABLE; + SELECT max(id) > 0 AS locked FROM test_table; + +locked +------ +t +(1 row) + +step s1_insert: + INSERT INTO test_table + SELECT max(id) + 1 + FROM test_table; + +step s2_insert_wait_at_page_split: + DO $$ + DECLARE + next_id int := 0; + base_size bigint := pg_relation_size('test_table_id_idx'); + BEGIN + LOOP + INSERT INTO test_table VALUES (next_id); + EXIT WHEN pg_relation_size('test_table_id_idx') > base_size; + next_id := next_id - 1; + END LOOP; + END; + $$; + +step s1_commit_wait_in_SetNewSxactGlobalXmin: + COMMIT; + +step wakeup_s2_then_s1: + SELECT injection_points_wakeup('predicate-lock-page-split'); + SELECT injection_points_wakeup('predicate-set-sxact-global-xmin-invalid'); + +step s2_insert_wait_at_page_split: <... completed> +step s1_commit_wait_in_SetNewSxactGlobalXmin: <... completed> +step s3_insert: + INSERT INTO test_table + SELECT max(id) + 1 + FROM test_table; + +ERROR: could not serialize access due to read/write dependencies among transactions +step wakeup_s2_then_s1: <... completed> +injection_points_wakeup +----------------------- + +(1 row) + +injection_points_wakeup +----------------------- + +(1 row) + +step s3_commit: + COMMIT; + +step s2_commit: + COMMIT; + +ERROR: could not serialize access due to read/write dependencies among transactions +step verify: + SELECT id, count(*) FROM test_table GROUP BY id ORDER BY id; + +id|count +--+----- + 1| 1 + 2| 1 +(2 rows) + diff --git a/src/test/modules/injection_points/meson.build b/src/test/modules/injection_points/meson.build index 59dba1cb023..94c03283335 100644 --- a/src/test/modules/injection_points/meson.build +++ b/src/test/modules/injection_points/meson.build @@ -45,6 +45,7 @@ tests += { 'specs': [ 'basic', 'inplace', + 'predicate-lock-page-split', 'repack', 'repack_temporal', 'repack_temporal_multirange', diff --git a/src/test/modules/injection_points/specs/predicate-lock-page-split.spec b/src/test/modules/injection_points/specs/predicate-lock-page-split.spec new file mode 100644 index 00000000000..fd905a37b07 --- /dev/null +++ b/src/test/modules/injection_points/specs/predicate-lock-page-split.spec @@ -0,0 +1,133 @@ +# Test for race condition in PredicateLockPageSplit +# +# When SetNewSxactGlobalXmin() temporarily sets SxactGlobalXmin to +# InvalidTransactionId, a concurrent PredicateLockPageSplit() can see +# the invalid value and skip transferring SIREAD locks to the new page. +# This lets a third transaction insert onto the new page without SSI +# detecting the rw-conflict: s1 and s3 both compute max(id) + 1 from +# the same snapshot and insert the same id twice, which no serial +# ordering of the transactions could produce. +# +setup +{ + CREATE EXTENSION IF NOT EXISTS injection_points; + DROP TABLE IF EXISTS test_table; + + CREATE TABLE test_table (id int); + CREATE INDEX test_table_id_idx ON test_table USING btree (id); + INSERT INTO test_table VALUES (1); +} + +teardown +{ + DROP TABLE IF EXISTS test_table; + DROP EXTENSION IF EXISTS injection_points; +} + +session s0 +step bump_xmin { + DO $$ + BEGIN + PERFORM pg_current_xact_id(); + END; + $$; +} +step verify { + SELECT id, count(*) FROM test_table GROUP BY id ORDER BY id; +} + +session s1 +setup { + SELECT injection_points_set_local(); + SELECT injection_points_attach('predicate-set-sxact-global-xmin-invalid', 'wait'); +} +step s1_begin { + BEGIN ISOLATION LEVEL SERIALIZABLE; + SELECT max(id) > 0 AS locked FROM test_table; +} +step s1_insert { + INSERT INTO test_table + SELECT max(id) + 1 + FROM test_table; +} +step s1_commit_wait_in_SetNewSxactGlobalXmin { + COMMIT; +} + +session s2 +setup { + SELECT injection_points_set_local(); + SELECT injection_points_attach('predicate-lock-page-split', 'wait'); +} +step s2_begin { + BEGIN ISOLATION LEVEL SERIALIZABLE; + SELECT max(id) > 0 AS locked FROM test_table; +} +step s2_insert_wait_at_page_split { + DO $$ + DECLARE + next_id int := 0; + base_size bigint := pg_relation_size('test_table_id_idx'); + BEGIN + LOOP + INSERT INTO test_table VALUES (next_id); + EXIT WHEN pg_relation_size('test_table_id_idx') > base_size; + next_id := next_id - 1; + END LOOP; + END; + $$; +} +step s2_commit { + COMMIT; +} + +session s3 +step s3_begin { + BEGIN ISOLATION LEVEL SERIALIZABLE; + SELECT max(id) > 0 AS locked FROM test_table; +} +step s3_insert { + INSERT INTO test_table + SELECT max(id) + 1 + FROM test_table; +} +step s3_commit { + COMMIT; +} + +session s4 +step wakeup_s2_then_s1 { + SELECT injection_points_wakeup('predicate-lock-page-split'); + SELECT injection_points_wakeup('predicate-set-sxact-global-xmin-invalid'); +} + +# s1_begin: s1 reads from the table, establishing SIREAD locks on the index +# bump_xmin: advance xmin so s2/s3 get a higher xmin than s1 +# s2_begin, s3_begin: s2 and s3 read from the table (same snapshot as s1) +# +# s1_insert: s1 inserts max(id)+1 = 2 +# s2_insert_wait_at_page_split: s2 inserts descending values until a real +# btree page split happens, then waits in PredicateLockPageSplit before +# checking SxactGlobalXmin. The values must be descending so that the +# split moves ids 1 and 2 to the new page +# s1_commit_wait_in_SetNewSxactGlobalXmin: after s2 is already waiting, +# s1 commits and waits after SetNewSxactGlobalXmin sets SxactGlobalXmin +# to InvalidTransactionId +# wakeup_s2_then_s1: wake s2 (sees InvalidTransactionId, skips SIREAD +# lock transfer), then wake s1 +# s3_insert: s3 inserts max(id)+1 = 2, computed from its snapshot +# s3_commit: s3 commits (should have been aborted by SSI) +# s2_commit: s2 aborts due to serialization failure +permutation + s1_begin + bump_xmin + s2_begin + s3_begin + s1_insert + s2_insert_wait_at_page_split + s1_commit_wait_in_SetNewSxactGlobalXmin + wakeup_s2_then_s1(s2_insert_wait_at_page_split,s1_commit_wait_in_SetNewSxactGlobalXmin) + s3_insert + s3_commit + s2_commit + verify -- 2.52.0