From 7fad50a4a2a9fc6a4c2870fde31fe266ca2da84d Mon Sep 17 00:00:00 2001 From: Richard Guo Date: Wed, 11 Mar 2026 16:26:12 +0900 Subject: [PATCH v6 2/2] Suppress FK-based inner join removal during the trigger gap FK constraints are enforced via AFTER ROW triggers, creating a window (the "trigger gap") between when a row is physically modified and when the enforcement trigger fires. Within this window the FK invariant is locally false in the heap. It becomes user-observable through any query that runs against a snapshot captured inside the window -- for example, a SELECT inside a plpgsql function invoked from RETURNING, a query inside an AFTER ROW trigger body, or a fetch from a cursor opened inside the window. The optimization introduced in the previous commit must therefore be disabled whenever a query could be executed against a snapshot that captures gap-window state. A predicate that only inspects the current trigger queue is not sufficient. A snapshot captured during the gap can outlive the gap's closure -- for example, a cursor frozen via CopySnapshot inside the window, or a cached plan reused at execution time against a snapshot that was captured inside the window. A query executed against such a stale snapshot can still observe the inconsistent state even after triggers have fired and the queue has drained. The predicate guarding the optimization therefore has to remain positive at least as long as any snapshot in this transaction could still be referenced. RowExclusiveLock has exactly that lifetime: it is acquired during parse analysis of any DML on the rel and released only at end of transaction, which trivially exceeds the lifetime of any in-xact snapshot. The optimization is therefore disabled whenever either side of the FK is RowExclusiveLock'd by this backend. The cost is pessimism in cases where the lock outlives the actual gap window: after the DML completes within the same transaction, after ROLLBACK TO a savepoint, or when RowExclusiveLock is held for unrelated reasons (e.g., LOCK TABLE). In those cases the query falls back to executing the join normally; correctness is preserved. For cached plans, the plan cache forces replanning when a previously cached generic plan with FK-based join removal might be affected by an active trigger gap. The OIDs of the relations involved in FK join removal are propagated through PlannerGlobal and PlannedStmt so that choose_custom_plan() can perform this check efficiently. --- src/backend/optimizer/plan/analyzejoins.c | 53 +++++++++++++++++++++++ src/backend/optimizer/plan/planner.c | 2 + src/backend/utils/cache/plancache.c | 34 +++++++++++++++ src/include/nodes/pathnodes.h | 3 ++ src/include/nodes/plannodes.h | 3 ++ src/include/utils/plancache.h | 1 + src/test/regress/expected/join.out | 40 +++++++++++++++++ src/test/regress/sql/join.sql | 30 +++++++++++++ 8 files changed, 166 insertions(+) diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c index 7085bde6b2a..84927a41c3f 100644 --- a/src/backend/optimizer/plan/analyzejoins.c +++ b/src/backend/optimizer/plan/analyzejoins.c @@ -35,6 +35,7 @@ #include "optimizer/restrictinfo.h" #include "parser/parse_agg.h" #include "rewrite/rewriteManip.h" +#include "storage/lmgr.h" #include "utils/lsyscache.h" /* @@ -2728,6 +2729,18 @@ restart: if (!inner_join_is_removable(root, fkinfo)) continue; + /* + * Record the OIDs of both sides of the FK so that the plan cache can + * detect when a cached plan with FK-based join removal needs + * replanning due to the trigger gap becoming possible. + */ + root->glob->fkRemovedRelOids = + list_append_unique_oid(root->glob->fkRemovedRelOids, + root->simple_rte_array[fkinfo->con_relid]->relid); + root->glob->fkRemovedRelOids = + list_append_unique_oid(root->glob->fkRemovedRelOids, + root->simple_rte_array[fkinfo->ref_relid]->relid); + /* Inject IS NOT NULL clauses for nullable foreign key columns */ inject_fk_not_null_quals(root, fkinfo); @@ -2795,6 +2808,8 @@ inner_join_is_removable(PlannerInfo *root, ForeignKeyOptInfo *fkinfo) { RelOptInfo *con_rel; RelOptInfo *ref_rel; + Oid con_reloid; + Oid ref_reloid; int attroff; Relids inputrelids; Bitmapset *fk_attnums = NULL; @@ -2851,6 +2866,44 @@ inner_join_is_removable(PlannerInfo *root, ForeignKeyOptInfo *fkinfo) ref_rel->reloptkind != RELOPT_BASEREL) return false; + con_reloid = root->simple_rte_array[fkinfo->con_relid]->relid; + ref_reloid = root->simple_rte_array[fkinfo->ref_relid]->relid; + + /* + * FK constraints are enforced via AFTER ROW triggers, creating a window + * (the "trigger gap") between when a row is physically modified and when + * the FK enforcement trigger fires. Within this window the FK invariant + * is locally false in the heap. It becomes user-observable through any + * query that runs against a snapshot captured inside the window -- for + * example, a SELECT inside a plpgsql function invoked from RETURNING, a + * query inside an AFTER ROW trigger body, or a fetch from a cursor opened + * inside the window. + * + * The predicate guarding the optimization must remain positive at least + * as long as any snapshot in this transaction could still be referenced, + * because a snapshot captured during the gap can outlive the gap's + * closure -- for example, a cursor frozen via CopySnapshot inside the + * window, or a cached plan reused at execution time against a snapshot + * that was captured inside the window. Any FK-removed plan executed + * against such a stale snapshot would observe the inconsistent state and + * produce wrong results, even if the trigger queue has long since + * drained. Predicates tied to the active DML's own lifetime, such as a + * per-statement counter or AfterTriggerPendingOnRel(), go silent once the + * gap closes and would leave stale-snapshot executions unguarded. + * + * RowExclusiveLock has exactly the right lifetime: it is acquired during + * parse analysis of any DML on the rel and released only at end of + * transaction, which trivially exceeds the lifetime of any in-transaction + * snapshot. The pessimism this introduces -- skipping the optimization + * for the rest of the transaction after the DML completes, or when + * RowExclusiveLock is held for other reasons such as LOCK TABLE -- is the + * price of that lifetime guarantee. In those cases the query falls back + * to executing the join normally. + */ + if (CheckRelationOidLockedByMe(con_reloid, RowExclusiveLock, true) || + CheckRelationOidLockedByMe(ref_reloid, RowExclusiveLock, true)) + return false; + /* * If the referenced relation has any restriction clauses, they act as * explicit filters. Since we cannot perform variable substitution to diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index f4689e7c9f8..4f66747d83f 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -385,6 +385,7 @@ standard_planner(Query *parse, const char *query_string, int cursorOptions, glob->partPruneInfos = NIL; glob->relationOids = NIL; glob->invalItems = NIL; + glob->fkRemovedRelOids = NIL; glob->paramExecTypes = NIL; glob->lastPHId = 0; glob->lastRowMarkId = 0; @@ -693,6 +694,7 @@ standard_planner(Query *parse, const char *query_string, int cursorOptions, result->relationOids = glob->relationOids; result->invalItems = glob->invalItems; + result->fkRemovedRelOids = glob->fkRemovedRelOids; result->paramExecTypes = glob->paramExecTypes; /* utilityStmt should be null, but we might as well copy it */ result->utilityStmt = parse->utilityStmt; diff --git a/src/backend/utils/cache/plancache.c b/src/backend/utils/cache/plancache.c index 698e7c1aa22..f5350ff0302 100644 --- a/src/backend/utils/cache/plancache.c +++ b/src/backend/utils/cache/plancache.c @@ -1132,6 +1132,7 @@ BuildCachedPlan(CachedPlanSource *plansource, List *qlist, */ plan->planRoleId = GetUserId(); plan->dependsOnRole = plansource->dependsOnRLS; + plan->hasFKJoinRemoval = false; is_transient = false; foreach(lc, plist) { @@ -1144,6 +1145,8 @@ BuildCachedPlan(CachedPlanSource *plansource, List *qlist, is_transient = true; if (plannedstmt->dependsOnRole) plan->dependsOnRole = true; + if (plannedstmt->fkRemovedRelOids != NIL) + plan->hasFKJoinRemoval = true; } if (is_transient) { @@ -1180,6 +1183,37 @@ choose_custom_plan(CachedPlanSource *plansource, ParamListInfo boundParams) if (plansource->is_oneshot) return true; + /* + * If the generic plan used FK-based inner join removal, check whether any + * of the relations involved are currently being modified in this + * transaction (detected by RowExclusiveLock). If so, we must force a + * custom plan: an FK-removed plan executed against a snapshot that still + * sees the trigger-gap state would produce incorrect results. This check + * uses the same predicate as inner_join_is_removable() at planning time; + * see that function for the correctness argument. + * + * This check must come before all other early returns, because + * correctness requires replanning regardless of parameter availability, + * GUC settings, or cursor options. + * + * We use the hasFKJoinRemoval flag for a fast-path check so that plans + * without FK join removal (the common case) pay only a single boolean + * test. + */ + if (plansource->gplan != NULL && plansource->gplan->hasFKJoinRemoval) + { + foreach_node(PlannedStmt, stmt, plansource->gplan->stmt_list) + { + if (stmt->commandType == CMD_UTILITY) + continue; + foreach_oid(reloid, stmt->fkRemovedRelOids) + { + if (CheckRelationOidLockedByMe(reloid, RowExclusiveLock, true)) + return true; + } + } + } + /* Otherwise, never any point in a custom plan if there's no parameters */ if (boundParams == NULL) return false; diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h index d67f68089e3..604df3bd5dd 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -229,6 +229,9 @@ typedef struct PlannerGlobal /* other dependencies, as PlanInvalItems */ List *invalItems; + /* OIDs of relations involved in FK-based inner join removal */ + List *fkRemovedRelOids; + /* type OIDs for PARAM_EXEC Params */ List *paramExecTypes; diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index 14a1dfed2b9..63af469a981 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -146,6 +146,9 @@ typedef struct PlannedStmt /* other dependencies, as PlanInvalItems */ List *invalItems; + /* OIDs of relations involved in FK-based inner join removal */ + List *fkRemovedRelOids; + /* type OIDs for PARAM_EXEC Params */ List *paramExecTypes; diff --git a/src/include/utils/plancache.h b/src/include/utils/plancache.h index 7a4a85c8038..8130cc3e1a3 100644 --- a/src/include/utils/plancache.h +++ b/src/include/utils/plancache.h @@ -165,6 +165,7 @@ typedef struct CachedPlan bool is_valid; /* is the stmt_list currently valid? */ Oid planRoleId; /* Role ID the plan was created for */ bool dependsOnRole; /* is plan specific to that role? */ + bool hasFKJoinRemoval; /* does plan use FK-based join removal? */ TransactionId saved_xmin; /* if valid, replan when TransactionXmin * changes from this value */ int generation; /* parent's generation number for this plan */ diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index 4ff6d2323f8..bb74625abb0 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -10703,6 +10703,46 @@ FROM fk_child c JOIN fk_parent1 p ON c.p1_id = p.id; -> Seq Scan on fk_parent1 p (5 rows) +-- Trigger gap: fk_parent1 cannot be removed when RowExclusiveLock is held +-- during active DML on a FK-related table +ALTER TABLE fk_child DROP CONSTRAINT fk_child_p1_id_fkey; +ALTER TABLE fk_child ADD CONSTRAINT fk_child_p1_id_fkey + FOREIGN KEY (p1_id) REFERENCES fk_parent1(id) ON DELETE CASCADE; +CREATE FUNCTION trigger_gap_test_fn() RETURNS int LANGUAGE plpgsql AS $$ +DECLARE + join_count int; + plan_line text; +BEGIN + SELECT count(*) INTO join_count + FROM fk_child c JOIN fk_parent1 p ON c.p1_id = p.id; + RAISE NOTICE 'Join Count: %', join_count; + + RAISE NOTICE '--- EXPLAIN PLAN ---'; + FOR plan_line IN + EXPLAIN (COSTS OFF) + SELECT count(*) FROM fk_child c JOIN fk_parent1 p ON c.p1_id = p.id + LOOP + RAISE NOTICE '%', plan_line; + END LOOP; + + RETURN join_count; +END; +$$; +DELETE FROM fk_parent1 WHERE id = 1 RETURNING trigger_gap_test_fn(); +NOTICE: Join Count: 1 +NOTICE: --- EXPLAIN PLAN --- +NOTICE: Aggregate +NOTICE: -> Nested Loop +NOTICE: Join Filter: (p.id = c.p1_id) +NOTICE: -> Seq Scan on fk_child c +NOTICE: -> Materialize +NOTICE: -> Seq Scan on fk_parent1 p + trigger_gap_test_fn +--------------------- + 1 +(1 row) + +DROP FUNCTION trigger_gap_test_fn(); DROP TABLE fk_child; DROP TABLE fk_multi_parent; DROP TABLE fk_parent_text; diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql index f6d79770a95..29ff65dd912 100644 --- a/src/test/regress/sql/join.sql +++ b/src/test/regress/sql/join.sql @@ -4176,6 +4176,36 @@ EXPLAIN (COSTS OFF) SELECT c.id, c.val FROM fk_child c JOIN fk_parent1 p ON c.p1_id = p.id; +-- Trigger gap: fk_parent1 cannot be removed when RowExclusiveLock is held +-- during active DML on a FK-related table +ALTER TABLE fk_child DROP CONSTRAINT fk_child_p1_id_fkey; +ALTER TABLE fk_child ADD CONSTRAINT fk_child_p1_id_fkey + FOREIGN KEY (p1_id) REFERENCES fk_parent1(id) ON DELETE CASCADE; + +CREATE FUNCTION trigger_gap_test_fn() RETURNS int LANGUAGE plpgsql AS $$ +DECLARE + join_count int; + plan_line text; +BEGIN + SELECT count(*) INTO join_count + FROM fk_child c JOIN fk_parent1 p ON c.p1_id = p.id; + RAISE NOTICE 'Join Count: %', join_count; + + RAISE NOTICE '--- EXPLAIN PLAN ---'; + FOR plan_line IN + EXPLAIN (COSTS OFF) + SELECT count(*) FROM fk_child c JOIN fk_parent1 p ON c.p1_id = p.id + LOOP + RAISE NOTICE '%', plan_line; + END LOOP; + + RETURN join_count; +END; +$$; + +DELETE FROM fk_parent1 WHERE id = 1 RETURNING trigger_gap_test_fn(); +DROP FUNCTION trigger_gap_test_fn(); + DROP TABLE fk_child; DROP TABLE fk_multi_parent; DROP TABLE fk_parent_text; -- 2.39.5 (Apple Git-154)