From f4ac9696bb00629e1662c002c4890bb70bee5c3c Mon Sep 17 00:00:00 2001 From: Richard Guo Date: Mon, 17 Nov 2025 10:54:17 +0900 Subject: [PATCH v3 3/3] Tighten nullingrels checks for outer joins When fixing up the targetlist and qpqual of an outer join, we must account for the effects of the outer join. Vars and PHVs appearing there are logically above the join, so they should have nullingrels equal to the input Vars/PHVs' nullingrels plus the bit added by the outer join. Determining the effects of the outer join can be tricky when the join has been commuted with another one per outer join identity 3. In this case, the Vars/PHVs in the join's targetlist and qpqual should have the same nullingrels that they would if the two joins had been done in syntactic order. Unfortunately, in setrefs.c, we don't have enough information to identify what that should be, so we have to use superset nullingrels matches instead of exact ones. However, we can tighten the check somewhat. Currently, we check whether the jointype is JOIN_INNER and use NRM_SUPERSET if it is not. We can improve this by checking whether the Join node has non-empty ojrelids and using NRM_SUPERSET only in that case. This allows us to perform exact matches in more situations. To support this, we record the outer-join relids in Join plan nodes. This information can also improve EXPLAIN (RANGE_TABLE) output by showing which outer-join relids are completed by each Join plan node. We may discover additional uses for this information in the future. --- .../expected/pg_overexplain.out | 40 ++++++++++++++++++- contrib/pg_overexplain/pg_overexplain.c | 19 +++++++++ contrib/pg_overexplain/sql/pg_overexplain.sql | 14 ++++++- src/backend/optimizer/plan/createplan.c | 39 ++++++++++++++++-- src/backend/optimizer/plan/setrefs.c | 18 ++++----- src/include/nodes/plannodes.h | 2 + 6 files changed, 116 insertions(+), 16 deletions(-) diff --git a/contrib/pg_overexplain/expected/pg_overexplain.out b/contrib/pg_overexplain/expected/pg_overexplain.out index 12ab92629ab..dfe530f6f3f 100644 --- a/contrib/pg_overexplain/expected/pg_overexplain.out +++ b/contrib/pg_overexplain/expected/pg_overexplain.out @@ -497,14 +497,15 @@ $$); (15 rows) -- Create an index, and then attempt to force a nested loop with inner index --- scan so that we can see parameter-related information. Also, let's try --- actually running the query, but try to suppress potentially variable output. +-- scan so that we can see parameter-related information. CREATE INDEX ON vegetables (id); ANALYZE vegetables; SET enable_hashjoin = false; SET enable_material = false; SET enable_mergejoin = false; SET enable_seqscan = false; +-- Let's try actually running the query, but try to suppress potentially +-- variable output. SELECT explain_filter($$ EXPLAIN (BUFFERS OFF, COSTS OFF, SUMMARY OFF, TIMING OFF, ANALYZE, DEBUG) SELECT * FROM vegetables v1, vegetables v2 WHERE v1.id = v2.id; @@ -560,6 +561,41 @@ $$); Parse Location: 0 to end (47 rows) +-- Test the RANGE_TABLE option with a case that involves an outer join. +SELECT explain_filter($$ +EXPLAIN (RANGE_TABLE, COSTS OFF) +SELECT * FROM daucus d LEFT JOIN brassica b ON d.id = b.id; +$$); + explain_filter +--------------------------------------------------------- + Nested Loop Left Join + Outer Join RTIs: 3 + -> Index Scan using daucus_id_idx on daucus d + Scan RTI: 1 + -> Index Scan using brassica_id_idx on brassica b + Index Cond: (id = d.id) + Scan RTI: 2 + RTI 1 (relation, in-from-clause): + Alias: d () + Eref: d (id, name, genus) + Relation: daucus + Relation Kind: relation + Relation Lock Mode: AccessShareLock + Permission Info Index: 1 + RTI 2 (relation, in-from-clause): + Alias: b () + Eref: b (id, name, genus) + Relation: brassica + Relation Kind: relation + Relation Lock Mode: AccessShareLock + Permission Info Index: 2 + RTI 3 (join, in-from-clause): + Eref: unnamed_join (id, name, genus, id, name, genus) + Join Type: Left + Unprunable RTIs: 1 2 +(25 rows) + +-- Restore default settings. RESET enable_hashjoin; RESET enable_material; RESET enable_mergejoin; diff --git a/contrib/pg_overexplain/pg_overexplain.c b/contrib/pg_overexplain/pg_overexplain.c index a93a4dcfed6..3890d5b94c1 100644 --- a/contrib/pg_overexplain/pg_overexplain.c +++ b/contrib/pg_overexplain/pg_overexplain.c @@ -261,6 +261,25 @@ overexplain_per_node_hook(PlanState *planstate, List *ancestors, ((Result *) plan)->relids, es); break; + case T_MergeJoin: + case T_NestLoop: + case T_HashJoin: + { + Join *join = (Join *) plan; + + /* + * 'ojrelids' is only meaningful for non-inner joins, but + * if it somehow ends up set for an inner join, print it + * anyway. + */ + if (join->jointype != JOIN_INNER || + join->ojrelids != NULL) + overexplain_bitmapset("Outer Join RTIs", + join->ojrelids, + es); + break; + } + default: break; } diff --git a/contrib/pg_overexplain/sql/pg_overexplain.sql b/contrib/pg_overexplain/sql/pg_overexplain.sql index 3f17b61a2da..80e9acda8de 100644 --- a/contrib/pg_overexplain/sql/pg_overexplain.sql +++ b/contrib/pg_overexplain/sql/pg_overexplain.sql @@ -92,18 +92,28 @@ INSERT INTO vegetables (name, genus) $$); -- Create an index, and then attempt to force a nested loop with inner index --- scan so that we can see parameter-related information. Also, let's try --- actually running the query, but try to suppress potentially variable output. +-- scan so that we can see parameter-related information. CREATE INDEX ON vegetables (id); ANALYZE vegetables; SET enable_hashjoin = false; SET enable_material = false; SET enable_mergejoin = false; SET enable_seqscan = false; + +-- Let's try actually running the query, but try to suppress potentially +-- variable output. SELECT explain_filter($$ EXPLAIN (BUFFERS OFF, COSTS OFF, SUMMARY OFF, TIMING OFF, ANALYZE, DEBUG) SELECT * FROM vegetables v1, vegetables v2 WHERE v1.id = v2.id; $$); + +-- Test the RANGE_TABLE option with a case that involves an outer join. +SELECT explain_filter($$ +EXPLAIN (RANGE_TABLE, COSTS OFF) +SELECT * FROM daucus d LEFT JOIN brassica b ON d.id = b.id; +$$); + +-- Restore default settings. RESET enable_hashjoin; RESET enable_material; RESET enable_mergejoin; diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index de6a183da79..2c696ea0268 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -231,14 +231,18 @@ static BitmapOr *make_bitmap_or(List *bitmapplans); static NestLoop *make_nestloop(List *tlist, List *joinclauses, List *otherclauses, List *nestParams, Plan *lefttree, Plan *righttree, - JoinType jointype, bool inner_unique); + JoinType jointype, + Relids ojrelids, + bool inner_unique); static HashJoin *make_hashjoin(List *tlist, List *joinclauses, List *otherclauses, List *hashclauses, List *hashoperators, List *hashcollations, List *hashkeys, Plan *lefttree, Plan *righttree, - JoinType jointype, bool inner_unique); + JoinType jointype, + Relids ojrelids, + bool inner_unique); static Hash *make_hash(Plan *lefttree, List *hashkeys, Oid skewTable, @@ -252,7 +256,9 @@ static MergeJoin *make_mergejoin(List *tlist, bool *mergereversals, bool *mergenullsfirst, Plan *lefttree, Plan *righttree, - JoinType jointype, bool inner_unique, + JoinType jointype, + Relids ojrelids, + bool inner_unique, bool skip_mark_restore); static Sort *make_sort(Plan *lefttree, int numCols, AttrNumber *sortColIdx, Oid *sortOperators, @@ -4191,6 +4197,7 @@ create_nestloop_plan(PlannerInfo *root, Plan *outer_plan; Plan *inner_plan; Relids outerrelids; + Relids ojrelids; List *tlist = build_path_tlist(root, &best_path->jpath.path); List *joinrestrictclauses = best_path->jpath.joinrestrictinfo; List *joinclauses; @@ -4257,6 +4264,11 @@ create_nestloop_plan(PlannerInfo *root, replace_nestloop_params(root, (Node *) otherclauses); } + /* Identify any outer joins computed at this level */ + ojrelids = bms_difference(best_path->jpath.path.parent->relids, + bms_union(best_path->jpath.outerjoinpath->parent->relids, + best_path->jpath.innerjoinpath->parent->relids)); + /* * Identify any nestloop parameters that should be supplied by this join * node, and remove them from root->curOuterParams. @@ -4328,6 +4340,7 @@ create_nestloop_plan(PlannerInfo *root, outer_plan, inner_plan, best_path->jpath.jointype, + ojrelids, best_path->jpath.inner_unique); copy_generic_path_info(&join_plan->join.plan, &best_path->jpath.path); @@ -4342,6 +4355,7 @@ create_mergejoin_plan(PlannerInfo *root, MergeJoin *join_plan; Plan *outer_plan; Plan *inner_plan; + Relids ojrelids; List *tlist = build_path_tlist(root, &best_path->jpath.path); List *joinclauses; List *otherclauses; @@ -4420,6 +4434,11 @@ create_mergejoin_plan(PlannerInfo *root, mergeclauses = get_switched_clauses(best_path->path_mergeclauses, best_path->jpath.outerjoinpath->parent->relids); + /* Identify any outer joins computed at this level */ + ojrelids = bms_difference(best_path->jpath.path.parent->relids, + bms_union(outer_path->parent->relids, + inner_path->parent->relids)); + /* * Create explicit sort nodes for the outer and inner paths if necessary. */ @@ -4680,6 +4699,7 @@ create_mergejoin_plan(PlannerInfo *root, outer_plan, inner_plan, best_path->jpath.jointype, + ojrelids, best_path->jpath.inner_unique, best_path->skip_mark_restore); @@ -4697,6 +4717,7 @@ create_hashjoin_plan(PlannerInfo *root, Hash *hash_plan; Plan *outer_plan; Plan *inner_plan; + Relids ojrelids; List *tlist = build_path_tlist(root, &best_path->jpath.path); List *joinclauses; List *otherclauses; @@ -4845,6 +4866,11 @@ create_hashjoin_plan(PlannerInfo *root, hash_plan->rows_total = best_path->inner_rows_total; } + /* Identify any outer joins computed at this level */ + ojrelids = bms_difference(best_path->jpath.path.parent->relids, + bms_union(best_path->jpath.outerjoinpath->parent->relids, + best_path->jpath.innerjoinpath->parent->relids)); + join_plan = make_hashjoin(tlist, joinclauses, otherclauses, @@ -4855,6 +4881,7 @@ create_hashjoin_plan(PlannerInfo *root, outer_plan, (Plan *) hash_plan, best_path->jpath.jointype, + ojrelids, best_path->jpath.inner_unique); copy_generic_path_info(&join_plan->join.plan, &best_path->jpath.path); @@ -5933,6 +5960,7 @@ make_nestloop(List *tlist, Plan *lefttree, Plan *righttree, JoinType jointype, + Relids ojrelids, bool inner_unique) { NestLoop *node = makeNode(NestLoop); @@ -5945,6 +5973,7 @@ make_nestloop(List *tlist, node->join.jointype = jointype; node->join.inner_unique = inner_unique; node->join.joinqual = joinclauses; + node->join.ojrelids = ojrelids; node->nestParams = nestParams; return node; @@ -5961,6 +5990,7 @@ make_hashjoin(List *tlist, Plan *lefttree, Plan *righttree, JoinType jointype, + Relids ojrelids, bool inner_unique) { HashJoin *node = makeNode(HashJoin); @@ -5977,6 +6007,7 @@ make_hashjoin(List *tlist, node->join.jointype = jointype; node->join.inner_unique = inner_unique; node->join.joinqual = joinclauses; + node->join.ojrelids = ojrelids; return node; } @@ -6016,6 +6047,7 @@ make_mergejoin(List *tlist, Plan *lefttree, Plan *righttree, JoinType jointype, + Relids ojrelids, bool inner_unique, bool skip_mark_restore) { @@ -6035,6 +6067,7 @@ make_mergejoin(List *tlist, node->join.jointype = jointype; node->join.inner_unique = inner_unique; node->join.joinqual = joinclauses; + node->join.ojrelids = ojrelids; return node; } diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c index c95aaa6d858..fa65a6435de 100644 --- a/src/backend/optimizer/plan/setrefs.c +++ b/src/backend/optimizer/plan/setrefs.c @@ -2489,13 +2489,13 @@ set_join_references(PlannerInfo *root, Join *join, int rtoffset) /* * Now we need to fix up the targetlist and qpqual, which are logically - * above the join. This means that, if it's not an inner join, any Vars - * and PHVs appearing here should have nullingrels that include the - * effects of the outer join, ie they will have nullingrels equal to the - * input Vars' nullingrels plus the bit added by the outer join. We don't - * currently have enough info available here to identify what that should - * be, so we just tell fix_join_expr to accept superset nullingrels - * matches instead of exact ones. + * above the join. This means that, if it's an outer join with non-empty + * ojrelids, any Vars and PHVs appearing here should have nullingrels that + * include the effects of the outer join, ie they will have nullingrels + * equal to the input Vars' nullingrels plus the bit added by the outer + * join. We don't currently have enough info available here to identify + * what that should be, so we just tell fix_join_expr to accept superset + * nullingrels matches instead of exact ones. */ join->plan.targetlist = fix_join_expr(root, join->plan.targetlist, @@ -2503,7 +2503,7 @@ set_join_references(PlannerInfo *root, Join *join, int rtoffset) inner_itlist, (Index) 0, rtoffset, - (join->jointype == JOIN_INNER ? NRM_EQUAL : NRM_SUPERSET), + (bms_is_empty(join->ojrelids) ? NRM_EQUAL : NRM_SUPERSET), NUM_EXEC_TLIST((Plan *) join)); join->plan.qual = fix_join_expr(root, join->plan.qual, @@ -2511,7 +2511,7 @@ set_join_references(PlannerInfo *root, Join *join, int rtoffset) inner_itlist, (Index) 0, rtoffset, - (join->jointype == JOIN_INNER ? NRM_EQUAL : NRM_SUPERSET), + (bms_is_empty(join->ojrelids) ? NRM_EQUAL : NRM_SUPERSET), NUM_EXEC_QUAL((Plan *) join)); pfree(outer_itlist); diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index 14a1dfed2b9..dbafa71a616 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -966,6 +966,7 @@ typedef struct CustomScan * inner_unique each outer tuple can match to no more than one inner tuple * joinqual: qual conditions that came from JOIN/ON or JOIN/USING * (plan.qual contains conditions that came from WHERE) + * ojrelids: outer joins completed at this level * * When jointype is INNER, joinqual and plan.qual are semantically * interchangeable. For OUTER jointypes, the two are *not* interchangeable; @@ -990,6 +991,7 @@ typedef struct Join bool inner_unique; /* JOIN quals (in addition to plan.qual) */ List *joinqual; + Bitmapset *ojrelids; } Join; /* ---------------- -- 2.39.5 (Apple Git-154)