From 11c165bb56406fab2603ddfca8728bac274570b7 Mon Sep 17 00:00:00 2001 From: Richard Guo Date: Mon, 15 Jun 2026 11:24:00 +0900 Subject: [PATCH v2 3/4] Reduce outer joins to anti joins for whole-row IS NULL tests reduce_outer_joins() recognizes "WHERE b.z IS NULL" above an outer join as an anti-join condition, but not the whole-row "WHERE b IS NULL", which is a natural way to ask for an anti-join without naming a specific column. Teach it to recognize the whole-row form too. A row that the join null-extends has all of b's columns set to NULL, so it satisfies "b IS NULL". A matched row satisfies the test only if its columns happen to be all NULL. Hence proving any one column of b non-null in matching rows rules out every matched row, leaving only null-extended rows: exactly anti-join semantics. This mirrors the single-column case, and the same proofs apply: a NOT NULL table constraint, a strict join clause (for LEFT joins), or strict quals within the relation's subtree. Because any one column suffices, the whole-row test reduces in a strict superset of the cases a single-column test does. To implement this, find_forced_null_vars() now reports a whole-row Var tested with row-format IS NULL as a varattno-zero entry meaning that all of the relation's columns are forced null. Row-format tests on ordinary composite-type columns remain excluded, since such a test does not force the column null: it is also true when the column is a non-null row whose fields are all NULL. The proof functions in reduce_outer_joins() treat a whole-row entry accordingly: it is refuted by proving any one column of its relation non-null. A match on the whole-row attribute itself proves nothing, because a non-null composite datum can still have all columns NULL, so the per-column matching now explicitly excludes that attribute. --- src/backend/optimizer/prep/prepjointree.c | 85 +++++++++++- src/backend/optimizer/util/clauses.c | 19 ++- src/test/regress/expected/join.out | 156 ++++++++++++++++++++++ src/test/regress/sql/join.sql | 60 +++++++++ 4 files changed, 311 insertions(+), 9 deletions(-) diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c index 48631fb99c6..2d710a8546d 100644 --- a/src/backend/optimizer/prep/prepjointree.c +++ b/src/backend/optimizer/prep/prepjointree.c @@ -3252,6 +3252,11 @@ flatten_simple_union_all(PlannerInfo *root) * removed to prevent bogus selectivity calculations, but we leave it to * distribute_qual_to_rels to get rid of such clauses. * + * A whole-row Var works too. "WHERE b IS NULL" in row-format semantics is + * true when b's whole-row value is NULL or when every column of b is NULL; + * for a matching row only the latter is possible, so proving any one column + * of b non-null in matching rows justifies the same reduction. + * * The same recognition reduces a FULL join to an anti-semijoin when a * forced-null Var on either side is proven non-null: only the other side's * unmatched rows can survive. If that surviving side is the right-hand @@ -3803,6 +3808,9 @@ report_reduced_full_join(reduce_outer_joins_pass2_state *state2, * indicated by "state" that are known to be non-nullable due to table * constraints. * + * A whole-row Var, in any matching row, requires every column of its relation + * to be NULL, so any NOT NULL column of the relation refutes it. + * * Note that we must also consider the situation where a NOT NULL Var can be * nulled by lower-level outer joins. * @@ -3819,6 +3827,7 @@ forced_null_var_is_attnotnull(PlannerInfo *root, List *forced_null_vars, RangeTblEntry *rte; Bitmapset *notnullattnums; Bitmapset *forcednullattnums = NULL; + bool wholerow = false; int attno; varno++; @@ -3853,6 +3862,13 @@ forced_null_var_is_attnotnull(PlannerInfo *root, List *forced_null_vars, if (real_attno < 0) return true; + /* whole-row Vars are handled below, not by attnum matching */ + if (real_attno == 0) + { + wholerow = true; + continue; + } + forcednullattnums = bms_add_member(forcednullattnums, real_attno); } @@ -3879,6 +3895,17 @@ forced_null_var_is_attnotnull(PlannerInfo *root, List *forced_null_vars, /* Get the column not-null constraint information for this relation */ notnullattnums = find_relation_notnullatts(root, rte->relid); + /* + * A forced-null whole-row Var, in any matching row, requires every + * column of the relation to be NULL, so any NOT NULL column refutes + * it. + */ + if (wholerow && !bms_is_empty(notnullattnums)) + { + bms_free(forcednullattnums); + return true; + } + /* * Check if any forced-null attributes are defined as NOT NULL by * table constraints. @@ -3905,6 +3932,10 @@ forced_null_var_is_attnotnull(PlannerInfo *root, List *forced_null_vars, * the Var, or a NOT NULL table constraint (excluding Vars nullable due to * lower-level outer joins). * + * A whole-row Var in "forced_null_vars" requires, in any matching row, every + * column of its relation to be NULL, so it is refuted by proving any one of + * those columns non-null. + * * Helper for reduce_outer_joins_pass2. */ static bool @@ -3914,19 +3945,59 @@ forced_null_var_is_nonnullable(PlannerInfo *root, List *forced_null_vars, { List *all_quals; List *nonnullable_vars; - Bitmapset *overlap; + int wholerow_attno = 0 - FirstLowInvalidHeapAttributeNumber; + int varno = -1; all_quals = list_concat_copy(state->safe_quals, extra_quals); nonnullable_vars = find_nonnullable_vars((Node *) all_quals); /* - * It's not sufficient to check whether nonnullable_vars and - * forced_null_vars overlap: we need to know if the overlap includes any - * variables of this subtree. + * It's not sufficient to consider all matches between nonnullable_vars + * and forced_null_vars: a match counts only for a Var belonging to this + * subtree, and the whole-row attribute needs special treatment. */ - overlap = mbms_overlap_sets(nonnullable_vars, forced_null_vars); - if (bms_overlap(overlap, state->relids)) - return true; + foreach_node(Bitmapset, attrs, forced_null_vars) + { + Bitmapset *nonnull_attrs; + + varno++; + + /* Skip empty bitmaps */ + if (bms_is_empty(attrs)) + continue; + + /* Skip Vars that do not belong to the target relations */ + if (!bms_is_member(varno, state->relids)) + continue; + + /* Get what the quals prove non-null for this relation, if anything */ + if (varno >= list_length(nonnullable_vars)) + continue; + nonnull_attrs = list_nth_node(Bitmapset, nonnullable_vars, varno); + + /* + * A proof for the whole-row attribute refutes nothing: it shows only + * that the composite datum is non-null, and such a datum can still + * have all columns NULL. Discard it up front. + */ + nonnull_attrs = bms_del_member(nonnull_attrs, wholerow_attno); + + /* A forced-null attribute that is proven non-null settles it. */ + if (bms_overlap(attrs, nonnull_attrs)) + return true; + + /* + * So does any real column proven non-null, if the whole-row Var is + * forced null: in a matching row (whose whole-row datum is non-null) + * the row-format IS NULL test is true only when every column is NULL. + * System attributes don't count, since they are not part of the row + * value; conveniently they sort below the whole-row attribute in the + * bitmap. + */ + if (bms_is_member(wholerow_attno, attrs) && + bms_next_member(nonnull_attrs, wholerow_attno) >= 0) + return true; + } /* * Otherwise, check if any forced-null var is defined NOT NULL by table diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c index 07738894d1a..081f00ae814 100644 --- a/src/backend/optimizer/util/clauses.c +++ b/src/backend/optimizer/util/clauses.c @@ -1931,6 +1931,13 @@ find_nonnullable_vars_walker(Node *node, bool top_level) * * As with find_nonnullable_vars, we return the varattnos of the identified * Vars in a multibitmapset. + * + * A whole-row Var tested with a row-format IS NULL is reported too, as a + * varattno-zero entry. That test is true when the whole-row value is NULL + * or when every column of the row is NULL, so for any row that is not itself + * null the entry signifies that all of the relation's columns are forced + * null; consumers must interpret it that way rather than as an ordinary + * attribute. */ List * find_forced_null_vars(Node *node) @@ -2003,12 +2010,20 @@ find_forced_null_var(Node *node) /* check for var IS NULL */ NullTest *expr = (NullTest *) node; - if (expr->nulltesttype == IS_NULL && !expr->argisrow) + if (expr->nulltesttype == IS_NULL) { Var *var = (Var *) expr->arg; + /* + * A row-format test is accepted only on a whole-row Var, where + * its truth requires every column of the relation to be NULL. On + * an ordinary composite-type column it is rejected, because the + * test does not force that column null: it is also true when the + * column is a non-null row whose fields are all NULL. + */ if (var && IsA(var, Var) && - var->varlevelsup == 0) + var->varlevelsup == 0 && + (!expr->argisrow || var->varattno == 0)) return var; } } diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index b318cddd7c0..6100fcd4590 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -3709,6 +3709,162 @@ from ma full join mb using (y) where ma.x is null order by y; 40 | | 4 (1 row) +-- A whole-row Var IS NULL is true only when every column is NULL, so it +-- works as an antijoin test whenever any column is provably non-null in +-- matching rows. +create temp table tbl_wr (b int, c int); +-- this is an antijoin: t2.a is defined NOT NULL +explain (costs off) +select * from tenk1 t1 left join tbl_anti t2 on true +where t2 is null; + QUERY PLAN +------------------------------------- + Nested Loop Anti Join + -> Seq Scan on tenk1 t1 + -> Materialize + -> Seq Scan on tbl_anti t2 +(4 rows) + +-- this is an antijoin: the strict join clause proves t2.b non-null +explain (costs off) +select * from tenk1 t1 left join tbl_wr t2 on t1.unique1 = t2.b +where t2 is null; + QUERY PLAN +---------------------------------- + Hash Right Anti Join + Hash Cond: (t2.b = t1.unique1) + -> Seq Scan on tbl_wr t2 + -> Hash + -> Seq Scan on tenk1 t1 +(5 rows) + +-- this is an antijoin: the strict join clause within the RHS subtree proves +-- t2.c non-null +explain (costs off) +select * from tenk1 t1 left join + (tbl_wr t2 join tbl_anti t3 on t2.c = t3.c) on true +where t2 is null; + QUERY PLAN +------------------------------------------------- + Nested Loop Anti Join + -> Seq Scan on tenk1 t1 + -> Materialize + -> Merge Join + Merge Cond: (t3.c = t2.c) + -> Sort + Sort Key: t3.c + -> Seq Scan on tbl_anti t3 + -> Sort + Sort Key: t2.c + -> Seq Scan on tbl_wr t2 +(11 rows) + +-- this is not an antijoin: nothing proves any column of t2 non-null +explain (costs off) +select * from tenk1 t1 left join tbl_wr t2 on true +where t2 is null; + QUERY PLAN +----------------------------------- + Nested Loop Left Join + Filter: (t2.* IS NULL) + -> Seq Scan on tenk1 t1 + -> Materialize + -> Seq Scan on tbl_wr t2 +(5 rows) + +-- nor is this: the strict record comparison proves only that the whole-row +-- datum is non-null, but record_eq treats NULL fields as equal, so a +-- matching row can still have all columns NULL and pass the row-format test +explain (costs off) +select * from tenk1 t1 left join + (tbl_wr t2 join tbl_wr t3 on t2 = t3) on true +where t2 is null; + QUERY PLAN +----------------------------------------------- + Nested Loop Left Join + Filter: (t2.* IS NULL) + -> Seq Scan on tenk1 t1 + -> Materialize + -> Merge Join + Merge Cond: (t2.* = t3.*) + -> Sort + Sort Key: t2.* + -> Seq Scan on tbl_wr t2 + -> Sort + Sort Key: t3.* + -> Seq Scan on tbl_wr t3 +(12 rows) + +-- nor is this: the join clause is strict only for t2's ctid, which is not +-- part of the row value, so a matching row can still have all columns NULL +explain (costs off) +select * from tbl_wr t1 left join tbl_wr t2 on t2.ctid = t1.ctid +where t2 is null; + QUERY PLAN +----------------------------------- + Hash Left Join + Hash Cond: (t1.ctid = t2.ctid) + Filter: (t2.* IS NULL) + -> Seq Scan on tbl_wr t1 + -> Hash + -> Seq Scan on tbl_wr t2 +(6 rows) + +-- this is not an antijoin: t3 can be nulled by the lower outer join, so its +-- NOT NULL constraint proves nothing here +explain (costs off) +select * from tenk1 t1 left join + (tbl_anti t2 left join tbl_anti t3 on t2.c = t3.c) on t1.unique1 = t2.b +where t3 is null; + QUERY PLAN +------------------------------------------- + Hash Right Join + Hash Cond: (t2.b = t1.unique1) + Filter: (t3.* IS NULL) + -> Merge Left Join + Merge Cond: (t2.c = t3.c) + -> Sort + Sort Key: t2.c + -> Seq Scan on tbl_anti t2 + -> Sort + Sort Key: t3.c + -> Seq Scan on tbl_anti t3 + -> Hash + -> Seq Scan on tenk1 t1 +(13 rows) + +-- whole-row tests work for full joins too: t2.a's NOT NULL constraint +-- reduces this one ... +explain (costs off) +select * from tbl_wr t1 full join tbl_anti t2 on t1.b = t2.b +where t2 is null; + QUERY PLAN +------------------------------------- + Hash Anti Join + Hash Cond: (t1.b = t2.b) + -> Seq Scan on tbl_wr t1 + -> Hash + -> Seq Scan on tbl_anti t2 +(5 rows) + +-- ... but nothing proves a column of t1 non-null, since the join clause +-- cannot serve as the proof here +explain (costs off) +select * from tbl_wr t1 full join tbl_anti t2 on t1.b = t2.b +where t1 is null; + QUERY PLAN +------------------------------------- + Merge Full Join + Merge Cond: (t2.b = t1.b) + Filter: (t1.* IS NULL) + -> Sort + Sort Key: t2.b + -> Seq Scan on tbl_anti t2 + -> Sort + Sort Key: t1.b + -> Seq Scan on tbl_wr t1 +(9 rows) + rollback; -- -- regression test for bogus RTE_GROUP entries diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql index 00933c81197..757edb5f091 100644 --- a/src/test/regress/sql/join.sql +++ b/src/test/regress/sql/join.sql @@ -1024,6 +1024,66 @@ from ma full join mb using (y) where ma.x is null order by y; select y, ma.x as ax, mb.x as bx from ma full join mb using (y) where ma.x is null order by y; +-- A whole-row Var IS NULL is true only when every column is NULL, so it +-- works as an antijoin test whenever any column is provably non-null in +-- matching rows. +create temp table tbl_wr (b int, c int); + +-- this is an antijoin: t2.a is defined NOT NULL +explain (costs off) +select * from tenk1 t1 left join tbl_anti t2 on true +where t2 is null; + +-- this is an antijoin: the strict join clause proves t2.b non-null +explain (costs off) +select * from tenk1 t1 left join tbl_wr t2 on t1.unique1 = t2.b +where t2 is null; + +-- this is an antijoin: the strict join clause within the RHS subtree proves +-- t2.c non-null +explain (costs off) +select * from tenk1 t1 left join + (tbl_wr t2 join tbl_anti t3 on t2.c = t3.c) on true +where t2 is null; + +-- this is not an antijoin: nothing proves any column of t2 non-null +explain (costs off) +select * from tenk1 t1 left join tbl_wr t2 on true +where t2 is null; + +-- nor is this: the strict record comparison proves only that the whole-row +-- datum is non-null, but record_eq treats NULL fields as equal, so a +-- matching row can still have all columns NULL and pass the row-format test +explain (costs off) +select * from tenk1 t1 left join + (tbl_wr t2 join tbl_wr t3 on t2 = t3) on true +where t2 is null; + +-- nor is this: the join clause is strict only for t2's ctid, which is not +-- part of the row value, so a matching row can still have all columns NULL +explain (costs off) +select * from tbl_wr t1 left join tbl_wr t2 on t2.ctid = t1.ctid +where t2 is null; + +-- this is not an antijoin: t3 can be nulled by the lower outer join, so its +-- NOT NULL constraint proves nothing here +explain (costs off) +select * from tenk1 t1 left join + (tbl_anti t2 left join tbl_anti t3 on t2.c = t3.c) on t1.unique1 = t2.b +where t3 is null; + +-- whole-row tests work for full joins too: t2.a's NOT NULL constraint +-- reduces this one ... +explain (costs off) +select * from tbl_wr t1 full join tbl_anti t2 on t1.b = t2.b +where t2 is null; + +-- ... but nothing proves a column of t1 non-null, since the join clause +-- cannot serve as the proof here +explain (costs off) +select * from tbl_wr t1 full join tbl_anti t2 on t1.b = t2.b +where t1 is null; + rollback; -- -- 2.39.5 (Apple Git-154)