From 3a42bf4c2fedd847d1a597937f8a4118bc2074ff Mon Sep 17 00:00:00 2001
From: Richard Guo
Date: Mon, 15 Jun 2026 16:29:06 +0900
Subject: [PATCH v2 4/4] Relax strictness detection for row-format IS NOT NULL
 tests

find_nonnullable_rels() and find_nonnullable_vars() declined to look
through a NullTest with argisrow set, treating row-format IS NOT NULL as
proving nothing. But such a test returns FALSE, not TRUE, both when the
composite datum is NULL and when any of its fields is NULL, so its truth
implies a non-null input just as the plain test does. That is the only
property strictness detection relies on, so the argisrow restriction can
simply be dropped.

This lets "a LEFT JOIN b ... WHERE b IS NOT NULL" reduce to an inner
join, the IS NOT NULL counterpart of the whole-row anti-join reduction in
the preceding commit. make_outerjoininfo() likewise picks up such tests
when it computes join strictness for outer-join ordering. Row-format
tests on composite-type columns now also prove those columns non-null,
which can feed the anti-join proofs.

The stronger implication of a row-format test, that every field of the
row is non-null, remains unexploited: a whole-row Var reported by
find_nonnullable_vars() promises only a non-null datum, since the same
entry can arise from contexts that are merely strict at the datum level,
such as record comparisons.
--- src/backend/optimizer/util/clauses.c | 24 ++++++-- src/test/regress/expected/join.out | 90 ++++++++++++++++++++++++++++ src/test/regress/sql/join.sql | 39 ++++++++++++ 3 files changed, 149 insertions(+), 4 deletions(-) diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c index 081f00ae814..9fc24d39442 100644 --- a/src/backend/optimizer/util/clauses.c +++ b/src/backend/optimizer/util/clauses.c @@ -1627,10 +1627,16 @@ find_nonnullable_rels_walker(Node *node, bool top_level) } else if (IsA(node, NullTest)) { - /* IS NOT NULL can be considered strict, but only at top level */ + /* + * IS NOT NULL can be considered strict, but only at top level. This + * holds for a row-format test too: it returns FALSE, not TRUE, both + * when the composite datum is NULL and when any of its fields is + * NULL, so its truth implies a non-null input just as the plain test + * does. + */ NullTest *expr = (NullTest *) node; - if (top_level && expr->nulltesttype == IS_NOT_NULL && !expr->argisrow) + if (top_level && expr->nulltesttype == IS_NOT_NULL) result = find_nonnullable_rels_walker((Node *) expr->arg, false); } else if (IsA(node, BooleanTest)) @@ -1885,10 +1891,20 @@ find_nonnullable_vars_walker(Node *node, bool top_level) } else if (IsA(node, NullTest)) { - /* IS NOT NULL can be considered strict, but only at top level */ + /* + * IS NOT NULL can be considered strict, but only at top level. This + * holds for a row-format test too: it returns FALSE, not TRUE, both + * when the composite datum is NULL and when any of its fields is + * NULL, so its truth implies a non-null input just as the plain test + * does. (It also implies that all the fields are non-null, but we + * have no way to represent that stronger fact here: a whole-row Var + * reported by this function promises only a non-null datum, since the + * entry can also arise from contexts that are merely strict at the + * datum level, such as record comparisons.) 
+ */ NullTest *expr = (NullTest *) node; - if (top_level && expr->nulltesttype == IS_NOT_NULL && !expr->argisrow) + if (top_level && expr->nulltesttype == IS_NOT_NULL) result = find_nonnullable_vars_walker((Node *) expr->arg, false); } else if (IsA(node, BooleanTest)) diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index 6100fcd4590..20cfc9552cd 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -3865,6 +3865,96 @@ where t1 is null; -> Seq Scan on tbl_wr t1 (9 rows) +-- The IS NOT NULL counterpart: a row-format IS NOT NULL test is false both +-- for a null-extended row and when any column is NULL, so it reduces join +-- strength like any strict qual. +-- reduced to an inner join +explain (costs off) +select * from tenk1 t1 left join tbl_wr t2 on t1.unique1 = t2.b +where t2 is not null; + QUERY PLAN +------------------------------------ + Hash Join + Hash Cond: (t2.b = t1.unique1) + -> Seq Scan on tbl_wr t2 + Filter: (t2.* IS NOT NULL) + -> Hash + -> Seq Scan on tenk1 t1 +(6 rows) + +-- reduced to a left join +explain (costs off) +select * from tbl_wr t1 full join tbl_wr t2 on t1.b = t2.b +where t1 is not null; + QUERY PLAN +------------------------------------------ + Merge Left Join + Merge Cond: (t1.b = t2.b) + -> Sort + Sort Key: t1.b + -> Seq Scan on tbl_wr t1 + Filter: (t1.* IS NOT NULL) + -> Sort + Sort Key: t2.b + -> Seq Scan on tbl_wr t2 +(9 rows) + +-- composite-type columns: reduced to an inner join +create temp table tbl_comp (a int, w tbl_wr); +explain (costs off) +select * from tenk1 t1 left join tbl_comp t2 on t1.unique1 = t2.a +where t2.w is not null; + QUERY PLAN +---------------------------------- + Hash Join + Hash Cond: (t2.a = t1.unique1) + -> Seq Scan on tbl_comp t2 + Filter: (w IS NOT NULL) + -> Hash + -> Seq Scan on tenk1 t1 +(6 rows) + +-- composite-type columns: reduced to an antijoin +explain (costs off) +select * from tenk1 t1 left join tbl_comp t2 on t2.w is 
not null +where t2 is null; + QUERY PLAN +--------------------------------------- + Nested Loop Anti Join + -> Seq Scan on tenk1 t1 + -> Materialize + -> Seq Scan on tbl_comp t2 + Filter: (w IS NOT NULL) +(5 rows) + +-- For a zero-field row, IS NULL and IS NOT NULL are both true, so the join +-- clause here proves only that t2's datum is non-null, which must not refute +-- the whole-row IS NULL above: matched rows pass both tests. +create temp table tbl_zero (); +insert into tbl_zero default values; +-- should not be reduced +explain (costs off) +select * from (values (1), (2)) v(x) left join tbl_zero t2 on t2 is not null +where t2 is null; + QUERY PLAN +------------------------------------------ + Nested Loop Left Join + Filter: (t2.* IS NULL) + -> Values Scan on "*VALUES*" + -> Materialize + -> Seq Scan on tbl_zero t2 + Filter: (t2.* IS NOT NULL) +(6 rows) + +-- should return 2 rows +select * from (values (1), (2)) v(x) left join tbl_zero t2 on t2 is not null +where t2 is null; + x +--- + 1 + 2 +(2 rows) + rollback; -- -- regression test for bogus RTE_GROUP entries diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql index 757edb5f091..18e59c2a79b 100644 --- a/src/test/regress/sql/join.sql +++ b/src/test/regress/sql/join.sql @@ -1084,6 +1084,45 @@ explain (costs off) select * from tbl_wr t1 full join tbl_anti t2 on t1.b = t2.b where t1 is null; +-- The IS NOT NULL counterpart: a row-format IS NOT NULL test is false both +-- for a null-extended row and when any column is NULL, so it reduces join +-- strength like any strict qual. 
+ +-- reduced to an inner join +explain (costs off) +select * from tenk1 t1 left join tbl_wr t2 on t1.unique1 = t2.b +where t2 is not null; + +-- reduced to a left join +explain (costs off) +select * from tbl_wr t1 full join tbl_wr t2 on t1.b = t2.b +where t1 is not null; + +-- composite-type columns: reduced to an inner join +create temp table tbl_comp (a int, w tbl_wr); +explain (costs off) +select * from tenk1 t1 left join tbl_comp t2 on t1.unique1 = t2.a +where t2.w is not null; + +-- composite-type columns: reduced to an antijoin +explain (costs off) +select * from tenk1 t1 left join tbl_comp t2 on t2.w is not null +where t2 is null; + +-- For a zero-field row, IS NULL and IS NOT NULL are both true, so the join +-- clause here proves only that t2's datum is non-null, which must not refute +-- the whole-row IS NULL above: matched rows pass both tests. +create temp table tbl_zero (); +insert into tbl_zero default values; + +-- should not be reduced +explain (costs off) +select * from (values (1), (2)) v(x) left join tbl_zero t2 on t2 is not null +where t2 is null; +-- should return 2 rows +select * from (values (1), (2)) v(x) left join tbl_zero t2 on t2 is not null +where t2 is null; + rollback; -- -- 2.39.5 (Apple Git-154)