From fc6dcdc8770ea122c1368b3c6fd84aaf30ab4f08 Mon Sep 17 00:00:00 2001 From: Alena Rybakina Date: Mon, 30 Mar 2026 07:24:35 +0300 Subject: [PATCH] Enables pull-up of EXISTS subqueries that contain INNER joins, unlocking join reordering and earlier filtering. OUTER joins with outer references are safely excluded to preserve null-preserving semantics. To achieve this, introduce a mutator that performs a single conservative pass over the subquery jointree and stops transformation if subquery contains volatile quals, or OUTER joins with outer references, since hoisting would break null-preserving behavior. On the other hand, OUTER joins without such references remain intact. Add IS NOT NULL guards on hoisted outer Vars to avoid redundant null elements that obviously won't result after join operation. Replace affected subquery quals with true. Author: Alena Rybakina Reviewers: Ranier Vilela , Peter Petrov , Ilia Evdokimov --- src/backend/optimizer/plan/subselect.c | 309 +++++++++- src/test/regress/expected/join.out | 41 +- src/test/regress/expected/subselect.out | 786 ++++++++++++++++++++++++ src/test/regress/sql/subselect.sql | 430 +++++++++++++ 4 files changed, 1508 insertions(+), 58 deletions(-) diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c index ccec1eaa7fe..6cf1f54667e 100644 --- a/src/backend/optimizer/plan/subselect.c +++ b/src/backend/optimizer/plan/subselect.c @@ -34,6 +34,7 @@ #include "optimizer/prep.h" #include "optimizer/subselect.h" #include "parser/parse_relation.h" +#include "parser/parsetree.h" #include "rewrite/rewriteManip.h" #include "utils/builtins.h" #include "utils/lsyscache.h" @@ -1583,10 +1584,250 @@ sublink_testexpr_is_not_nullable(PlannerInfo *root, SubLink *sublink) } /* - * convert_EXISTS_sublink_to_join: try to convert an EXISTS SubLink to a join + * HoistJoinQualsContext accumulates information while traversing + * a subquery's jointree. 
  *
- * The API of this function is identical to convert_ANY_sublink_to_join's.
+ * outer_clauses: List of qual expressions removed from the subquery (join
+ * quals that reference parent-level variables, plus the WHERE clause) that
+ * can become join conditions after the conversion.
+ *
+ * contain_outer_vars: Flag indicating whether any hoisted qual contains
+ * parent-level variables.
+ *
+ * root: PlannerInfo for the parent query level.
+ *
+ * nullable_above: Set of relation IDs on the nullable side of outer joins
+ * in the subquery's jointree, as reported by compute_nullable_side().
  */
+typedef struct HoistJoinQualsContext
+{
+	List	   *outer_clauses;
+	bool		contain_outer_vars;
+	PlannerInfo *root;
+	Bitmapset  *nullable_above;
+} HoistJoinQualsContext;
+
+static Node *
+preprocess_quals(Node *node)
+{
+	/*
+	 * Simplify a qual and return it in implicit-AND form.  The value
+	 * returned is the output of make_ands_implicit(), which callers cast
+	 * to (List *).
+	 *
+	 * Run const-folding without planner context.
+	 *
+	 * IMPORTANT: Pass NULL as PlannerInfo here because we're simplifying a
+	 * *subquery's* quals before its rtable has been merged with the parent.
+	 * If we passed a non-NULL root, eval_const_expressions() could perform
+	 * root-dependent transforms (e.g., fold NullTest on Var using
+	 * var_is_nonnullable) against the *wrong* rangetable, risking
+	 * out-of-bounds RTE access. See eval_const_expressions()'s contract:
+	 * "root can be passed as NULL ..." for exactly this use-case.
+	 */
+	node = eval_const_expressions(NULL, node);
+	node = (Node *) canonicalize_qual((Expr *) node, false);
+
+	node = (Node *) make_ands_implicit((Expr *) node);
+
+	return node;
+}
+
+ /*
+ * compute_nullable_side Return the set of relids that become NULL-able at
+ * this join node.
+ */
+static Relids
+compute_nullable_side(JoinExpr *j, Relids left, Relids right)
+{
+	switch (j->jointype)
+	{
+		case JOIN_LEFT:
+		case JOIN_ANTI:
+			/* the right-hand input is the nullable side */
+			return bms_copy(right);
+		case JOIN_RIGHT:
+			/* the left-hand input is the nullable side */
+			return bms_copy(left);
+		case JOIN_FULL:
+			/* either input can be null-extended */
+			return bms_union(left, right);
+		default:
+			/* other join types make nothing nullable */
+			return NULL;
+	}
+}
+
+/*
+ * hoist_parent_quals_jointree_mutator
+ *
+ * Build a modified copy of a subquery's jointree in which the quals that
+ * may become conditions of the parent-level join are removed and appended
+ * to context->outer_clauses.  Returns NULL if the conversion cannot
+ * proceed.
+ *
+ * Quals are hoisted from two places only:
+ *	- the ON clause of an INNER join, if it contains parent-level Vars;
+ *	- the WHERE clause of a FromExpr;
+ * and in both cases only when that node does not lie within the nullable
+ * side of an outer join.  Such quals act as plain filters on the join
+ * result, so evaluating them above the subquery's joins gives the same
+ * rows.
+ *
+ * The ON clause of an outer join is never hoisted: it decides which rows
+ * are null-extended rather than which rows are removed, so it cannot be
+ * evaluated anywhere else.  For the same reason nothing is hoisted out of
+ * the nullable side of an outer join.  A qual that must stay in place is
+ * left exactly as it was; if it references parent-level Vars the
+ * conversion fails.
+ *
+ * While a node is being visited, context->nullable_above holds the relids
+ * on the nullable side of every outer join above that node.
+ */
+static Node *
+hoist_parent_quals_jointree_mutator(Node *jtnode, HoistJoinQualsContext * context)
+{
+	if (jtnode == NULL)
+		return NULL;
+
+	if (IsA(jtnode, RangeTblRef))
+		return jtnode;			/* nothing to change */
+
+	if (IsA(jtnode, JoinExpr))
+	{
+		JoinExpr   *j = (JoinExpr *) jtnode;
+		JoinExpr   *newj;
+		Relids		lrels,
+					rrels;
+		Bitmapset  *saved_nullable = context->nullable_above;
+		bool		under_nullable;
+
+		/* Neither SEMI nor ANTI joins are expected here. */
+		Assert(j->jointype != JOIN_SEMI && j->jointype != JOIN_ANTI);
+
+		/* Fail the conversion if the join quals contain volatile functions. */
+		if (contain_volatile_functions(j->quals))
+			return NULL;
+
+		lrels = get_relids_in_jointree(j->larg, true, true);
+		rrels = get_relids_in_jointree(j->rarg, true, true);
+
+		/*
+		 * This join lies within the nullable side of some outer join above
+		 * it iff its relations were recorded in nullable_above on the way
+		 * down.
+		 */
+		under_nullable = bms_overlap(lrels, saved_nullable) ||
+			bms_overlap(rrels, saved_nullable);
+
+		/*
+		 * While visiting the inputs, also account for the side(s) that this
+		 * join itself makes nullable.  The previous set is restored
+		 * afterwards so that sibling subtrees are not affected.
+		 */
+		context->nullable_above =
+			bms_union(saved_nullable,
+					  compute_nullable_side(j, lrels, rrels));
+
+		/* Work on a flat copy; the inputs are replaced by mutated copies. */
+		newj = makeNode(JoinExpr);
+		memcpy(newj, j, sizeof(JoinExpr));
+		newj->larg = hoist_parent_quals_jointree_mutator(j->larg, context);
+		newj->rarg = hoist_parent_quals_jointree_mutator(j->rarg, context);
+
+		context->nullable_above = saved_nullable;
+
+		if (newj->larg == NULL || newj->rarg == NULL)
+			return NULL;
+
+		if (j->jointype == JOIN_INNER && !under_nullable)
+		{
+			List	   *processed_quals = (List *) preprocess_quals(j->quals);
+
+			if (processed_quals != NIL &&
+				contain_vars_of_level((Node *) processed_quals, 1))
+			{
+				/*
+				 * The quals reference the parent query: move the whole
+				 * list to outer_clauses and leave constant TRUE behind.
+				 */
+				context->contain_outer_vars = true;
+				context->outer_clauses = list_concat(context->outer_clauses,
+													 processed_quals);
+				newj->quals = (Node *) makeBoolConst(true, false);
+			}
+			else
+			{
+				/* No parent vars, keep the quals in the subquery */
+				newj->quals = (Node *) make_ands_explicit(processed_quals);
+			}
+		}
+		else if (contain_vars_of_level(j->quals, 1))
+		{
+			/*
+			 * These quals cannot be moved, and they cannot stay either
+			 * because they reference the parent query: give up.
+			 */
+			return NULL;
+		}
+
+		/*
+		 * Otherwise newj->quals still holds the original quals copied by
+		 * memcpy above; they must not be dropped or replaced.
+		 */
+		return (Node *) newj;
+	}
+
+	if (IsA(jtnode, FromExpr))
+	{
+		FromExpr   *f = (FromExpr *) jtnode;
+		FromExpr   *newf = makeNode(FromExpr);
+		List	   *fromlist = NIL;
+		ListCell   *lc;
+
+		/* Work on a flat copy; the fromlist is rebuilt below. */
+		memcpy(newf, f, sizeof(FromExpr));
+
+		/* Recurse into fromlist */
+		foreach(lc, f->fromlist)
+		{
+			Node	   *fnode = hoist_parent_quals_jointree_mutator(lfirst(lc), context);
+
+			if (fnode == NULL)
+				return NULL;
+			fromlist = lappend(fromlist, fnode);
+		}
+		newf->fromlist = fromlist;
+
+		/*
+		 * Fail the conversion if the WHERE clause contains volatile functions
+		 */
+		if (contain_volatile_functions(f->quals))
+			return NULL;
+
+		if (f->quals != NULL)
+		{
+			Relids		frels = get_relids_in_jointree(jtnode, true, true);
+
+			if (bms_overlap(frels, context->nullable_above))
+			{
+				/*
+				 * This FromExpr is within the nullable side of an outer
+				 * join, so its WHERE clause has to stay where it is.
+				 */
+				if (contain_vars_of_level(f->quals, 1))
+					return NULL;
+			}
+			else
+			{
+				/* Quals (WHERE clause) may still contain sublinks etc */
+				List	   *where_quals = (List *) preprocess_quals(f->quals);
+
+				/*
+				 * The whole WHERE clause is moved to outer_clauses, even if
+				 * it references rels that are nullable below this point: it
+				 * is evaluated after all joins of this level either way.
+				 * Only remember whether it references the parent query.
+				 */
+				if (contain_vars_of_level((Node *) where_quals, 1))
+					context->contain_outer_vars = true;
+
+				context->outer_clauses = list_concat(context->outer_clauses,
+													 where_quals);
+				newf->quals = NULL;
+			}
+		}
+
+		return (Node *) newf;
+	}
+
+	return jtnode;				/* other node types are returned unchanged */
+}
+
+/*
+ * convert_EXISTS_sublink_to_join: try to convert an EXISTS SubLink to a join
+ *
+ * The API of this function is identical to convert_ANY_sublink_to_join's,
+ * except that we also support the case where the caller has found NOT
+ * EXISTS, so we need an additional input parameter "under_not".
+ */ JoinExpr * convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink, bool under_not, Relids available_rels) @@ -1601,6 +1842,13 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink, Relids clause_varnos; Relids upper_varnos; + HoistJoinQualsContext hjq_context = { + NIL, /* outer_clauses */ + false, /* contain_outer_vars */ + root, /* root */ + NULL, /* nullable_above */ + }; + Assert(sublink->subLinkType == EXISTS_SUBLINK); /* @@ -1629,34 +1877,6 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink, if (!simplify_EXISTS_query(root, subselect)) return NULL; - /* - * Separate out the WHERE clause. (We could theoretically also remove - * top-level plain JOIN/ON clauses, but it's probably not worth the - * trouble.) - */ - whereClause = subselect->jointree->quals; - subselect->jointree->quals = NULL; - - /* - * The rest of the sub-select must not refer to any Vars of the parent - * query. (Vars of higher levels should be okay, though.) - */ - if (contain_vars_of_level((Node *) subselect, 1)) - return NULL; - - /* - * On the other hand, the WHERE clause must contain some Vars of the - * parent query, else it's not gonna be a join. - */ - if (!contain_vars_of_level(whereClause, 1)) - return NULL; - - /* - * We don't risk optimizing if the WHERE clause is volatile, either. - */ - if (contain_volatile_functions(whereClause)) - return NULL; - /* * Scan the rangetable for relation RTEs and retrieve the necessary * catalog information for each relation. Using this information, clear @@ -1677,15 +1897,38 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink, subroot.type = T_PlannerInfo; subroot.glob = root->glob; subroot.parse = subselect; - subselect->jointree->quals = whereClause; subselect = preprocess_relation_rtes(&subroot); + + subselect->jointree = + (FromExpr *) hoist_parent_quals_jointree_mutator((Node *) subselect->jointree, + &hjq_context); + /* - * Now separate out the WHERE clause again. 
+ * Stop conversion if the jointree is NULL or no parent query variables + * were found. XXX: Add push down converse - we need to build the join + * tree and pull main table bellow! It's a solution for outer joins too! */ - whereClause = subselect->jointree->quals; + if (subselect->jointree == NULL || !hjq_context.contain_outer_vars) + return NULL; + + Assert(hjq_context.outer_clauses != NIL); + subselect->jointree->quals = NULL; + /* + * The hoisted WHERE clause accounts for the parent-level Var references + * in the EXISTS subquery's quals. But if any FROM-list items (e.g. a + * correlated subquery in the EXISTS FROM clause) still contain level-1 + * Vars, we cannot safely merge them into the outer rtable: they would + * end up with outer-level Var references but without a lateral=true + * flag, producing an inconsistent state that the planner cannot handle. + */ + if (contain_vars_of_level((Node *) subselect, 1)) + return NULL; + + whereClause = (Node *) make_ands_explicit(hjq_context.outer_clauses); + /* * The subquery must have a nonempty jointree, but we can make it so. 
*/ diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index 84872c6f04e..c81c6a05167 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -3390,34 +3390,25 @@ where not exists ( ); QUERY PLAN --------------------------------------------------------- - Merge Anti Join - Merge Cond: (t1.c1 = t2.c2) - -> Sort - Sort Key: t1.c1 - -> Seq Scan on tt4x t1 - -> Sort - Sort Key: t2.c2 - -> Merge Right Join - Merge Cond: (t5.c1 = t3.c2) - -> Merge Join - Merge Cond: (t4.c2 = t5.c1) - -> Sort - Sort Key: t4.c2 - -> Seq Scan on tt4x t4 - -> Sort - Sort Key: t5.c1 - -> Seq Scan on tt4x t5 - -> Sort - Sort Key: t3.c2 + Hash Anti Join + Hash Cond: (t1.c1 = t2.c2) + -> Seq Scan on tt4x t1 + -> Hash + -> Nested Loop Left Join + -> Nested Loop Left Join + -> Seq Scan on tt4x t2 + -> Materialize + -> Seq Scan on tt4x t3 + -> Materialize -> Merge Left Join - Merge Cond: (t2.c3 = t3.c1) + Merge Cond: (t4.c2 = t5.c1) -> Sort - Sort Key: t2.c3 - -> Seq Scan on tt4x t2 + Sort Key: t4.c2 + -> Seq Scan on tt4x t4 -> Sort - Sort Key: t3.c1 - -> Seq Scan on tt4x t3 -(27 rows) + Sort Key: t5.c1 + -> Seq Scan on tt4x t5 +(18 rows) -- -- regression test for problems of the sort depicted in bug #3494 diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out index 200236a0a69..358eac8c494 100644 --- a/src/test/regress/expected/subselect.out +++ b/src/test/regress/expected/subselect.out @@ -1042,6 +1042,792 @@ where exists ( where road.name = ss.f1 ); rollback; -- +-- Test case for exist sublink where we can consider some undependent expression +-- with outer link +-- +EXPLAIN (COSTS OFF) +SELECT 1 +FROM ta +WHERE EXISTS ( + SELECT 1 + FROM tb + JOIN tc ON ta.id = tb.id +); + QUERY PLAN +------------------------------------------------- + Nested Loop Semi Join + -> Seq Scan on ta + -> Nested Loop + -> Index Only Scan using tb_pkey on tb + Index Cond: (id = ta.id) + -> Seq Scan on tc +(6 
rows) + +EXPLAIN (COSTS OFF) +SELECT 1 +FROM ta +WHERE EXISTS ( + SELECT 1 + FROM tb + JOIN tc ON ta.id = tc.id +); + QUERY PLAN +------------------------------------------------- + Nested Loop Semi Join + -> Seq Scan on ta + -> Nested Loop + -> Index Only Scan using tc_pkey on tc + Index Cond: (id = ta.id) + -> Seq Scan on tb +(6 rows) + +EXPLAIN (COSTS OFF) +SELECT 1 +FROM ta +WHERE EXISTS ( + SELECT 1 + FROM tb + JOIN tc ON 1 = 1 + WHERE ta.id = tc.id +); + QUERY PLAN +------------------------------------------------- + Nested Loop Semi Join + -> Seq Scan on ta + -> Nested Loop + -> Index Only Scan using tc_pkey on tc + Index Cond: (id = ta.id) + -> Seq Scan on tb +(6 rows) + +-- Join compound expression +EXPLAIN (COSTS OFF) +SELECT 1 +FROM ta +WHERE EXISTS ( + SELECT 1 + FROM tb + JOIN tc ON ta.id = tc.id + AND ta.id = tb.id +); + QUERY PLAN +------------------------------------ + Hash Right Semi Join + Hash Cond: (tc.id = ta.id) + -> Hash Join + Hash Cond: (tb.id = tc.id) + -> Seq Scan on tb + -> Hash + -> Seq Scan on tc + -> Hash + -> Seq Scan on ta +(9 rows) + +EXPLAIN (COSTS OFF) +SELECT 1 +FROM ta ta1 +WHERE EXISTS ( + SELECT 1 + FROM tb + JOIN tc ON tb.aval = tc.aid + AND tb.aval = ta1.id +); + QUERY PLAN +---------------------------------------------------- + Hash Join + Hash Cond: (ta1.id = tb.aval) + -> Seq Scan on ta ta1 + -> Hash + -> Unique + -> Merge Join + Merge Cond: (tb.aval = tc.aid) + -> Sort + Sort Key: tb.aval + -> Seq Scan on tb + -> Sort + Sort Key: tc.aid + -> Seq Scan on tc +(13 rows) + +EXPLAIN (COSTS OFF) +SELECT 1 +FROM ta +JOIN tb ON true +WHERE EXISTS ( + SELECT 1 + FROM tb tb1 + JOIN tc ON ta.id = tb.id +); + QUERY PLAN +------------------------------------ + Nested Loop Semi Join + -> Hash Join + Hash Cond: (ta.id = tb.id) + -> Seq Scan on ta + -> Hash + -> Seq Scan on tb + -> Nested Loop + -> Seq Scan on tb tb1 + -> Materialize + -> Seq Scan on tc +(10 rows) + +-- Compound expression with const type or other type of expressions 
+EXPLAIN (COSTS OFF) +SELECT 1 +FROM ta +WHERE EXISTS ( + SELECT 1 + FROM tb + JOIN tc ON ta.id = tc.id + AND ta.id = 1 +); + QUERY PLAN +------------------------------------------------- + Nested Loop Semi Join + -> Index Only Scan using ta_pkey on ta + Index Cond: (id = 1) + -> Nested Loop + -> Index Only Scan using tc_pkey on tc + Index Cond: (id = 1) + -> Seq Scan on tb +(7 rows) + +EXPLAIN (COSTS OFF) +SELECT 1 +FROM ta +WHERE EXISTS ( + SELECT 1 + FROM tb + JOIN tc ON ta.id = tc.id + AND tb.id = 1 +); + QUERY PLAN +------------------------------------------------- + Hash Right Semi Join + Hash Cond: (tc.id = ta.id) + -> Nested Loop + -> Index Only Scan using tb_pkey on tb + Index Cond: (id = 1) + -> Seq Scan on tc + -> Hash + -> Seq Scan on ta +(8 rows) + +EXPLAIN (COSTS OFF) +SELECT 1 +FROM ta +WHERE EXISTS ( + SELECT 1 + FROM tb + RIGHT JOIN tc ON ta.id = tc.id + WHERE ta.val = 1 +); + QUERY PLAN +------------------------------------------------- + Nested Loop Semi Join + -> Seq Scan on ta + Filter: (val = 1) + -> Nested Loop Left Join + -> Index Only Scan using tc_pkey on tc + Index Cond: (id = ta.id) + -> Seq Scan on tb +(7 rows) + +EXPLAIN (COSTS OFF) +SELECT 1 +FROM ta +WHERE EXISTS ( + SELECT 1 + FROM tb + JOIN tc ON ta.id = tb.id + AND tb.aval = ANY ('{1}'::int[]) +); + QUERY PLAN +------------------------------------------------------------------------- + Hash Join + Hash Cond: (ta.id = tb.id) + -> Seq Scan on ta + -> Hash + -> HashAggregate + Group Key: tb.id + -> Nested Loop + -> Seq Scan on tc + -> Materialize + -> Seq Scan on tb + Filter: (aval = ANY ('{1}'::integer[])) +(11 rows) + +-- Exists SubLink expression within expression +EXPLAIN (COSTS OFF) +SELECT 1 +FROM ta ta1 +WHERE EXISTS ( + SELECT 1 + FROM ta + JOIN tb ON ta.id = ta1.id + AND ta1.val = 1 + WHERE EXISTS ( + SELECT 1 + FROM ta ta2 + WHERE ta2.id = ta1.id + ) +); + QUERY PLAN +------------------------------------------------- + Nested Loop Semi Join + -> Seq Scan on ta ta2 + Filter: 
(val = 1) + -> Nested Loop + -> Index Only Scan using ta_pkey on ta + Index Cond: (id = ta2.id) + -> Seq Scan on tb +(7 rows) + +EXPLAIN (COSTS OFF) +SELECT 1 +FROM ta ta1 +WHERE EXISTS ( + SELECT 1 + FROM ta + JOIN tb ON ta.val = ta1.id + AND ta1.id = 1 + WHERE EXISTS ( + SELECT 1 + FROM ta ta2 + WHERE ta2.id = ta.id + ) +); + QUERY PLAN +----------------------------------------------- + Nested Loop Semi Join + -> Index Only Scan using ta_pkey on ta ta1 + Index Cond: (id = 1) + -> Nested Loop + -> Seq Scan on tb + -> Materialize + -> Seq Scan on ta ta2 + Filter: (val = 1) +(8 rows) + +EXPLAIN (COSTS OFF) +SELECT 1 +FROM ta +WHERE EXISTS ( + SELECT 1 + FROM tb + WHERE ta.id = tb.id + AND EXISTS ( + SELECT 1 + FROM tc + WHERE tc.id = tb.id + AND tc.aid + tb.aval > 0 + ) +); + QUERY PLAN +----------------------------------------------------- + Hash Semi Join + Hash Cond: (ta.id = tc.id) + -> Seq Scan on ta + -> Hash + -> Hash Join + Hash Cond: (tb.id = tc.id) + Join Filter: ((tc.aid + tb.aval) > 0) + -> Seq Scan on tb + -> Hash + -> Seq Scan on tc +(10 rows) + +EXPLAIN (COSTS OFF) +SELECT 1 +FROM ta +WHERE EXISTS ( + SELECT 1 + FROM tb + WHERE ta.id = tb.id + AND EXISTS ( + SELECT 1 + FROM tc + WHERE tc.id = tb.id + AND tc.aid + ta.val > 0 + ) +); + QUERY PLAN +----------------------------------------- + Hash Join + Hash Cond: (ta.id = tb.id) + Join Filter: EXISTS(SubPlan exists_1) + -> Seq Scan on ta + -> Hash + -> Seq Scan on tb + SubPlan exists_1 + -> Index Scan using tc_pkey on tc + Index Cond: (id = tb.id) + Filter: ((aid + ta.val) > 0) +(10 rows) + +-- Check with NULL and NOT NULL expressions +ALTER TABLE ta ADD COLUMN is_active bool; +UPDATE ta SET is_active = true; +EXPLAIN (COSTS OFF) +SELECT 1 +FROM ta +WHERE EXISTS ( + SELECT 1 + FROM tb + JOIN tc ON ta.id = tb.id + AND COALESCE(ta.is_active, true) +); + QUERY PLAN +------------------------------------------------- + Nested Loop Semi Join + -> Seq Scan on ta + Filter: COALESCE(is_active, true) + -> Nested 
Loop + -> Index Only Scan using tb_pkey on tb + Index Cond: (id = ta.id) + -> Seq Scan on tc +(7 rows) + +EXPLAIN (COSTS OFF) +SELECT 1 +FROM tb +WHERE EXISTS ( + SELECT 1 + FROM ta + JOIN tc ON ta.id = tb.id + AND COALESCE(ta.is_active, true) +); + QUERY PLAN +------------------------------------------------------- + Merge Join + Merge Cond: (tb.id = ta.id) + -> Index Only Scan using tb_pkey on tb + -> Unique + -> Nested Loop + -> Index Scan using ta_pkey on ta + Filter: COALESCE(is_active, true) + -> Materialize + -> Seq Scan on tc +(9 rows) + +EXPLAIN (COSTS OFF) +SELECT 1 +FROM ta +WHERE EXISTS ( + SELECT 1 + FROM tb + JOIN tc ON ta.id = tb.id + AND CASE + WHEN ta.is_active THEN true + ELSE false + END = true +); + QUERY PLAN +-------------------------------------------------------------- + Nested Loop Semi Join + -> Seq Scan on ta + Filter: CASE WHEN is_active THEN true ELSE false END + -> Nested Loop + -> Index Only Scan using tb_pkey on tb + Index Cond: (id = ta.id) + -> Seq Scan on tc +(7 rows) + +EXPLAIN (COSTS OFF) +SELECT 1 +FROM tb +WHERE EXISTS ( + SELECT 1 + FROM ta + JOIN tc ON ta.id = tb.id + AND CASE + WHEN ta.is_active THEN true + ELSE false + END = true +); + QUERY PLAN +-------------------------------------------------------------------------- + Merge Join + Merge Cond: (tb.id = ta.id) + -> Index Only Scan using tb_pkey on tb + -> Unique + -> Nested Loop + -> Index Scan using ta_pkey on ta + Filter: CASE WHEN is_active THEN true ELSE false END + -> Materialize + -> Seq Scan on tc +(9 rows) + +EXPLAIN (COSTS OFF) +SELECT 1 +FROM ta +WHERE EXISTS ( + SELECT 1 + FROM tb + JOIN tc ON ta.id = tb.id + AND ta.is_active +); + QUERY PLAN +------------------------------------------------- + Nested Loop Semi Join + -> Seq Scan on ta + Filter: is_active + -> Nested Loop + -> Index Only Scan using tb_pkey on tb + Index Cond: (id = ta.id) + -> Seq Scan on tc +(7 rows) + +EXPLAIN (COSTS OFF) +SELECT 1 +FROM ta +WHERE EXISTS ( + SELECT 1 + FROM tb + JOIN tc ON 
ta.id = tb.id + AND ta.is_active IS NOT NULL +); + QUERY PLAN +------------------------------------------------- + Nested Loop Semi Join + -> Seq Scan on ta + Filter: (is_active IS NOT NULL) + -> Nested Loop + -> Index Only Scan using tb_pkey on tb + Index Cond: (id = ta.id) + -> Seq Scan on tc +(7 rows) + +-- Disabled pull up because it is applcapable for INNER JOIN connection +EXPLAIN (COSTS OFF) +SELECT 1 +FROM ta +WHERE EXISTS ( + SELECT 1 + FROM tb + RIGHT JOIN tc ON ta.id = tc.id +); + QUERY PLAN +------------------------------------------------- + Nested Loop Semi Join + -> Seq Scan on ta + -> Nested Loop Left Join + -> Index Only Scan using tc_pkey on tc + Index Cond: (id = ta.id) + -> Seq Scan on tb +(6 rows) + +-- Disable pull-up due to lack of the outer var +EXPLAIN (COSTS OFF) +SELECT 1 +FROM ta +WHERE EXISTS ( + SELECT 1 + FROM tb + JOIN tc ON tc.id = tb.id +); + QUERY PLAN +--------------------------------------------------- + Result + One-Time Filter: (InitPlan exists_1).col1 + InitPlan exists_1 + -> Nested Loop + -> Seq Scan on tb + -> Index Only Scan using tc_pkey on tc + Index Cond: (id = tb.id) + -> Seq Scan on ta +(8 rows) + +CREATE TABLE td (id int, tc_id bytea, val int); +INSERT INTO td +SELECT g.id, 'Test1'::bytea AS tc_id, 6 AS val +FROM generate_series(1, 25) AS g(id) +UNION ALL +SELECT g.id, 'Test2'::bytea AS tc_id, 7 AS val +FROM generate_series(26, 50) AS g(id) +UNION ALL +SELECT g.id, 'Test4'::bytea AS tc_id, 6 AS val +FROM generate_series(51, 75) AS g(id) +UNION ALL +SELECT g.id, 'Test5'::bytea AS tc_id, 7 AS val +FROM generate_series(76, 100) AS g(id); +EXPLAIN (COSTS OFF) + SELECT ta.id + FROM ta + WHERE EXISTS ( + SELECT 1 + FROM tb + WHERE tb.id = ta.id AND + EXISTS + (SELECT 1 + FROM tc + WHERE tc.id = tb.id) + ); + QUERY PLAN +------------------------------------ + Hash Right Semi Join + Hash Cond: (tc.id = ta.id) + -> Hash Join + Hash Cond: (tb.id = tc.id) + -> Seq Scan on tb + -> Hash + -> Seq Scan on tc + -> Hash + -> Seq Scan 
on ta +(9 rows) + +EXPLAIN (COSTS OFF) + SELECT ta.id + FROM ta + WHERE EXISTS ( + SELECT 1 + FROM tb + WHERE tb.id = ta.id AND + EXISTS + (SELECT 1 + FROM tc + WHERE tc.id = ta.id) + ); + QUERY PLAN +------------------------------------ + Hash Join + Hash Cond: (tc.id = tb.id) + -> Hash Join + Hash Cond: (tc.id = ta.id) + -> Seq Scan on tc + -> Hash + -> Seq Scan on ta + -> Hash + -> Seq Scan on tb +(9 rows) + +EXPLAIN (COSTS OFF) + SELECT ta.id + FROM ta + WHERE EXISTS ( + SELECT 1 + FROM tb + WHERE tb.id = ta.id + AND EXISTS + (SELECT 1 + FROM tc + WHERE tb.id = ta.id) + ); + QUERY PLAN +-------------------------------------------- + Hash Join + Hash Cond: (tb.id = ta.id) + Join Filter: EXISTS(SubPlan exists_1) + -> Seq Scan on tb + -> Hash + -> Seq Scan on ta + SubPlan exists_1 + -> Result + One-Time Filter: (tb.id = ta.id) + -> Seq Scan on tc +(10 rows) + +explain (COSTS OFF) + SELECT ta.id + FROM ta + WHERE EXISTS ( + SELECT 1 + FROM tb + join tc on tc.id = ta.id + AND EXISTS ( + SELECT 1 + FROM td + WHERE td.id = ta.id) + ); + QUERY PLAN +------------------------------------------------------- + Hash Join + Hash Cond: (ta.id = td.id) + -> Nested Loop Semi Join + -> Seq Scan on ta + -> Nested Loop + -> Index Only Scan using tc_pkey on tc + Index Cond: (id = ta.id) + -> Seq Scan on tb + -> Hash + -> HashAggregate + Group Key: td.id + -> Seq Scan on td +(12 rows) + +explain (COSTS OFF) + SELECT ta.id + FROM ta + WHERE EXISTS ( + SELECT 1 + FROM tb + join tc on tc.id = ta.id + AND EXISTS ( + SELECT 1 + FROM td + WHERE tb.id = ta.id) + ); + QUERY PLAN +-------------------------------------------------- + Nested Loop Semi Join + -> Seq Scan on ta + -> Nested Loop + Join Filter: EXISTS(SubPlan exists_1) + -> Index Only Scan using tc_pkey on tc + Index Cond: (id = ta.id) + -> Seq Scan on tb + SubPlan exists_1 + -> Result + One-Time Filter: (tb.id = ta.id) + -> Seq Scan on td +(11 rows) + +CREATE TABLE te (id int, tc_id bytea, val int); +INSERT INTO te SELECT * FROM 
td; +EXPLAIN (COSTS OFF) +SELECT t1.* +FROM ta t1 +WHERE EXISTS ( + SELECT 1 + FROM (SELECT 1 AS SDBL_DUMMY) SDBL_DUAL + JOIN tb t2 ON t2.id = t1.id + WHERE EXISTS ( + SELECT 1 + FROM (SELECT 1 AS SDBL_DUMMY) SDBL_DUAL + JOIN td t3 ON t3.tc_id IN ('Test1'::bytea, 'Test2'::bytea) + WHERE EXISTS ( + SELECT 1 + FROM te t4 + WHERE t4.tc_id = t3.tc_id + AND t4.val = t2.aval + ) = EXISTS ( + SELECT 1 + FROM tc t5 + WHERE t5.id = t3.id + ) + ) +); + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Merge Semi Join + Merge Cond: (t1.id = t2.id) + -> Index Scan using ta_pkey on ta t1 + -> Nested Loop Semi Join + Join Filter: ((ANY ((t3.tc_id = (hashed SubPlan exists_2).col1) AND (t2.aval = (hashed SubPlan exists_2).col2))) = (ANY (t3.id = (hashed SubPlan exists_4).col1))) + -> Index Scan using tb_pkey on tb t2 + -> Materialize + -> Seq Scan on td t3 + Filter: (tc_id = ANY ('{"\\x5465737431","\\x5465737432"}'::bytea[])) + SubPlan exists_2 + -> Seq Scan on te t4 + SubPlan exists_4 + -> Seq Scan on tc t5 +(13 rows) + +EXPLAIN (COSTS OFF) +SELECT ta.* +FROM ta +WHERE EXISTS ( + SELECT 1 + FROM tb + JOIN tc ON tc.id = tb.id + AND tb.id = ta.id + JOIN td ON td.id = tc.id +); + QUERY PLAN +------------------------------------------ + Hash Right Semi Join + Hash Cond: (td.id = ta.id) + -> Hash Join + Hash Cond: (td.id = tc.id) + -> Hash Join + Hash Cond: (td.id = tb.id) + -> Seq Scan on td + -> Hash + -> Seq Scan on tb + -> Hash + -> Seq Scan on tc + -> Hash + -> Seq Scan on ta +(13 rows) + +-- Test case for invalid reference to FROM-clause entry in nested join +-- Fixed: tb cannot be referenced in the join condition of nested join (tc join td) +-- Restructure: move tb.id>111 to WHERE clause or outer join condition +-- Preserve semantics: tc JOIN td is inner join, then LEFT JOIN with tb +EXPLAIN (COSTS OFF) +select * from ta +where exists ( + 
select * + from tb left join + (tc join td on (tc.id is null or td.id is null)) + on true + where tb.id > 111 or (tc.id is null or td.id is null) +); + QUERY PLAN +------------------------------------------------------------------------- + Result + One-Time Filter: (InitPlan exists_1).col1 + InitPlan exists_1 + -> Nested Loop Left Join + Filter: ((tb.id > 111) OR (tc.id IS NULL) OR (td.id IS NULL)) + -> Seq Scan on tb + -> Materialize + -> Nested Loop + -> Seq Scan on tc + -> Materialize + -> Seq Scan on td + Filter: (id IS NULL) + -> Seq Scan on ta +(13 rows) + +-- Test case for column reference error in lateral subquery +-- Fixed: subq_3.id doesn't exist, need to select id from tb in subq_3 +explain (costs off) +select + subq_3.id as c32 +from + ( + select + sample_0.id as c0 + from + ta as sample_0 + ) as subq_0, + lateral + ( + select + ref_0.id as id, + '' as c0 + from + tb as ref_0 + ) as subq_3 +where EXISTS +( + select + 1 + from + ( + select + subq_3.c0 as c2 + from + tc as sample_3 + ) as subq_4 + where subq_3.c0 >= subq_4.c2 +); + QUERY PLAN +--------------------------------------------- + Result + One-Time Filter: (InitPlan exists_1).col1 + InitPlan exists_1 + -> Seq Scan on tc sample_3 + -> Nested Loop + -> Seq Scan on tb ref_0 + -> Materialize + -> Seq Scan on ta sample_0 +(8 rows) + +DROP TABLE td, te; -- Test case for sublinks pushed down into subselects via join alias expansion -- select diff --git a/src/test/regress/sql/subselect.sql b/src/test/regress/sql/subselect.sql index 4cd016f4ac3..2c3f2373183 100644 --- a/src/test/regress/sql/subselect.sql +++ b/src/test/regress/sql/subselect.sql @@ -529,6 +529,436 @@ where exists ( rollback; -- +-- Test case for exist sublink where we can consider some undependent expression +-- with outer link +-- + +EXPLAIN (COSTS OFF) +SELECT 1 +FROM ta +WHERE EXISTS ( + SELECT 1 + FROM tb + JOIN tc ON ta.id = tb.id +); + +EXPLAIN (COSTS OFF) +SELECT 1 +FROM ta +WHERE EXISTS ( + SELECT 1 + FROM tb + JOIN tc ON ta.id = 
tc.id +); + +EXPLAIN (COSTS OFF) +SELECT 1 +FROM ta +WHERE EXISTS ( + SELECT 1 + FROM tb + JOIN tc ON 1 = 1 + WHERE ta.id = tc.id +); + +-- Join compound expression +EXPLAIN (COSTS OFF) +SELECT 1 +FROM ta +WHERE EXISTS ( + SELECT 1 + FROM tb + JOIN tc ON ta.id = tc.id + AND ta.id = tb.id +); + +EXPLAIN (COSTS OFF) +SELECT 1 +FROM ta ta1 +WHERE EXISTS ( + SELECT 1 + FROM tb + JOIN tc ON tb.aval = tc.aid + AND tb.aval = ta1.id +); + +EXPLAIN (COSTS OFF) +SELECT 1 +FROM ta +JOIN tb ON true +WHERE EXISTS ( + SELECT 1 + FROM tb tb1 + JOIN tc ON ta.id = tb.id +); + +-- Compound expression with const type or other type of expressions +EXPLAIN (COSTS OFF) +SELECT 1 +FROM ta +WHERE EXISTS ( + SELECT 1 + FROM tb + JOIN tc ON ta.id = tc.id + AND ta.id = 1 +); + +EXPLAIN (COSTS OFF) +SELECT 1 +FROM ta +WHERE EXISTS ( + SELECT 1 + FROM tb + JOIN tc ON ta.id = tc.id + AND tb.id = 1 +); + +EXPLAIN (COSTS OFF) +SELECT 1 +FROM ta +WHERE EXISTS ( + SELECT 1 + FROM tb + RIGHT JOIN tc ON ta.id = tc.id + WHERE ta.val = 1 +); + +EXPLAIN (COSTS OFF) +SELECT 1 +FROM ta +WHERE EXISTS ( + SELECT 1 + FROM tb + JOIN tc ON ta.id = tb.id + AND tb.aval = ANY ('{1}'::int[]) +); + +-- Exists SubLink expression within expression +EXPLAIN (COSTS OFF) +SELECT 1 +FROM ta ta1 +WHERE EXISTS ( + SELECT 1 + FROM ta + JOIN tb ON ta.id = ta1.id + AND ta1.val = 1 + WHERE EXISTS ( + SELECT 1 + FROM ta ta2 + WHERE ta2.id = ta1.id + ) +); + +EXPLAIN (COSTS OFF) +SELECT 1 +FROM ta ta1 +WHERE EXISTS ( + SELECT 1 + FROM ta + JOIN tb ON ta.val = ta1.id + AND ta1.id = 1 + WHERE EXISTS ( + SELECT 1 + FROM ta ta2 + WHERE ta2.id = ta.id + ) +); + +EXPLAIN (COSTS OFF) +SELECT 1 +FROM ta +WHERE EXISTS ( + SELECT 1 + FROM tb + WHERE ta.id = tb.id + AND EXISTS ( + SELECT 1 + FROM tc + WHERE tc.id = tb.id + AND tc.aid + tb.aval > 0 + ) +); + +EXPLAIN (COSTS OFF) +SELECT 1 +FROM ta +WHERE EXISTS ( + SELECT 1 + FROM tb + WHERE ta.id = tb.id + AND EXISTS ( + SELECT 1 + FROM tc + WHERE tc.id = tb.id + AND tc.aid + ta.val > 0 + 
) +); + +-- Check with NULL and NOT NULL expressions +ALTER TABLE ta ADD COLUMN is_active bool; +UPDATE ta SET is_active = true; + +EXPLAIN (COSTS OFF) +SELECT 1 +FROM ta +WHERE EXISTS ( + SELECT 1 + FROM tb + JOIN tc ON ta.id = tb.id + AND COALESCE(ta.is_active, true) +); + +EXPLAIN (COSTS OFF) +SELECT 1 +FROM tb +WHERE EXISTS ( + SELECT 1 + FROM ta + JOIN tc ON ta.id = tb.id + AND COALESCE(ta.is_active, true) +); + +EXPLAIN (COSTS OFF) +SELECT 1 +FROM ta +WHERE EXISTS ( + SELECT 1 + FROM tb + JOIN tc ON ta.id = tb.id + AND CASE + WHEN ta.is_active THEN true + ELSE false + END = true +); + +EXPLAIN (COSTS OFF) +SELECT 1 +FROM tb +WHERE EXISTS ( + SELECT 1 + FROM ta + JOIN tc ON ta.id = tb.id + AND CASE + WHEN ta.is_active THEN true + ELSE false + END = true +); + +EXPLAIN (COSTS OFF) +SELECT 1 +FROM ta +WHERE EXISTS ( + SELECT 1 + FROM tb + JOIN tc ON ta.id = tb.id + AND ta.is_active +); + +EXPLAIN (COSTS OFF) +SELECT 1 +FROM ta +WHERE EXISTS ( + SELECT 1 + FROM tb + JOIN tc ON ta.id = tb.id + AND ta.is_active IS NOT NULL +); + + +-- Disabled pull up because it is applcapable for INNER JOIN connection +EXPLAIN (COSTS OFF) +SELECT 1 +FROM ta +WHERE EXISTS ( + SELECT 1 + FROM tb + RIGHT JOIN tc ON ta.id = tc.id +); + +-- Disable pull-up due to lack of the outer var +EXPLAIN (COSTS OFF) +SELECT 1 +FROM ta +WHERE EXISTS ( + SELECT 1 + FROM tb + JOIN tc ON tc.id = tb.id +); + +CREATE TABLE td (id int, tc_id bytea, val int); + +INSERT INTO td +SELECT g.id, 'Test1'::bytea AS tc_id, 6 AS val +FROM generate_series(1, 25) AS g(id) + +UNION ALL + +SELECT g.id, 'Test2'::bytea AS tc_id, 7 AS val +FROM generate_series(26, 50) AS g(id) + +UNION ALL + +SELECT g.id, 'Test4'::bytea AS tc_id, 6 AS val +FROM generate_series(51, 75) AS g(id) + +UNION ALL + +SELECT g.id, 'Test5'::bytea AS tc_id, 7 AS val +FROM generate_series(76, 100) AS g(id); + +EXPLAIN (COSTS OFF) + SELECT ta.id + FROM ta + WHERE EXISTS ( + SELECT 1 + FROM tb + WHERE tb.id = ta.id AND + EXISTS + (SELECT 1 + FROM tc 
+ WHERE tc.id = tb.id) + ); + +EXPLAIN (COSTS OFF) + SELECT ta.id + FROM ta + WHERE EXISTS ( + SELECT 1 + FROM tb + WHERE tb.id = ta.id AND + EXISTS + (SELECT 1 + FROM tc + WHERE tc.id = ta.id) + ); + +EXPLAIN (COSTS OFF) + SELECT ta.id + FROM ta + WHERE EXISTS ( + SELECT 1 + FROM tb + WHERE tb.id = ta.id + AND EXISTS + (SELECT 1 + FROM tc + WHERE tb.id = ta.id) + ); + +explain (COSTS OFF) + SELECT ta.id + FROM ta + WHERE EXISTS ( + SELECT 1 + FROM tb + join tc on tc.id = ta.id + AND EXISTS ( + SELECT 1 + FROM td + WHERE td.id = ta.id) + ); + +explain (COSTS OFF) + SELECT ta.id + FROM ta + WHERE EXISTS ( + SELECT 1 + FROM tb + join tc on tc.id = ta.id + AND EXISTS ( + SELECT 1 + FROM td + WHERE tb.id = ta.id) + ); + +CREATE TABLE te (id int, tc_id bytea, val int); +INSERT INTO te SELECT * FROM td; + +EXPLAIN (COSTS OFF) +SELECT t1.* +FROM ta t1 +WHERE EXISTS ( + SELECT 1 + FROM (SELECT 1 AS SDBL_DUMMY) SDBL_DUAL + JOIN tb t2 ON t2.id = t1.id + WHERE EXISTS ( + SELECT 1 + FROM (SELECT 1 AS SDBL_DUMMY) SDBL_DUAL + JOIN td t3 ON t3.tc_id IN ('Test1'::bytea, 'Test2'::bytea) + WHERE EXISTS ( + SELECT 1 + FROM te t4 + WHERE t4.tc_id = t3.tc_id + AND t4.val = t2.aval + ) = EXISTS ( + SELECT 1 + FROM tc t5 + WHERE t5.id = t3.id + ) + ) +); + +EXPLAIN (COSTS OFF) +SELECT ta.* +FROM ta +WHERE EXISTS ( + SELECT 1 + FROM tb + JOIN tc ON tc.id = tb.id + AND tb.id = ta.id + JOIN td ON td.id = tc.id +); + +-- Test case for invalid reference to FROM-clause entry in nested join +-- Fixed: tb cannot be referenced in the join condition of nested join (tc join td) +-- Restructure: move tb.id>111 to WHERE clause or outer join condition +-- Preserve semantics: tc JOIN td is inner join, then LEFT JOIN with tb +EXPLAIN (COSTS OFF) +select * from ta +where exists ( + select * + from tb left join + (tc join td on (tc.id is null or td.id is null)) + on true + where tb.id > 111 or (tc.id is null or td.id is null) +); + +-- Test case for column reference error in lateral subquery +-- Fixed: 
subq_3.id doesn't exist, need to select id from tb in subq_3 +explain (costs off) +select + subq_3.id as c32 +from + ( + select + sample_0.id as c0 + from + ta as sample_0 + ) as subq_0, + lateral + ( + select + ref_0.id as id, + '' as c0 + from + tb as ref_0 + ) as subq_3 +where EXISTS +( + select + 1 + from + ( + select + subq_3.c0 as c2 + from + tc as sample_3 + ) as subq_4 + where subq_3.c0 >= subq_4.c2 +); + +DROP TABLE td, te; + -- Test case for sublinks pushed down into subselects via join alias expansion -- -- 2.39.5 (Apple Git-154)