From c04add30999ecd64c51bde7db56a6e5637c16c74 Mon Sep 17 00:00:00 2001
From: "Andrei V. Lepikhov" <lepihov@gmail.com>
Date: Tue, 9 Jul 2024 12:25:23 +0700
Subject: [PATCH] Apply SJE to DML queries: Just don't include result relation
 to the set of SJE candidates.

Also, fix the subtle bug with RowMarks.
---
 src/backend/optimizer/plan/analyzejoins.c | 24 +++------
 src/test/regress/expected/join.out        | 61 +++++++++++++++++++++++
 src/test/regress/sql/join.sql             | 17 ++++++-
 3 files changed, 84 insertions(+), 18 deletions(-)

diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c
index bb14597762..d2b9ba7c08 100644
--- a/src/backend/optimizer/plan/analyzejoins.c
+++ b/src/backend/optimizer/plan/analyzejoins.c
@@ -1860,10 +1860,6 @@ remove_self_join_rel(PlannerInfo *root, PlanRowMark *kmark, PlanRowMark *rmark,
 	/* restore the rangetblref in a proper order. */
 	restore_rangetblref((Node *) root->parse, toKeep->relid, toRemove->relid, 0, 0);
 
-	/* See remove_self_joins_one_group() */
-	Assert(root->parse->resultRelation != toRemove->relid);
-	Assert(root->parse->resultRelation != toKeep->relid);
-
 	/* Replace links in the planner info */
 	remove_rel_from_query(root, toRemove, toKeep->relid, NULL, NULL);
 
@@ -2046,14 +2042,6 @@ remove_self_joins_one_group(PlannerInfo *root, Relids relids)
 	{
 		RelOptInfo *inner = root->simple_rel_array[r];
 
-		/*
-		 * We don't accept result relation as either source or target relation
-		 * of SJE, because result relation has different behavior in
-		 * EvalPlanQual() and RETURNING clause.
-		 */
-		if (root->parse->resultRelation == r)
-			continue;
-
 		k = r;
 
 		while ((k = bms_next_member(relids, k)) > 0)
@@ -2069,9 +2057,6 @@ remove_self_joins_one_group(PlannerInfo *root, Relids relids)
 			PlanRowMark *imark = NULL;
 			List	   *uclauses = NIL;
 
-			if (root->parse->resultRelation == k)
-				continue;
-
 			/* A sanity check: the relations have the same Oid. */
 			Assert(root->simple_rte_array[k]->relid ==
 				   root->simple_rte_array[r]->relid);
@@ -2121,7 +2106,8 @@ remove_self_joins_one_group(PlannerInfo *root, Relids relids)
 				if (omark && imark)
 					break;
 			}
-			if (omark && imark && omark->markType != imark->markType)
+			if (((omark == NULL) ^ (imark == NULL)) ||
+				(omark && omark->markType != imark->markType))
 				continue;
 
 			/*
@@ -2231,7 +2217,8 @@ remove_self_joins_recurse(PlannerInfo *root, List *joinlist, Relids toRemove)
 			 */
 			if (rte->rtekind == RTE_RELATION &&
 				rte->relkind == RELKIND_RELATION &&
-				rte->tablesample == NULL)
+				rte->tablesample == NULL &&
+				varno != root->parse->resultRelation)
 			{
 				Assert(!bms_is_member(varno, relids));
 				relids = bms_add_member(relids, varno);
@@ -2300,6 +2287,9 @@ remove_self_joins_recurse(PlannerInfo *root, List *joinlist, Relids toRemove)
 
 				relids = bms_del_members(relids, group);
 
+				/* Don't apply SJE to result relation */
+				Assert(!bms_is_member(root->parse->resultRelation, group));
+
 				/*
 				 * Try to remove self-joins from a group of identical entries.
 				 * Make the next attempt iteratively - if something is deleted
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index 4e4cec633a..78dfcd4866 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -7068,6 +7068,18 @@ UPDATE sj sq SET b = 1 FROM sj as sz WHERE sq.a = sz.a;
                ->  Seq Scan on sj sz
 (6 rows)
 
+EXPLAIN (COSTS OFF)
+UPDATE sj sq SET b = sz.b FROM sj as sz WHERE sq.a = sz.a;
+             QUERY PLAN              
+-------------------------------------
+ Update on sj sq
+   ->  Nested Loop
+         Join Filter: (sq.a = sz.a)
+         ->  Seq Scan on sj sq
+         ->  Materialize
+               ->  Seq Scan on sj sz
+(6 rows)
+
 CREATE RULE sj_del_rule AS ON DELETE TO sj
   DO INSTEAD
     UPDATE sj SET a = 1 WHERE a = old.a;
@@ -7083,6 +7095,55 @@ EXPLAIN (COSTS OFF) DELETE FROM sj;
 (6 rows)
 
 DROP RULE sj_del_rule ON sj CASCADE;
+-- Allow SJE, remove (s2 JOIN s3).
+EXPLAIN (COSTS OFF)
+UPDATE sj s1 SET b = s2.b, a = s3.a FROM sj AS s2, sj AS s3
+WHERE s1.a = s2.a AND s1.a=s3.a RETURNING s1.*,s2.*,s3.*;
+                 QUERY PLAN                  
+---------------------------------------------
+ Update on sj s1
+   ->  Nested Loop
+         Join Filter: (s1.a = s3.a)
+         ->  Seq Scan on sj s1
+         ->  Materialize
+               ->  Seq Scan on sj s3
+                     Filter: (a IS NOT NULL)
+(7 rows)
+
+-- Allow SJE, but it is just impossible
+EXPLAIN (COSTS OFF)
+UPDATE sj s1 SET b = s2.b, a = s3.a FROM sj AS s2, sj AS s3
+WHERE s1.a = s2.a AND s1.b=s3.b;
+                   QUERY PLAN                    
+-------------------------------------------------
+ Update on sj s1
+   ->  Nested Loop
+         Join Filter: (s1.b = s3.b)
+         ->  Seq Scan on sj s3
+         ->  Materialize
+               ->  Nested Loop
+                     Join Filter: (s1.a = s2.a)
+                     ->  Seq Scan on sj s1
+                     ->  Materialize
+                           ->  Seq Scan on sj s2
+(10 rows)
+
+-- Allow SJE. One more shot for the case of subquery
+EXPLAIN (COSTS OFF)
+UPDATE sj s1 SET b = q.b, a = q.a FROM (
+  SELECT s2.a AS a, s3.b AS b FROM sj AS s2, sj AS s3
+  WHERE s2.a=s3.a) AS q WHERE s1.b=q.a;
+                 QUERY PLAN                  
+---------------------------------------------
+ Update on sj s1
+   ->  Nested Loop
+         Join Filter: (s3.a = s1.b)
+         ->  Seq Scan on sj s1
+         ->  Materialize
+               ->  Seq Scan on sj s3
+                     Filter: (a IS NOT NULL)
+(7 rows)
+
 -- Check that SJE does not mistakenly omit qual clauses (bug #18187)
 insert into emp1 values (1, 1);
 explain (costs off)
diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql
index 3e94e0af53..7b32a9bb95 100644
--- a/src/test/regress/sql/join.sql
+++ b/src/test/regress/sql/join.sql
@@ -2724,13 +2724,28 @@ TRUNCATE emp1;
 
 EXPLAIN (COSTS OFF)
 UPDATE sj sq SET b = 1 FROM sj as sz WHERE sq.a = sz.a;
-
+EXPLAIN (COSTS OFF)
+UPDATE sj sq SET b = sz.b FROM sj as sz WHERE sq.a = sz.a;
 CREATE RULE sj_del_rule AS ON DELETE TO sj
   DO INSTEAD
     UPDATE sj SET a = 1 WHERE a = old.a;
 EXPLAIN (COSTS OFF) DELETE FROM sj;
 DROP RULE sj_del_rule ON sj CASCADE;
 
+-- Allow SJE, remove (s2 JOIN s3).
+EXPLAIN (COSTS OFF)
+UPDATE sj s1 SET b = s2.b, a = s3.a FROM sj AS s2, sj AS s3
+WHERE s1.a = s2.a AND s1.a=s3.a RETURNING s1.*,s2.*,s3.*;
+-- Allow SJE, but it is just impossible
+EXPLAIN (COSTS OFF)
+UPDATE sj s1 SET b = s2.b, a = s3.a FROM sj AS s2, sj AS s3
+WHERE s1.a = s2.a AND s1.b=s3.b;
+-- Allow SJE. One more shot for the case of subquery
+EXPLAIN (COSTS OFF)
+UPDATE sj s1 SET b = q.b, a = q.a FROM (
+  SELECT s2.a AS a, s3.b AS b FROM sj AS s2, sj AS s3
+  WHERE s2.a=s3.a) AS q WHERE s1.b=q.a;
+
 -- Check that SJE does not mistakenly omit qual clauses (bug #18187)
 insert into emp1 values (1, 1);
 explain (costs off)
-- 
2.39.2

