From dac8afd2095586921dfcf5564e7f2979e89b77de Mon Sep 17 00:00:00 2001
From: "Andrei V. Lepikhov" <lepihov@gmail.com>
Date: Thu, 2 May 2024 16:17:52 +0700
Subject: [PATCH] Forbid self-join elimination on table sampling scans.

Having custom table sampling methods we can stuck into unpredictable issues
replacing join with scan operation. It may had sense to analyse possible
situations and enable SJE, but the real profit from this operation looks
too low.
---
 src/backend/optimizer/plan/analyzejoins.c |  8 ++++++++
 src/backend/optimizer/plan/initsplan.c    |  5 ++++-
 src/test/regress/expected/join.out        | 13 +++++++++++++
 src/test/regress/sql/join.sql             |  5 +++++
 4 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c
index 506fccd20c..bb89558dcd 100644
--- a/src/backend/optimizer/plan/analyzejoins.c
+++ b/src/backend/optimizer/plan/analyzejoins.c
@@ -2148,6 +2148,14 @@ remove_self_joins_one_group(PlannerInfo *root, Relids relids)
 			Assert(root->simple_rte_array[k]->relid ==
 				   root->simple_rte_array[r]->relid);
 
+			/*
+			 * To avoid corner cases with table sampling methods just forbid
+			 * SJE for table sampling entries.
+			 */
+			if (root->simple_rte_array[k]->tablesample ||
+				root->simple_rte_array[r]->tablesample)
+				continue;
+
 			/*
 			 * It is impossible to eliminate join of two relations if they
 			 * belong to different rules of order. Otherwise planner can't be
diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c
index e2c68fe6f9..bf839bcaf6 100644
--- a/src/backend/optimizer/plan/initsplan.c
+++ b/src/backend/optimizer/plan/initsplan.c
@@ -415,7 +415,10 @@ extract_lateral_references(PlannerInfo *root, RelOptInfo *brel, Index rtindex)
 	if (!rte->lateral)
 		return;
 
-	/* Fetch the appropriate variables */
+	/* Fetch the appropriate variables.
+	 * Changes in this place may need changes in SJE logic, see
+	 * the remove_self_joins_one_group routine.
+	 */
 	if (rte->rtekind == RTE_RELATION)
 		vars = pull_vars_of_level((Node *) rte->tablesample, 0);
 	else if (rte->rtekind == RTE_SUBQUERY)
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index 8b640c2fc2..63143fe55f 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -6900,6 +6900,19 @@ where s1.x = 1;
                Filter: (1 = 1)
 (9 rows)
 
+-- Check that SJE doesn't touch TABLESAMPLE joins
+explain (costs off)
+SELECT * FROM emp1 t1 NATURAL JOIN LATERAL
+  (SELECT * FROM emp1 t2 TABLESAMPLE SYSTEM (t1.code));
+                     QUERY PLAN                      
+-----------------------------------------------------
+ Nested Loop
+   ->  Seq Scan on emp1 t1
+   ->  Sample Scan on emp1 t2
+         Sampling: system (t1.code)
+         Filter: ((t1.id = id) AND (t1.code = code))
+(5 rows)
+
 -- Check that PHVs do not impose any constraints on removing self joins
 explain (verbose, costs off)
 select * from emp1 t1 join emp1 t2 on t1.id = t2.id left join
diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql
index c4c6c7b8ba..184fd0876b 100644
--- a/src/test/regress/sql/join.sql
+++ b/src/test/regress/sql/join.sql
@@ -2652,6 +2652,11 @@ select 1 from emp1 t1 left join
     on true
 where s1.x = 1;
 
+-- Check that SJE doesn't touch TABLESAMPLE joins
+explain (costs off)
+SELECT * FROM emp1 t1 NATURAL JOIN LATERAL
+  (SELECT * FROM emp1 t2 TABLESAMPLE SYSTEM (t1.code));
+
 -- Check that PHVs do not impose any constraints on removing self joins
 explain (verbose, costs off)
 select * from emp1 t1 join emp1 t2 on t1.id = t2.id left join
-- 
2.39.2

