From 41273bc5db9785ba88038c15b762a1ffd367b762 Mon Sep 17 00:00:00 2001
From: Tomas Vondra <tomas@vondra.me>
Date: Tue, 11 Nov 2025 23:12:39 +0100
Subject: [PATCH v5 2/2] Allow dimensions with some join restrictions

---
 src/backend/optimizer/plan/analyzejoins.c | 154 +++++++++++++++++-----
 1 file changed, 119 insertions(+), 35 deletions(-)

diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c
index 79a7f0c8608..bc19c2b537c 100644
--- a/src/backend/optimizer/plan/analyzejoins.c
+++ b/src/backend/optimizer/plan/analyzejoins.c
@@ -2782,7 +2782,8 @@ starjoin_match_to_foreign_key(PlannerInfo *root, RelOptInfo *rel)
  * with respect to the rels after it).
  */
 static bool
-starjoin_is_dimension(PlannerInfo *root, RangeTblRef *rtr)
+starjoin_is_dimension(PlannerInfo *root, RangeTblRef *rtr,
+					  bool allow_restrictions)
 {
 	Index		rti = rtr->rtindex;
 	RangeTblEntry *rte = root->simple_rte_array[rti];
@@ -2815,7 +2816,7 @@ starjoin_is_dimension(PlannerInfo *root, RangeTblRef *rtr)
 	 * XXX This blocks the simplified planning for LEFT (or OUTER) joins,
 	 * because outer joins imply restrictions.
 	 */
-	if (has_join_restriction(root, rel))
+	if (!allow_restrictions && has_join_restriction(root, rel))
 		return false;
 
 	/*
@@ -2953,6 +2954,28 @@ starjoin_is_dimension(PlannerInfo *root, RangeTblRef *rtr)
  * to disable the optimization if needed, I think - don't collapse the
  * dimensions into the "group" join item. It would require changes to
  * the generic join search, to be aware of the new item type.
+ *
+ * The search for dimensions may perform multiple passes over the list, to
+ * allow treating some rels with restrictions as dimensions. Relations
+ * without restrictions can be moved to an arbitrary place in the join
+ * tree. We leverage that by moving them to the list of dimensions, which
+ * may skip over various other relations.
+ *
+ * Relations with join order restrictions do not allow arbitrary moves. But we
+ * can treat them as dimensions in some cases. A join restriction does not
+ * imply we can't move the relation at all, otherwise we wouldn't be allowed
+ * to move any relations when there's a single relation with a restriction.
+ * It means we can't change the relative order of restricted relations.
+ *
+ * This means we can treat a relation with a restriction as a dimension,
+ * as long as it's the last in the current joinlist (after some relations
+ * were already moved to the list of dimensions).
+ *
+ * To do this we walk the joinlist multiple times, and in each iteration
+ * we try to identify as many dimensions as possible. We walk the list in
+ * reverse, and we add dimensions to the beginning of the list. This way
+ * we preserve the original syntactic join order. If we find no dimensions
+ * in a loop, we're done.
  */
 List *
 starjoin_adjust_joins(PlannerInfo *root, List *joinlist)
@@ -2961,6 +2984,8 @@ starjoin_adjust_joins(PlannerInfo *root, List *joinlist)
 	List	   *newlist = NIL;
 	List	   *dimensions = NIL;
 	int			nlist = list_length(joinlist);
+	int			nitems;
+	Node	  **items;
 
 	/* Do nothing if starjoin optimization not enabled. */
 	if (!enable_starjoin_join_search)
@@ -2978,6 +3003,15 @@ starjoin_adjust_joins(PlannerInfo *root, List *joinlist)
 		(nlist == 1 && !IsA(linitial(joinlist), List)))
 		return joinlist;
 
+	/* expand the list into an array, to make backwards processing easier */
+	items = palloc_array(Node *, nlist);
+
+	nitems = 0;
+	foreach(lc, joinlist)
+	{
+		items[nitems++] = (Node *) lfirst(lc);
+	}
+
 	/*
 	 * Process the current join problem - split the elements into dimensions
 	 * and non-dimensions. If there are dimensions, add them back at the end,
@@ -2989,6 +3023,9 @@ starjoin_adjust_joins(PlannerInfo *root, List *joinlist)
 	 * to check if it's a dimension. Other types of elements are just added
 	 * back to the list as-is.
 	 *
+	 * Walk the list backwards, to preserve syntactic join order. This allows
+	 * tracking the "last" relation. If a pass finds no dimension, we're done.
+	 *
 	 * XXX I think we need to be careful to keep the order of the list (for
 	 * the non-dimension entries). The join_search_one_level() relies on that
 	 * when handling join order restrictions.
@@ -2998,47 +3035,94 @@ starjoin_adjust_joins(PlannerInfo *root, List *joinlist)
 	 * something they don't need. A mutable iterator might be a way, but I'm
 	 * not sure how expensive this really is.
 	 */
-	foreach(lc, joinlist)
+	for (;;)
 	{
-		Node	   *item = (Node *) lfirst(lc);
+		bool		found = false;	/* found at least one dimension */
+		bool		last = true;	/* is this the current last rel */
 
-		/* a separate join search problem, handle it recursively */
-		if (IsA(item, List))
+		for (int i = (nitems - 1); i >= 0; i--)
 		{
-			newlist = lappend(newlist,
-							  starjoin_adjust_joins(root, (List *) item));
-			continue;
+			Node	   *item = items[i];
+
+			/* skip empty items (already moved to dimensions) */
+			if (item == NULL)
+				continue;
+
+			/* do nothing about join subproblems, leave them in place */
+			if (IsA(item, List))
+			{
+				/* XXX do we need to reset "last" for a join subtree? */
+				last = false;
+				continue;
+			}
+
+			/*
+			 * If it's not a List, it has to be a RangeTblRef - joinlists
+			 * can't contain any other elements (see make_rel_from_joinlist).
+			 */
+			Assert(IsA(item, RangeTblRef));
+
+			/*
+			 * Is it a dimension?
+			 *
+			 * An entry representing a baserel. If it's a dimension, save it
+			 * in a separate list, and we'll add it at the "top" of the join
+			 * at the end. Otherwise leave it in the array; remaining items
+			 * are copied to the new list once the search completes.
+			 *
+			 * We do this only when the joinlist has at least 3 items. For
+			 * fewer rels the optimization does not matter, there's only a
+			 * single join order anyway. That only skips the optimization on
+			 * this level - we still do the recursion, and that might hit a
+			 * larger join problem.
+			 *
+			 * XXX If we decide to treat the rel as a dimension, don't update
+			 * the "last" flag. The next relation will be the last one.
+			 *
+			 * XXX We might have a new GUC to customize the cutoff limit, but
+			 * for now it seems good enough to do it whenever applicable. If
+			 * we find it's not worth it for less than N rels, we can add it
+			 * later.
+			 */
+			if ((nlist >= 3) &&
+				starjoin_is_dimension(root, (RangeTblRef *) item, last))
+			{
+				/* add it to the beginning of the list */
+				dimensions = lcons(item, dimensions);
+				items[i] = NULL;
+				found = true;
+				continue;
+			}
+
+			/*
+			 * Not a dimension. Leave it in the array, but remember the next
+			 * item (backwards) is no longer the last one.
+			 *
+			 * XXX Maybe we don't need to reset "last" if the item does not
+			 * have join restrictions?
+			 */
+			last = false;
 		}
 
-		/*
-		 * If it's not a List, it has to be a RangeTblRef - jinlists can't
-		 * contain any other elements (see make_rel_from_joinlist).
-		 */
-		Assert(IsA(item, RangeTblRef));
+		/* terminate when a loop finds no dimension */
+		if (!found)
+			break;
+	}
 
-		/*
-		 * An entry representing a baserel. If it's a dimension, save it in a
-		 * separate list, and we'll add it at the "top" of the join at the
-		 * end. Otherwise add it to the list just like other elements.
-		 *
-		 * We do this only when the joinlist has at least 3 items. For fewer
-		 * rels the optimization does not matter, there's only a single join
-		 * order anyway. That only skips the optimization on this level - we
-		 * still do the recursion, and that might hit a larger join problem.
-		 *
-		 * XXX We might have a new GUC to customize the cutoff limit, but for
-		 * now it seems good enough to do it whenever applicable. If we find
-		 * it's not worth it for less than N rels, we can add it later.
-		 */
-		if ((nlist >= 3) &&
-			starjoin_is_dimension(root, (RangeTblRef *) item))
-		{
-			dimensions = lappend(dimensions, item);
+	/*
+	 * Add items remaining in the input array to the newlist. We need to do
+	 * this every time, even without dimensions, because we need to recurse to
+	 * the nested join problems.
+	 */
+	for (int i = 0; i < nitems; i++)
+	{
+		if (items[i] == NULL)
 			continue;
-		}
 
-		/* not a dimension, add it to the list directly */
-		newlist = lappend(newlist, item);
+		if (IsA(items[i], List))
+			items[i] = (Node *) starjoin_adjust_joins(root, (List *) items[i]);
+
+		newlist = lappend(newlist, items[i]);
 	}
 
 	/*
-- 
2.51.1

