From 04219da3cbf5be5e1e6243dc3615a57a3925c7de Mon Sep 17 00:00:00 2001
From: Antonin Houska <ah@cybertec.at>
Date: Fri, 4 Nov 2022 15:02:57 +0100
Subject: [PATCH 1/3] Introduce RelInfoList structure.

This patch puts join_rel_list and join_rel_hash fields of PlannerInfo
structure into a new structure RelInfoList. It also adjusts add_join_rel() and
find_join_rel() functions so they only call add_rel_info() and find_rel_info()
respectively.

fetch_upper_rel() now uses the new API and the hash table as well because the
list stored in root->upper_rels[UPPERREL_PARTIAL_GROUP_AGG] will contain many
relations as soon as the aggregate push-down feature is added.
---
 contrib/postgres_fdw/postgres_fdw.c    |   3 +-
 src/backend/optimizer/geqo/geqo_eval.c |  12 +-
 src/backend/optimizer/plan/planmain.c  |   3 +-
 src/backend/optimizer/util/relnode.c   | 170 ++++++++++++++-----------
 src/include/nodes/pathnodes.h          |  31 +++--
 5 files changed, 126 insertions(+), 93 deletions(-)

diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c
index 8d7500abfb..bb1125e57c 100644
--- a/contrib/postgres_fdw/postgres_fdw.c
+++ b/contrib/postgres_fdw/postgres_fdw.c
@@ -5777,7 +5777,8 @@ foreign_join_ok(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype,
 	 */
 	Assert(fpinfo->relation_index == 0);	/* shouldn't be set yet */
 	fpinfo->relation_index =
-		list_length(root->parse->rtable) + list_length(root->join_rel_list);
+		list_length(root->parse->rtable) +
+		list_length(root->join_rel_list->items);
 
 	return true;
 }
diff --git a/src/backend/optimizer/geqo/geqo_eval.c b/src/backend/optimizer/geqo/geqo_eval.c
index 004481d608..7ad0baaa0f 100644
--- a/src/backend/optimizer/geqo/geqo_eval.c
+++ b/src/backend/optimizer/geqo/geqo_eval.c
@@ -92,11 +92,11 @@ geqo_eval(PlannerInfo *root, Gene *tour, int num_gene)
 	 *
 	 * join_rel_level[] shouldn't be in use, so just Assert it isn't.
 	 */
-	savelength = list_length(root->join_rel_list);
-	savehash = root->join_rel_hash;
+	savelength = list_length(root->join_rel_list->items);
+	savehash = root->join_rel_list->hash;
 	Assert(root->join_rel_level == NULL);
 
-	root->join_rel_hash = NULL;
+	root->join_rel_list->hash = NULL;
 
 	/* construct the best path for the given combination of relations */
 	joinrel = gimme_tree(root, tour, num_gene);
@@ -121,9 +121,9 @@ geqo_eval(PlannerInfo *root, Gene *tour, int num_gene)
 	 * Restore join_rel_list to its former state, and put back original
 	 * hashtable if any.
 	 */
-	root->join_rel_list = list_truncate(root->join_rel_list,
-										savelength);
-	root->join_rel_hash = savehash;
+	root->join_rel_list->items = list_truncate(root->join_rel_list->items,
+											   savelength);
+	root->join_rel_list->hash = savehash;
 
 	/* release all the memory acquired within gimme_tree */
 	MemoryContextSwitchTo(oldcxt);
diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c
index 63deed27c9..55de28f073 100644
--- a/src/backend/optimizer/plan/planmain.c
+++ b/src/backend/optimizer/plan/planmain.c
@@ -65,8 +65,7 @@ query_planner(PlannerInfo *root,
 	 * NOTE: append_rel_list was set up by subquery_planner, so do not touch
 	 * here.
 	 */
-	root->join_rel_list = NIL;
-	root->join_rel_hash = NULL;
+	root->join_rel_list = makeNode(RelInfoList);
 	root->join_rel_level = NULL;
 	root->join_cur_level = 0;
 	root->canon_pathkeys = NIL;
diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c
index 1786a3dadd..c75d2d1f19 100644
--- a/src/backend/optimizer/util/relnode.c
+++ b/src/backend/optimizer/util/relnode.c
@@ -32,11 +32,15 @@
 #include "utils/lsyscache.h"
 
 
-typedef struct JoinHashEntry
+/*
+ * An entry of a hash table that we use to make lookup for RelOptInfo
+ * structures more efficient.
+ */
+typedef struct RelInfoEntry
 {
-	Relids		join_relids;	/* hash key --- MUST BE FIRST */
-	RelOptInfo *join_rel;
-} JoinHashEntry;
+	Relids		relids;			/* hash key --- MUST BE FIRST */
+	void	   *data;
+} RelInfoEntry;
 
 static void build_joinrel_tlist(PlannerInfo *root, RelOptInfo *joinrel,
 								RelOptInfo *input_rel);
@@ -389,11 +393,11 @@ find_base_rel(PlannerInfo *root, int relid)
 }
 
 /*
- * build_join_rel_hash
- *	  Construct the auxiliary hash table for join relations.
+ * build_rel_hash
+ *	  Construct the auxiliary hash table for relation specific data.
  */
 static void
-build_join_rel_hash(PlannerInfo *root)
+build_rel_hash(RelInfoList *list)
 {
 	HTAB	   *hashtab;
 	HASHCTL		hash_ctl;
@@ -401,47 +405,49 @@ build_join_rel_hash(PlannerInfo *root)
 
 	/* Create the hash table */
 	hash_ctl.keysize = sizeof(Relids);
-	hash_ctl.entrysize = sizeof(JoinHashEntry);
+	hash_ctl.entrysize = sizeof(RelInfoEntry);
 	hash_ctl.hash = bitmap_hash;
 	hash_ctl.match = bitmap_match;
 	hash_ctl.hcxt = CurrentMemoryContext;
-	hashtab = hash_create("JoinRelHashTable",
+	hashtab = hash_create("RelHashTable",
 						  256L,
 						  &hash_ctl,
 						  HASH_ELEM | HASH_FUNCTION | HASH_COMPARE | HASH_CONTEXT);
 
 	/* Insert all the already-existing joinrels */
-	foreach(l, root->join_rel_list)
+	foreach(l, list->items)
 	{
-		RelOptInfo *rel = (RelOptInfo *) lfirst(l);
-		JoinHashEntry *hentry;
+		RelOptInfo	   *rel = lfirst_node(RelOptInfo, l);
+		RelInfoEntry *hentry;
 		bool		found;
 
-		hentry = (JoinHashEntry *) hash_search(hashtab,
-											   &(rel->relids),
-											   HASH_ENTER,
-											   &found);
+		hentry = (RelInfoEntry *) hash_search(hashtab,
+											  &rel->relids,
+											  HASH_ENTER,
+											  &found);
 		Assert(!found);
-		hentry->join_rel = rel;
+		hentry->data = rel;
 	}
 
-	root->join_rel_hash = hashtab;
+	list->hash = hashtab;
 }
 
 /*
- * find_join_rel
- *	  Returns relation entry corresponding to 'relids' (a set of RT indexes),
- *	  or NULL if none exists.  This is for join relations.
+ * find_rel_info
+ *	  Find a base or join relation entry.
  */
-RelOptInfo *
-find_join_rel(PlannerInfo *root, Relids relids)
+static void *
+find_rel_info(RelInfoList *list, Relids relids)
 {
+	if (list == NULL)
+		return NULL;
+
 	/*
 	 * Switch to using hash lookup when list grows "too long".  The threshold
 	 * is arbitrary and is known only here.
 	 */
-	if (!root->join_rel_hash && list_length(root->join_rel_list) > 32)
-		build_join_rel_hash(root);
+	if (!list->hash && list_length(list->items) > 32)
+		build_rel_hash(list);
 
 	/*
 	 * Use either hashtable lookup or linear search, as appropriate.
@@ -451,34 +457,82 @@ find_join_rel(PlannerInfo *root, Relids relids)
 	 * so would force relids out of a register and thus probably slow down the
 	 * list-search case.
 	 */
-	if (root->join_rel_hash)
+	if (list->hash)
 	{
 		Relids		hashkey = relids;
-		JoinHashEntry *hentry;
+		RelInfoEntry *hentry;
 
-		hentry = (JoinHashEntry *) hash_search(root->join_rel_hash,
-											   &hashkey,
-											   HASH_FIND,
-											   NULL);
+		hentry = (RelInfoEntry *) hash_search(list->hash,
+											  &hashkey,
+											  HASH_FIND,
+											  NULL);
 		if (hentry)
-			return hentry->join_rel;
+			return hentry->data;
 	}
 	else
 	{
 		ListCell   *l;
 
-		foreach(l, root->join_rel_list)
+		foreach(l, list->items)
 		{
-			RelOptInfo *rel = (RelOptInfo *) lfirst(l);
+			RelOptInfo   *item = lfirst_node(RelOptInfo, l);
 
-			if (bms_equal(rel->relids, relids))
-				return rel;
+			if (bms_equal(item->relids, relids))
+				return item;
 		}
 	}
 
 	return NULL;
 }
 
+/*
+ * find_join_rel
+ *	  Returns relation entry corresponding to 'relids' (a set of RT indexes),
+ *	  or NULL if none exists.  This is for join relations.
+ */
+RelOptInfo *
+find_join_rel(PlannerInfo *root, Relids relids)
+{
+	return (RelOptInfo *) find_rel_info(root->join_rel_list, relids);
+}
+
+/*
+ * add_rel_info
+ *		Add relation specific info to a list, and also add it to the auxiliary
+ *		hashtable if there is one.
+ */
+static void
+add_rel_info(RelInfoList *list, RelOptInfo *rel)
+{
+	/* GEQO requires us to append the new joinrel to the end of the list! */
+	list->items = lappend(list->items, rel);
+
+	/* store it into the auxiliary hashtable if there is one. */
+	if (list->hash)
+	{
+		RelInfoEntry *hentry;
+		bool		found;
+
+		hentry = (RelInfoEntry *) hash_search(list->hash,
+											  &rel->relids,
+											  HASH_ENTER,
+											  &found);
+		Assert(!found);
+		hentry->data = rel;
+	}
+}
+
+/*
+ * add_join_rel
+ *		Add given join relation to the list of join relations in the given
+ *		PlannerInfo.
+ */
+static void
+add_join_rel(PlannerInfo *root, RelOptInfo *joinrel)
+{
+	add_rel_info(root->join_rel_list, joinrel);
+}
+
 /*
  * set_foreign_rel_properties
  *		Set up foreign-join fields if outer and inner relation are foreign
@@ -529,32 +583,6 @@ set_foreign_rel_properties(RelOptInfo *joinrel, RelOptInfo *outer_rel,
 	}
 }
 
-/*
- * add_join_rel
- *		Add given join relation to the list of join relations in the given
- *		PlannerInfo. Also add it to the auxiliary hashtable if there is one.
- */
-static void
-add_join_rel(PlannerInfo *root, RelOptInfo *joinrel)
-{
-	/* GEQO requires us to append the new joinrel to the end of the list! */
-	root->join_rel_list = lappend(root->join_rel_list, joinrel);
-
-	/* store it into the auxiliary hashtable if there is one. */
-	if (root->join_rel_hash)
-	{
-		JoinHashEntry *hentry;
-		bool		found;
-
-		hentry = (JoinHashEntry *) hash_search(root->join_rel_hash,
-											   &(joinrel->relids),
-											   HASH_ENTER,
-											   &found);
-		Assert(!found);
-		hentry->join_rel = joinrel;
-	}
-}
-
 /*
  * build_join_rel
  *	  Returns relation entry corresponding to the union of two given rels,
@@ -1223,22 +1251,14 @@ subbuild_joinrel_joinlist(RelOptInfo *joinrel,
 RelOptInfo *
 fetch_upper_rel(PlannerInfo *root, UpperRelationKind kind, Relids relids)
 {
+	RelInfoList *list = &root->upper_rels[kind];
 	RelOptInfo *upperrel;
-	ListCell   *lc;
-
-	/*
-	 * For the moment, our indexing data structure is just a List for each
-	 * relation kind.  If we ever get so many of one kind that this stops
-	 * working well, we can improve it.  No code outside this function should
-	 * assume anything about how to find a particular upperrel.
-	 */
 
 	/* If we already made this upperrel for the query, return it */
-	foreach(lc, root->upper_rels[kind])
+	if (list)
 	{
-		upperrel = (RelOptInfo *) lfirst(lc);
-
-		if (bms_equal(upperrel->relids, relids))
+		upperrel = find_rel_info(list, relids);
+		if (upperrel)
 			return upperrel;
 	}
 
@@ -1257,7 +1277,7 @@ fetch_upper_rel(PlannerInfo *root, UpperRelationKind kind, Relids relids)
 	upperrel->cheapest_unique_path = NULL;
 	upperrel->cheapest_parameterized_paths = NIL;
 
-	root->upper_rels[kind] = lappend(root->upper_rels[kind], upperrel);
+	add_rel_info(&root->upper_rels[kind], upperrel);
 
 	return upperrel;
 }
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index 09342d128d..0ca7d5ab51 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -80,6 +80,25 @@ typedef enum UpperRelationKind
 	/* NB: UPPERREL_FINAL must be last enum entry; it's used to size arrays */
 } UpperRelationKind;
 
+/*
+ * Hashed list to store relation specific info and to retrieve it by relids.
+ *
+ * For small problems we just scan the list to do lookups, but when there are
+ * many relations we build a hash table for faster lookups. The hash table is
+ * present and valid when rel_hash is not NULL.  Note that we still maintain
+ * the list even when using the hash table for lookups; this simplifies life
+ * for GEQO.
+ */
+typedef struct RelInfoList
+{
+	pg_node_attr(no_copy_equal, no_read)
+
+	NodeTag		type;
+
+	List	   *items;
+	struct HTAB *hash pg_node_attr(read_write_ignore);
+} RelInfoList;
+
 /*----------
  * PlannerGlobal
  *		Global information for planning/optimization
@@ -260,15 +279,9 @@ struct PlannerInfo
 
 	/*
 	 * join_rel_list is a list of all join-relation RelOptInfos we have
-	 * considered in this planning run.  For small problems we just scan the
-	 * list to do lookups, but when there are many join relations we build a
-	 * hash table for faster lookups.  The hash table is present and valid
-	 * when join_rel_hash is not NULL.  Note that we still maintain the list
-	 * even when using the hash table for lookups; this simplifies life for
-	 * GEQO.
+	 * considered in this planning run.
 	 */
-	List	   *join_rel_list;
-	struct HTAB *join_rel_hash pg_node_attr(read_write_ignore);
+	struct RelInfoList *join_rel_list;	/* list of join-relation RelOptInfos */
 
 	/*
 	 * When doing a dynamic-programming-style join search, join_rel_level[k]
@@ -395,7 +408,7 @@ struct PlannerInfo
 	 * Upper-rel RelOptInfos. Use fetch_upper_rel() to get any particular
 	 * upper rel.
 	 */
-	List	   *upper_rels[UPPERREL_FINAL + 1] pg_node_attr(read_write_ignore);
+	RelInfoList	   upper_rels[UPPERREL_FINAL + 1] pg_node_attr(read_write_ignore);;
 
 	/* Result tlists chosen by grouping_planner for upper-stage processing */
 	struct PathTarget *upper_targets[UPPERREL_FINAL + 1] pg_node_attr(read_write_ignore);
-- 
2.31.1

