commit b98559d7c9a576a306b3367eb967ddda6ae18a85
Author: Tom Lane <tgl@sss.pgh.pa.us>
Date:   Thu Oct 27 14:56:08 2022 -0400

    Add Var.varnullingrels and PlaceHolderVar.phnullingrels fields.
    
    These fields are always empty as of this commit, so they don't
    affect any behavior, even though equal() will compare them.
    
    Update backend/nodes/ and backend/rewrite/ infrastructure as needed.
    Also add some rewrite functions we'll need later.
    
    Note this will require a catversion bump when committed.

diff --git a/src/backend/nodes/makefuncs.c b/src/backend/nodes/makefuncs.c
index c85d8fe975..cced668f58 100644
--- a/src/backend/nodes/makefuncs.c
+++ b/src/backend/nodes/makefuncs.c
@@ -80,11 +80,13 @@ makeVar(int varno,
 	var->varlevelsup = varlevelsup;
 
 	/*
-	 * Only a few callers need to make Var nodes with varnosyn/varattnosyn
-	 * different from varno/varattno.  We don't provide separate arguments for
-	 * them, but just initialize them to the given varno/varattno.  This
-	 * reduces code clutter and chance of error for most callers.
+	 * Only a few callers need to make Var nodes with non-null varnullingrels,
+	 * or with varnosyn/varattnosyn different from varno/varattno.  We don't
+	 * provide separate arguments for them, but just initialize them to NULL
+	 * and the given varno/varattno.  This reduces code clutter and chance of
+	 * error for most callers.
 	 */
+	var->varnullingrels = NULL;
 	var->varnosyn = (Index) varno;
 	var->varattnosyn = varattno;
 
diff --git a/src/backend/nodes/nodeFuncs.c b/src/backend/nodes/nodeFuncs.c
index 0a7b22f97e..692f45daa5 100644
--- a/src/backend/nodes/nodeFuncs.c
+++ b/src/backend/nodes/nodeFuncs.c
@@ -2663,6 +2663,7 @@ expression_tree_mutator_impl(Node *node,
 				Var		   *newnode;
 
 				FLATCOPY(newnode, var, Var);
+				/* Assume we need not copy the varnullingrels bitmapset */
 				return (Node *) newnode;
 			}
 			break;
@@ -3257,7 +3258,7 @@ expression_tree_mutator_impl(Node *node,
 
 				FLATCOPY(newnode, phv, PlaceHolderVar);
 				MUTATE(newnode->phexpr, phv->phexpr, Expr *);
-				/* Assume we need not copy the relids bitmapset */
+				/* Assume we need not copy the relids bitmapsets */
 				return (Node *) newnode;
 			}
 			break;
diff --git a/src/backend/rewrite/rewriteManip.c b/src/backend/rewrite/rewriteManip.c
index 101c39553a..ab24547e6d 100644
--- a/src/backend/rewrite/rewriteManip.c
+++ b/src/backend/rewrite/rewriteManip.c
@@ -40,6 +40,20 @@ typedef struct
 	int			win_location;
 } locate_windowfunc_context;
 
+typedef struct
+{
+	const Bitmapset *target_relids;
+	const Bitmapset *added_relids;
+	int			sublevels_up;
+} add_nulling_relids_context;
+
+typedef struct
+{
+	const Bitmapset *removable_relids;
+	const Bitmapset *except_relids;
+	int			sublevels_up;
+} remove_nulling_relids_context;
+
 static bool contain_aggs_of_level_walker(Node *node,
 										 contain_aggs_of_level_context *context);
 static bool locate_agg_of_level_walker(Node *node,
@@ -50,6 +64,10 @@ static bool locate_windowfunc_walker(Node *node,
 static bool checkExprHasSubLink_walker(Node *node, void *context);
 static Relids offset_relid_set(Relids relids, int offset);
 static Relids adjust_relid_set(Relids relids, int oldrelid, int newrelid);
+static Node *add_nulling_relids_mutator(Node *node,
+										add_nulling_relids_context *context);
+static Node *remove_nulling_relids_mutator(Node *node,
+										   remove_nulling_relids_context *context);
 
 
 /*
@@ -348,6 +366,8 @@ OffsetVarNodes_walker(Node *node, OffsetVarNodes_context *context)
 		if (var->varlevelsup == context->sublevels_up)
 		{
 			var->varno += context->offset;
+			var->varnullingrels = offset_relid_set(var->varnullingrels,
+												   context->offset);
 			if (var->varnosyn > 0)
 				var->varnosyn += context->offset;
 		}
@@ -386,6 +406,8 @@ OffsetVarNodes_walker(Node *node, OffsetVarNodes_context *context)
 		{
 			phv->phrels = offset_relid_set(phv->phrels,
 										   context->offset);
+			phv->phnullingrels = offset_relid_set(phv->phnullingrels,
+												  context->offset);
 		}
 		/* fall through to examine children */
 	}
@@ -510,11 +532,13 @@ ChangeVarNodes_walker(Node *node, ChangeVarNodes_context *context)
 	{
 		Var		   *var = (Var *) node;
 
-		if (var->varlevelsup == context->sublevels_up &&
-			var->varno == context->rt_index)
+		if (var->varlevelsup == context->sublevels_up)
 		{
-			var->varno = context->new_index;
-			/* If the syntactic referent is same RTE, fix it too */
+			if (var->varno == context->rt_index)
+				var->varno = context->new_index;
+			var->varnullingrels = adjust_relid_set(var->varnullingrels,
+												   context->rt_index,
+												   context->new_index);
 			if (var->varnosyn == context->rt_index)
 				var->varnosyn = context->new_index;
 		}
@@ -557,6 +581,9 @@ ChangeVarNodes_walker(Node *node, ChangeVarNodes_context *context)
 			phv->phrels = adjust_relid_set(phv->phrels,
 										   context->rt_index,
 										   context->new_index);
+			phv->phnullingrels = adjust_relid_set(phv->phnullingrels,
+												  context->rt_index,
+												  context->new_index);
 		}
 		/* fall through to examine children */
 	}
@@ -833,7 +860,8 @@ rangeTableEntry_used_walker(Node *node,
 		Var		   *var = (Var *) node;
 
 		if (var->varlevelsup == context->sublevels_up &&
-			var->varno == context->rt_index)
+			(var->varno == context->rt_index ||
+			 bms_is_member(context->rt_index, var->varnullingrels)))
 			return true;
 		return false;
 	}
@@ -1061,6 +1089,195 @@ AddInvertedQual(Query *parsetree, Node *qual)
 }
 
 
+/*
+ * add_nulling_relids() finds Vars and PlaceHolderVars that belong to any
+ * of the target_relids, and adds added_relids to their varnullingrels
+ * and phnullingrels fields.
+ */
+Node *
+add_nulling_relids(Node *node,
+				   const Bitmapset *target_relids,
+				   const Bitmapset *added_relids)
+{
+	add_nulling_relids_context context;
+
+	context.target_relids = target_relids;
+	context.added_relids = added_relids;
+	context.sublevels_up = 0;
+	return query_or_expression_tree_mutator(node,
+											add_nulling_relids_mutator,
+											&context,
+											0);
+}
+
+static Node *
+add_nulling_relids_mutator(Node *node,
+						   add_nulling_relids_context *context)
+{
+	if (node == NULL)
+		return NULL;
+	if (IsA(node, Var))
+	{
+		Var		   *var = (Var *) node;
+
+		if (var->varlevelsup == context->sublevels_up &&
+			bms_is_member(var->varno, context->target_relids))
+		{
+			Relids		newnullingrels = bms_union(var->varnullingrels,
+												   context->added_relids);
+
+			/* Copy the Var ... */
+			var = copyObject(var);
+			/* ... and replace the copy's varnullingrels field */
+			var->varnullingrels = newnullingrels;
+			return (Node *) var;
+		}
+		/* Otherwise fall through to copy the Var normally */
+	}
+	else if (IsA(node, PlaceHolderVar))
+	{
+		PlaceHolderVar *phv = (PlaceHolderVar *) node;
+
+		if (phv->phlevelsup == context->sublevels_up &&
+			bms_overlap(phv->phrels, context->target_relids))
+		{
+			Relids		newnullingrels = bms_union(phv->phnullingrels,
+												   context->added_relids);
+
+			/*
+			 * We don't modify the contents of the PHV's expression, only add
+			 * to phnullingrels.  This corresponds to assuming that the PHV
+			 * will be evaluated at the same level as before, then perhaps be
+			 * nulled as it bubbles up.  Hence, just flat-copy the node ...
+			 */
+			phv = makeNode(PlaceHolderVar);
+			memcpy(phv, node, sizeof(PlaceHolderVar));
+			/* ... and replace the copy's phnullingrels field */
+			phv->phnullingrels = newnullingrels;
+			return (Node *) phv;
+		}
+		/* Otherwise fall through to copy the PlaceHolderVar normally */
+	}
+	else if (IsA(node, Query))
+	{
+		/* Recurse into RTE or sublink subquery */
+		Query	   *newnode;
+
+		context->sublevels_up++;
+		newnode = query_tree_mutator((Query *) node,
+									 add_nulling_relids_mutator,
+									 (void *) context,
+									 0);
+		context->sublevels_up--;
+		return (Node *) newnode;
+	}
+	return expression_tree_mutator(node, add_nulling_relids_mutator,
+								   (void *) context);
+}
+
+/*
+ * remove_nulling_relids() removes mentions of the specified RT index(es)
+ * in Var.varnullingrels and PlaceHolderVar.phnullingrels fields within
+ * the given expression, except in nodes belonging to rels listed in
+ * except_relids.
+ */
+Node *
+remove_nulling_relids(Node *node,
+					  const Bitmapset *removable_relids,
+					  const Bitmapset *except_relids)
+{
+	remove_nulling_relids_context context;
+
+	context.removable_relids = removable_relids;
+	context.except_relids = except_relids;
+	context.sublevels_up = 0;
+	return query_or_expression_tree_mutator(node,
+											remove_nulling_relids_mutator,
+											&context,
+											0);
+}
+
+static Node *
+remove_nulling_relids_mutator(Node *node,
+							  remove_nulling_relids_context *context)
+{
+	if (node == NULL)
+		return NULL;
+	if (IsA(node, Var))
+	{
+		Var		   *var = (Var *) node;
+
+		if (var->varlevelsup == context->sublevels_up &&
+			!bms_is_member(var->varno, context->except_relids) &&
+			bms_overlap(var->varnullingrels, context->removable_relids))
+		{
+			Relids		newnullingrels = bms_difference(var->varnullingrels,
+														context->removable_relids);
+
+			/* Micro-optimization: ensure nullingrels is NULL if empty */
+			if (bms_is_empty(newnullingrels))
+				newnullingrels = NULL;
+			/* Copy the Var ... */
+			var = copyObject(var);
+			/* ... and replace the copy's varnullingrels field */
+			var->varnullingrels = newnullingrels;
+			return (Node *) var;
+		}
+		/* Otherwise fall through to copy the Var normally */
+	}
+	else if (IsA(node, PlaceHolderVar))
+	{
+		PlaceHolderVar *phv = (PlaceHolderVar *) node;
+
+		if (phv->phlevelsup == context->sublevels_up &&
+			!bms_overlap(phv->phrels, context->except_relids))
+		{
+			Relids		newnullingrels = bms_difference(phv->phnullingrels,
+														context->removable_relids);
+
+			/*
+			 * Micro-optimization: ensure nullingrels is NULL if empty.
+			 *
+			 * Note: it might seem desirable to remove the PHV altogether if
+			 * phnullingrels goes to empty.  Currently we dare not do that
+			 * because we use PHVs in some cases to enforce separate identity
+			 * of subexpressions; see wrap_non_vars usages in prepjointree.c.
+			 */
+			if (bms_is_empty(newnullingrels))
+				newnullingrels = NULL;
+			/* Copy the PlaceHolderVar and mutate what's below ... */
+			phv = (PlaceHolderVar *)
+				expression_tree_mutator(node,
+										remove_nulling_relids_mutator,
+										(void *) context);
+			/* ... and replace the copy's phnullingrels field */
+			phv->phnullingrels = newnullingrels;
+			/* We must also update phrels, if it contains a removable RTI */
+			phv->phrels = bms_difference(phv->phrels,
+										 context->removable_relids);
+			Assert(!bms_is_empty(phv->phrels));
+			return (Node *) phv;
+		}
+		/* Otherwise fall through to copy the PlaceHolderVar normally */
+	}
+	else if (IsA(node, Query))
+	{
+		/* Recurse into RTE or sublink subquery */
+		Query	   *newnode;
+
+		context->sublevels_up++;
+		newnode = query_tree_mutator((Query *) node,
+									 remove_nulling_relids_mutator,
+									 (void *) context,
+									 0);
+		context->sublevels_up--;
+		return (Node *) newnode;
+	}
+	return expression_tree_mutator(node, remove_nulling_relids_mutator,
+								   (void *) context);
+}
+
+
 /*
  * replace_rte_variables() finds all Vars in an expression tree
  * that reference a particular RTE, and replaces them with substitute
diff --git a/src/backend/utils/misc/queryjumble.c b/src/backend/utils/misc/queryjumble.c
index a8508463e7..c0b254fc6e 100644
--- a/src/backend/utils/misc/queryjumble.c
+++ b/src/backend/utils/misc/queryjumble.c
@@ -383,6 +383,11 @@ JumbleExpr(JumbleState *jstate, Node *node)
 				APP_JUMB(var->varno);
 				APP_JUMB(var->varattno);
 				APP_JUMB(var->varlevelsup);
+
+				/*
+				 * We can omit varnullingrels, because it's fully determined
+				 * by varno/varlevelsup plus the Var's query location.
+				 */
 			}
 			break;
 		case T_Const:
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index 09342d128d..2c6d5ca58f 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -874,7 +874,7 @@ typedef struct RelOptInfo
 	int32	   *attr_widths pg_node_attr(read_write_ignore);
 	/* LATERAL Vars and PHVs referenced by rel */
 	List	   *lateral_vars;
-	/* rels that reference me laterally */
+	/* rels that reference this baserel laterally */
 	Relids		lateral_referencers;
 	/* list of IndexOptInfo */
 	List	   *indexlist;
@@ -884,10 +884,7 @@ typedef struct RelOptInfo
 	BlockNumber pages;
 	Cardinality tuples;
 	double		allvisfrac;
-
-	/*
-	 * Indexes in PlannerInfo's eq_classes list of ECs that mention this rel
-	 */
+	/* indexes in PlannerInfo's eq_classes list of ECs that mention this rel */
 	Bitmapset  *eclass_indexes;
 	PlannerInfo *subroot;		/* if subquery */
 	List	   *subplan_params; /* if subquery */
@@ -2586,10 +2583,15 @@ typedef struct MergeScanSelCache
  * of a plan tree.  This is used during planning to represent the contained
  * expression.  At the end of the planning process it is replaced by either
  * the contained expression or a Var referring to a lower-level evaluation of
- * the contained expression.  Typically the evaluation occurs below an outer
+ * the contained expression.  Generally the evaluation occurs below an outer
  * join, and Var references above the outer join might thereby yield NULL
  * instead of the expression value.
  *
+ * phrels and phlevelsup correspond to the varno/varlevelsup fields of a
+ * plain Var, except that phrels has to be a relid set since the evaluation
+ * level of a PlaceHolderVar might be a join rather than a base relation.
+ * Likewise, phnullingrels corresponds to varnullingrels.
+ *
  * Although the planner treats this as an expression node type, it is not
  * recognized by the parser or executor, so we declare it here rather than
  * in primnodes.h.
@@ -2602,8 +2604,10 @@ typedef struct MergeScanSelCache
  * PHV.  Another way in which it can happen is that initplan sublinks
  * could get replaced by differently-numbered Params when sublink folding
  * is done.  (The end result of such a situation would be some
- * unreferenced initplans, which is annoying but not really a problem.) On
- * the same reasoning, there is no need to examine phrels.
+ * unreferenced initplans, which is annoying but not really a problem.)
+ * On the same reasoning, there is no need to examine phrels.  But we do
+ * need to compare phnullingrels, as that represents effects that are
+ * external to the original value of the PHV.
  */
 
 typedef struct PlaceHolderVar
@@ -2613,9 +2617,12 @@ typedef struct PlaceHolderVar
 	/* the represented expression */
 	Expr	   *phexpr pg_node_attr(equal_ignore);
 
-	/* base relids syntactically within expr src */
+	/* base+OJ relids syntactically within expr src */
 	Relids		phrels pg_node_attr(equal_ignore);
 
+	/* RT indexes of outer joins that can null PHV's value */
+	Relids		phnullingrels;
+
 	/* ID for PHV (unique within planner run) */
 	Index		phid;
 
diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h
index f71f551782..8f133e12ac 100644
--- a/src/include/nodes/primnodes.h
+++ b/src/include/nodes/primnodes.h
@@ -180,6 +180,14 @@ typedef struct Expr
  * row identity information during UPDATE/DELETE/MERGE.  This value should
  * never be seen outside the planner.
  *
+ * varnullingrels is the set of RT indexes of outer joins that can force
+ * the Var's value to null (at the point where it appears in the query).
+ * See optimizer/README for discussion of that.
+ *
+ * varlevelsup is greater than zero in Vars that represent outer references.
+ * Note that it affects the meaning of all of varno, varnullingrels, and
+ * varnosyn, all of which refer to the range table of that query level.
+ *
  * In the parser, varnosyn and varattnosyn are either identical to
  * varno/varattno, or they specify the column's position in an aliased JOIN
  * RTE that hides the semantic referent RTE's refname.  This is a syntactic
@@ -222,6 +230,8 @@ typedef struct Var
 	int32		vartypmod;
 	/* OID of collation, or InvalidOid if none */
 	Oid			varcollid;
+	/* RT indexes of outer joins that can replace the Var's value with null */
+	Bitmapset  *varnullingrels;
 
 	/*
 	 * for subquery variables referencing outer relations; 0 in a normal var,
diff --git a/src/include/rewrite/rewriteManip.h b/src/include/rewrite/rewriteManip.h
index f001ca41bb..351ec15612 100644
--- a/src/include/rewrite/rewriteManip.h
+++ b/src/include/rewrite/rewriteManip.h
@@ -63,6 +63,13 @@ extern bool contain_windowfuncs(Node *node);
 extern int	locate_windowfunc(Node *node);
 extern bool checkExprHasSubLink(Node *node);
 
+extern Node *add_nulling_relids(Node *node,
+								const Bitmapset *target_relids,
+								const Bitmapset *added_relids);
+extern Node *remove_nulling_relids(Node *node,
+								   const Bitmapset *removable_relids,
+								   const Bitmapset *except_relids);
+
 extern Node *replace_rte_variables(Node *node,
 								   int target_varno, int sublevels_up,
 								   replace_rte_variables_callback callback,
