commit 4e2aa1ee681f751928083d50a9488a23703be16d
Author: Tom Lane <tgl@sss.pgh.pa.us>
Date:   Sat Nov 5 17:25:11 2022 -0400

    Fix flatten_join_alias_vars() to handle varnullingrels correctly.
    
    The remaining core regression test failures occur because
    flatten_join_alias_vars() isn't doing the right thing.  The
    alias Var it needs to replace may have acquired varnullingrels
    bits signifying the effect of upper outer joins, and if so we
    must preserve that information in the replacement expression.
    
    The simplest way to do that is to wrap the replacement expression
    in a PlaceHolderVar, and that's what we have to do in the general
    case where subquery pullup has mutated the replacement joinaliasvars
    entry into an arbitrary expression.  But in simpler cases, such as
    where the joinaliasvars entry is just a Var, we'd prefer to do it
    by merging the alias Var's varnullingrels into the replacement Var.
    In that way the flattened alias will compare equal() to semantically
    equivalent references that didn't use the alias name.
    
    Moreover, the parser also uses this code while checking certain
    semantic constraints, and in that context we *must not* generate
    PlaceHolderVars.  PHVs shouldn't appear in parse-time expressions,
    and adding one would certainly cause the parser to decide the
    query is invalid (because the result wouldn't compare equal() to
    what it needs to).  Fortunately, during parsing the set of possible
    contents of a joinaliasvars entry is quite constrained, so we can
    guarantee to apply the nullingrels info to the Vars therein.
    
    The result of this step passes all core regression tests, but there
    are still loose ends for FDWs (so that contrib/postgres_fdw will fail).

diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index d52c2a3595..dc089306ae 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -901,7 +901,7 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
 			 */
 			if (rte->lateral && root->hasJoinRTEs)
 				rte->subquery = (Query *)
-					flatten_join_alias_vars(root->parse,
+					flatten_join_alias_vars(root, root->parse,
 											(Node *) rte->subquery);
 		}
 		else if (rte->rtekind == RTE_FUNCTION)
@@ -1102,7 +1102,7 @@ preprocess_expression(PlannerInfo *root, Node *expr, int kind)
 		  kind == EXPRKIND_VALUES ||
 		  kind == EXPRKIND_TABLESAMPLE ||
 		  kind == EXPRKIND_TABLEFUNC))
-		expr = flatten_join_alias_vars(root->parse, expr);
+		expr = flatten_join_alias_vars(root, root->parse, expr);
 
 	/*
 	 * Simplify constant expressions.  For function RTEs, this was already
diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c
index 0a327e1d71..74fcca3d9a 100644
--- a/src/backend/optimizer/prep/prepjointree.c
+++ b/src/backend/optimizer/prep/prepjointree.c
@@ -1077,7 +1077,8 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte,
 	 * maybe even in the rewriter; but for now let's just fix this case here.)
 	 */
 	subquery->targetList = (List *)
-		flatten_join_alias_vars(subroot->parse, (Node *) subquery->targetList);
+		flatten_join_alias_vars(subroot, subroot->parse,
+								(Node *) subquery->targetList);
 
 	/*
 	 * Adjust level-0 varnos in subquery so that we can append its rangetable
diff --git a/src/backend/optimizer/util/var.c b/src/backend/optimizer/util/var.c
index 8d8c9136f8..69c2019553 100644
--- a/src/backend/optimizer/util/var.c
+++ b/src/backend/optimizer/util/var.c
@@ -62,6 +62,7 @@ typedef struct
 
 typedef struct
 {
+	PlannerInfo *root;			/* could be NULL! */
 	Query	   *query;			/* outer Query */
 	int			sublevels_up;
 	bool		possible_sublink;	/* could aliases include a SubLink? */
@@ -80,6 +81,10 @@ static bool pull_var_clause_walker(Node *node,
 								   pull_var_clause_context *context);
 static Node *flatten_join_alias_vars_mutator(Node *node,
 											 flatten_join_alias_vars_context *context);
+static Node *add_nullingrels_if_needed(PlannerInfo *root, Node *newnode,
+									   Var *oldvar);
+static bool is_standard_join_alias_expression(Node *newnode, Var *oldvar);
+static void adjust_standard_join_alias_expression(Node *newnode, Var *oldvar);
 static Relids alias_relid_set(Query *query, Relids relids);
 
 
@@ -722,26 +727,42 @@ pull_var_clause_walker(Node *node, pull_var_clause_context *context)
  *	  is the only way that the executor can directly handle whole-row Vars.
  *
  * This also adjusts relid sets found in some expression node types to
- * substitute the contained base rels for any join relid.
+ * substitute the contained base+OJ rels for any join relid.
  *
  * If a JOIN contains sub-selects that have been flattened, its join alias
  * entries might now be arbitrary expressions, not just Vars.  This affects
- * this function in one important way: we might find ourselves inserting
- * SubLink expressions into subqueries, and we must make sure that their
- * Query.hasSubLinks fields get set to true if so.  If there are any
+ * this function in two important ways.  First, we might find ourselves
+ * inserting SubLink expressions into subqueries, and we must make sure that
+ * their Query.hasSubLinks fields get set to true if so.  If there are any
  * SubLinks in the join alias lists, the outer Query should already have
  * hasSubLinks = true, so this is only relevant to un-flattened subqueries.
+ * Second, we have to preserve any varnullingrels info attached to the
+ * alias Vars we're replacing.  If the replacement expression is a Var or
+ * PlaceHolderVar or constructed from those, we can just add the
+ * varnullingrels bits to the existing nullingrels field(s); otherwise
+ * we have to add a PlaceHolderVar wrapper.
  *
- * NOTE: this is used on not-yet-planned expressions.  We do not expect it
- * to be applied directly to the whole Query, so if we see a Query to start
- * with, we do want to increment sublevels_up (this occurs for LATERAL
- * subqueries).
+ * NOTE: this is also used by the parser, to expand join alias Vars before
+ * checking GROUP BY validity.  For that use-case, root will be NULL, which
+ * is why we have to pass the Query separately.  We need the root itself only
+ * for making PlaceHolderVars.  We can avoid making PlaceHolderVars in the
+ * parser's usage because it won't be dealing with arbitrary expressions:
+ * so long as adjust_standard_join_alias_expression can handle everything
+ * the parser would make as a join alias expression, we're OK.
  */
 Node *
-flatten_join_alias_vars(Query *query, Node *node)
+flatten_join_alias_vars(PlannerInfo *root, Query *query, Node *node)
 {
 	flatten_join_alias_vars_context context;
 
+	/*
+	 * We do not expect this to be applied to the whole Query, only to
+	 * expressions or LATERAL subqueries.  Hence, if the top node is a Query,
+	 * it's okay to immediately increment sublevels_up.
+	 */
+	Assert(node != (Node *) query);
+
+	context.root = root;
 	context.query = query;
 	context.sublevels_up = 0;
 	/* flag whether join aliases could possibly contain SubLinks */
@@ -812,7 +833,9 @@ flatten_join_alias_vars_mutator(Node *node,
 			rowexpr->colnames = colnames;
 			rowexpr->location = var->location;
 
-			return (Node *) rowexpr;
+			/* Lastly, add any varnullingrels to the replacement expression */
+			return add_nullingrels_if_needed(context->root, (Node *) rowexpr,
+											 var);
 		}
 
 		/* Expand join alias reference */
@@ -839,7 +862,8 @@ flatten_join_alias_vars_mutator(Node *node,
 		if (context->possible_sublink && !context->inserted_sublink)
 			context->inserted_sublink = checkExprHasSubLink(newvar);
 
-		return newvar;
+		/* Lastly, add any varnullingrels to the replacement expression */
+		return add_nullingrels_if_needed(context->root, newvar, var);
 	}
 	if (IsA(node, PlaceHolderVar))
 	{
@@ -854,6 +878,7 @@ flatten_join_alias_vars_mutator(Node *node,
 		{
 			phv->phrels = alias_relid_set(context->query,
 										  phv->phrels);
+			/* we *don't* change phnullingrels */
 		}
 		return (Node *) phv;
 	}
@@ -887,9 +912,145 @@ flatten_join_alias_vars_mutator(Node *node,
 								   (void *) context);
 }
 
+/*
+ * Add oldvar's varnullingrels, if any, to a flattened join alias expression.
+ * The newnode has been copied, so we can modify it freely.
+ */
+static Node *
+add_nullingrels_if_needed(PlannerInfo *root, Node *newnode, Var *oldvar)
+{
+	if (oldvar->varnullingrels == NULL)
+		return newnode;			/* nothing to do */
+	/* If possible, do it by adding to existing nullingrel fields */
+	if (is_standard_join_alias_expression(newnode, oldvar))
+		adjust_standard_join_alias_expression(newnode, oldvar);
+	else if (root)
+	{
+		/* We can insert a PlaceHolderVar to carry the nullingrels */
+		PlaceHolderVar *newphv;
+		Relids		phrels = pull_varnos(root, newnode);
+
+		/* XXX what if phrels is empty? */
+		Assert(!bms_is_empty(phrels));	/* probably wrong */
+		newphv = make_placeholder_expr(root, (Expr *) newnode, phrels);
+		/* newphv has zero phlevelsup and NULL phnullingrels; fix it */
+		newphv->phlevelsup = oldvar->varlevelsup;
+		newphv->phnullingrels = bms_copy(oldvar->varnullingrels);
+		newnode = (Node *) newphv;
+	}
+	else
+	{
+		/* ooops, we're missing support for something the parser can make */
+		elog(ERROR, "unsupported join alias expression");
+	}
+	return newnode;
+}
+
+/*
+ * Check to see if we can insert nullingrels into this join alias expression
+ * without use of a separate PlaceHolderVar.
+ *
+ * This will handle Vars, PlaceHolderVars, and implicit-coercion and COALESCE
+ * expressions built from those.  This coverage needs to handle anything
+ * that the parser would put into joinaliasvars.
+ * XXX it's probably incomplete at the moment.
+ */
+static bool
+is_standard_join_alias_expression(Node *newnode, Var *oldvar)
+{
+	if (newnode == NULL)
+		return false;
+	if (IsA(newnode, Var) &&
+		((Var *) newnode)->varlevelsup == oldvar->varlevelsup)
+		return true;
+	else if (IsA(newnode, PlaceHolderVar) &&
+			 ((PlaceHolderVar *) newnode)->phlevelsup == oldvar->varlevelsup)
+		return true;
+	else if (IsA(newnode, FuncExpr))
+	{
+		FuncExpr   *fexpr = (FuncExpr *) newnode;
+
+		/*
+		 * We need to assume that the function wouldn't produce non-NULL from
+		 * NULL, which is reasonable for implicit coercions but otherwise not
+		 * so much.  (Looking at its strictness is likely overkill, and anyway
+		 * it would cause us to fail if someone forgot to mark an implicit
+		 * coercion as strict.)
+		 */
+		if (fexpr->funcformat != COERCE_IMPLICIT_CAST ||
+			fexpr->args == NIL)
+			return false;
+
+		/*
+		 * Examine only the first argument --- coercions might have additional
+		 * arguments that are constants.
+		 */
+		return is_standard_join_alias_expression(linitial(fexpr->args), oldvar);
+	}
+	else if (IsA(newnode, CoalesceExpr))
+	{
+		CoalesceExpr *cexpr = (CoalesceExpr *) newnode;
+		ListCell   *lc;
+
+		Assert(cexpr->args != NIL);
+		foreach(lc, cexpr->args)
+		{
+			if (!is_standard_join_alias_expression(lfirst(lc), oldvar))
+				return false;
+		}
+		return true;
+	}
+	else
+		return false;
+}
+
+/*
+ * Insert nullingrels into an expression accepted by
+ * is_standard_join_alias_expression.
+ */
+static void
+adjust_standard_join_alias_expression(Node *newnode, Var *oldvar)
+{
+	if (IsA(newnode, Var) &&
+		((Var *) newnode)->varlevelsup == oldvar->varlevelsup)
+	{
+		Var		   *newvar = (Var *) newnode;
+
+		newvar->varnullingrels = bms_add_members(newvar->varnullingrels,
+												 oldvar->varnullingrels);
+	}
+	else if (IsA(newnode, PlaceHolderVar) &&
+			 ((PlaceHolderVar *) newnode)->phlevelsup == oldvar->varlevelsup)
+	{
+		PlaceHolderVar *newphv = (PlaceHolderVar *) newnode;
+
+		newphv->phnullingrels = bms_add_members(newphv->phnullingrels,
+												oldvar->varnullingrels);
+	}
+	else if (IsA(newnode, FuncExpr))
+	{
+		FuncExpr   *fexpr = (FuncExpr *) newnode;
+
+		adjust_standard_join_alias_expression(linitial(fexpr->args), oldvar);
+	}
+	else if (IsA(newnode, CoalesceExpr))
+	{
+		CoalesceExpr *cexpr = (CoalesceExpr *) newnode;
+		ListCell   *lc;
+
+		Assert(cexpr->args != NIL);
+		foreach(lc, cexpr->args)
+		{
+			adjust_standard_join_alias_expression(lfirst(lc), oldvar);
+		}
+	}
+	else
+		Assert(false);
+}
+
 /*
  * alias_relid_set: in a set of RT indexes, replace joins by their
- * underlying base relids
+ * underlying base+OJ relids
  */
 static Relids
 alias_relid_set(Query *query, Relids relids)
diff --git a/src/backend/parser/parse_agg.c b/src/backend/parser/parse_agg.c
index 3ef9e8ee5e..c15fab0f68 100644
--- a/src/backend/parser/parse_agg.c
+++ b/src/backend/parser/parse_agg.c
@@ -1162,7 +1162,7 @@ parseCheckAggregates(ParseState *pstate, Query *qry)
 	 * entries are RTE_JOIN kind.
 	 */
 	if (hasJoinRTEs)
-		groupClauses = (List *) flatten_join_alias_vars(qry,
+		groupClauses = (List *) flatten_join_alias_vars(NULL, qry,
 														(Node *) groupClauses);
 
 	/*
@@ -1206,7 +1206,7 @@ parseCheckAggregates(ParseState *pstate, Query *qry)
 							groupClauses, hasJoinRTEs,
 							have_non_var_grouping);
 	if (hasJoinRTEs)
-		clause = flatten_join_alias_vars(qry, clause);
+		clause = flatten_join_alias_vars(NULL, qry, clause);
 	check_ungrouped_columns(clause, pstate, qry,
 							groupClauses, groupClauseCommonVars,
 							have_non_var_grouping,
@@ -1217,7 +1217,7 @@ parseCheckAggregates(ParseState *pstate, Query *qry)
 							groupClauses, hasJoinRTEs,
 							have_non_var_grouping);
 	if (hasJoinRTEs)
-		clause = flatten_join_alias_vars(qry, clause);
+		clause = flatten_join_alias_vars(NULL, qry, clause);
 	check_ungrouped_columns(clause, pstate, qry,
 							groupClauses, groupClauseCommonVars,
 							have_non_var_grouping,
@@ -1546,7 +1546,7 @@ finalize_grouping_exprs_walker(Node *node,
 				Index		ref = 0;
 
 				if (context->hasJoinRTEs)
-					expr = flatten_join_alias_vars(context->qry, expr);
+					expr = flatten_join_alias_vars(NULL, context->qry, expr);
 
 				/*
 				 * Each expression must match a grouping entry at the current
diff --git a/src/include/optimizer/optimizer.h b/src/include/optimizer/optimizer.h
index 409005bae9..95f3461a3d 100644
--- a/src/include/optimizer/optimizer.h
+++ b/src/include/optimizer/optimizer.h
@@ -197,6 +197,6 @@ extern bool contain_var_clause(Node *node);
 extern bool contain_vars_of_level(Node *node, int levelsup);
 extern int	locate_var_of_level(Node *node, int levelsup);
 extern List *pull_var_clause(Node *node, int flags);
-extern Node *flatten_join_alias_vars(Query *query, Node *node);
+extern Node *flatten_join_alias_vars(PlannerInfo *root, Query *query, Node *node);
 
 #endif							/* OPTIMIZER_H */
