From 41e1a39e77e01f038b73f2e6389a876a8347f406 Mon Sep 17 00:00:00 2001
From: Tatsuo Ishii <ishii@postgresql.org>
Date: Wed, 28 Feb 2024 22:59:45 +0900
Subject: [PATCH v14 2/8] Row pattern recognition patch (parse/analysis).

---
 src/backend/parser/parse_agg.c    |   7 +
 src/backend/parser/parse_clause.c | 296 +++++++++++++++++++++++++++++-
 src/backend/parser/parse_expr.c   |   4 +
 src/backend/parser/parse_func.c   |   3 +
 4 files changed, 309 insertions(+), 1 deletion(-)

diff --git a/src/backend/parser/parse_agg.c b/src/backend/parser/parse_agg.c
index 9d151a880b..a36218b103 100644
--- a/src/backend/parser/parse_agg.c
+++ b/src/backend/parser/parse_agg.c
@@ -576,6 +576,10 @@ check_agglevels_and_constraints(ParseState *pstate, Node *expr)
 			errkind = true;
 			break;
 
+		case EXPR_KIND_RPR_DEFINE:
+			errkind = true;
+			break;
+
 			/*
 			 * There is intentionally no default: case here, so that the
 			 * compiler will warn if we add a new ParseExprKind without
@@ -965,6 +969,9 @@ transformWindowFuncCall(ParseState *pstate, WindowFunc *wfunc,
 		case EXPR_KIND_CYCLE_MARK:
 			errkind = true;
 			break;
+		case EXPR_KIND_RPR_DEFINE:
+			errkind = true;
+			break;
 
 			/*
 			 * There is intentionally no default: case here, so that the
diff --git a/src/backend/parser/parse_clause.c b/src/backend/parser/parse_clause.c
index 4b50278fd0..8a4f8f24d2 100644
--- a/src/backend/parser/parse_clause.c
+++ b/src/backend/parser/parse_clause.c
@@ -100,7 +100,14 @@ static WindowClause *findWindowClause(List *wclist, const char *name);
 static Node *transformFrameOffset(ParseState *pstate, int frameOptions,
 								  Oid rangeopfamily, Oid rangeopcintype, Oid *inRangeFunc,
 								  Node *clause);
-
+static void transformRPR(ParseState *pstate, WindowClause *wc, WindowDef *windef,
+						 List **targetlist);
+static List *transformDefineClause(ParseState *pstate, WindowClause *wc, WindowDef *windef,
+								   List **targetlist);
+static void transformPatternClause(ParseState *pstate, WindowClause *wc,
+								   WindowDef *windef);
+static List *transformMeasureClause(ParseState *pstate, WindowClause *wc,
+									WindowDef *windef);
 
 /*
  * transformFromClause -
@@ -2950,6 +2957,10 @@ transformWindowDefinitions(ParseState *pstate,
 											 rangeopfamily, rangeopcintype,
 											 &wc->endInRangeFunc,
 											 windef->endOffset);
+
+		/* Process Row Pattern Recognition related clauses */
+		transformRPR(pstate, wc, windef, targetlist);
+
 		wc->runCondition = NIL;
 		wc->winref = winref;
 
@@ -3815,3 +3826,286 @@ transformFrameOffset(ParseState *pstate, int frameOptions,
 
 	return node;
 }
+
+/*
+ * transformRPR
+ *		Process Row Pattern Recognition related clauses
+ */
+static void
+transformRPR(ParseState *pstate, WindowClause *wc, WindowDef *windef,
+			 List **targetlist)
+{
+	/*
+	 * Window definition exists?
+	 */
+	if (windef == NULL)
+		return;
+
+	/*
+	 * Row Pattern Common Syntax clause exists?
+	 */
+	if (windef->rpCommonSyntax == NULL)
+		return;
+
+	/* Check Frame option. Frame must start at current row */
+	if ((wc->frameOptions & FRAMEOPTION_START_CURRENT_ROW) == 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_SYNTAX_ERROR),
+				 errmsg("FRAME must start at current row when row patttern recognition is used")));
+
+	/* Transform AFTER MACH SKIP TO clause */
+	wc->rpSkipTo = windef->rpCommonSyntax->rpSkipTo;
+
+	/* Transform AFTER MACH SKIP TO variable */
+	wc->rpSkipVariable = windef->rpCommonSyntax->rpSkipVariable;
+
+	/* Transform SEEK or INITIAL clause */
+	wc->initial = windef->rpCommonSyntax->initial;
+
+	/* Transform DEFINE clause into list of TargetEntry's */
+	wc->defineClause = transformDefineClause(pstate, wc, windef, targetlist);
+
+	/* Check PATTERN clause and copy to patternClause */
+	transformPatternClause(pstate, wc, windef);
+
+	/* Transform MEASURE clause */
+	transformMeasureClause(pstate, wc, windef);
+}
+
+/*
+ * transformDefineClause Process DEFINE clause and transform ResTarget into
+ *		list of TargetEntry.
+ *
+ * XXX we only support column reference in row pattern definition search
+ * condition, e.g. "price". <row pattern definition variable name>.<column
+ * reference> is not supported, e.g. "A.price".
+ */
+static List *
+transformDefineClause(ParseState *pstate, WindowClause *wc, WindowDef *windef,
+					  List **targetlist)
+{
+	/* DEFINE variable name initials */
+	static char *defineVariableInitials = "abcdefghijklmnopqrstuvwxyz";
+
+	ListCell   *lc,
+			   *l;
+	ResTarget  *restarget,
+			   *r;
+	List	   *restargets;
+	List	   *defineClause;
+	char	   *name;
+	int			initialLen;
+	int			i;
+
+	/*
+	 * If Row Definition Common Syntax exists, DEFINE clause must exist. (the
+	 * raw parser should have already checked it.)
+	 */
+	Assert(windef->rpCommonSyntax->rpDefs != NULL);
+
+	/*
+	 * Check and add "A AS A IS TRUE" if pattern variable is missing in DEFINE
+	 * per the SQL standard.
+	 */
+	restargets = NIL;
+	foreach(lc, windef->rpCommonSyntax->rpPatterns)
+	{
+		A_Expr	   *a;
+		bool		found = false;
+
+		if (!IsA(lfirst(lc), A_Expr))
+			ereport(ERROR,
+					errmsg("node type is not A_Expr"));
+
+		a = (A_Expr *) lfirst(lc);
+		name = strVal(a->lexpr);
+
+		foreach(l, windef->rpCommonSyntax->rpDefs)
+		{
+			restarget = (ResTarget *) lfirst(l);
+
+			if (!strcmp(restarget->name, name))
+			{
+				found = true;
+				break;
+			}
+		}
+
+		if (!found)
+		{
+			/*
+			 * "name" is missing. So create "name AS name IS TRUE" ResTarget
+			 * node and add it to the temporary list.
+			 */
+			A_Const    *n;
+
+			restarget = makeNode(ResTarget);
+			n = makeNode(A_Const);
+			n->val.boolval.type = T_Boolean;
+			n->val.boolval.boolval = true;
+			n->location = -1;
+			restarget->name = pstrdup(name);
+			restarget->indirection = NIL;
+			restarget->val = (Node *) n;
+			restarget->location = -1;
+			restargets = lappend((List *) restargets, restarget);
+		}
+	}
+
+	if (list_length(restargets) >= 1)
+	{
+		/* add missing DEFINEs */
+		windef->rpCommonSyntax->rpDefs =
+			list_concat(windef->rpCommonSyntax->rpDefs, restargets);
+		list_free(restargets);
+	}
+
+	/*
+	 * Check for duplicate row pattern definition variables.  The standard
+	 * requires that no two row pattern definition variable names shall be
+	 * equivalent.
+	 */
+	restargets = NIL;
+	foreach(lc, windef->rpCommonSyntax->rpDefs)
+	{
+		restarget = (ResTarget *) lfirst(lc);
+		name = restarget->name;
+
+		/*
+		 * Add DEFINE expression (Restarget->val) to the targetlist as a
+		 * TargetEntry if it does not exist yet. Planner will add the column
+		 * ref var node to the outer plan's target list later on. This makes
+		 * DEFINE expression could access the outer tuple while evaluating
+		 * PATTERN.
+		 *
+		 * XXX: adding whole expressions of DEFINE to the plan.targetlist is
+		 * not so good, because it's not necessary to evalute the expression
+		 * in the target list while running the plan. We should extract the
+		 * var nodes only then add them to the plan.targetlist.
+		 */
+		findTargetlistEntrySQL99(pstate, (Node *) restarget->val,
+								 targetlist, EXPR_KIND_RPR_DEFINE);
+
+		/*
+		 * Make sure that the row pattern definition search condition is a
+		 * boolean expression.
+		 */
+		transformWhereClause(pstate, restarget->val,
+							 EXPR_KIND_RPR_DEFINE, "DEFINE");
+
+		foreach(l, restargets)
+		{
+			char	   *n;
+
+			r = (ResTarget *) lfirst(l);
+			n = r->name;
+
+			if (!strcmp(n, name))
+				ereport(ERROR,
+						(errcode(ERRCODE_SYNTAX_ERROR),
+						 errmsg("row pattern definition variable name \"%s\" appears more than once in DEFINE clause",
+								name),
+						 parser_errposition(pstate, exprLocation((Node *) r))));
+		}
+		restargets = lappend(restargets, restarget);
+	}
+	list_free(restargets);
+
+	/*
+	 * Create list of row pattern DEFINE variable name's initial. We assign
+	 * [a-z] to them (up to 26 variable names are allowed).
+	 */
+	restargets = NIL;
+	i = 0;
+	initialLen = strlen(defineVariableInitials);
+
+	foreach(lc, windef->rpCommonSyntax->rpDefs)
+	{
+		char		initial[2];
+
+		restarget = (ResTarget *) lfirst(lc);
+		name = restarget->name;
+
+		if (i >= initialLen)
+		{
+			ereport(ERROR,
+					(errcode(ERRCODE_SYNTAX_ERROR),
+					 errmsg("number of row pattern definition variable names exceeds %d",
+							initialLen),
+					 parser_errposition(pstate,
+										exprLocation((Node *) restarget))));
+		}
+		initial[0] = defineVariableInitials[i++];
+		initial[1] = '\0';
+		wc->defineInitial = lappend(wc->defineInitial,
+									makeString(pstrdup(initial)));
+	}
+
+	defineClause = transformTargetList(pstate, windef->rpCommonSyntax->rpDefs,
+									   EXPR_KIND_RPR_DEFINE);
+
+	/* mark column origins */
+	markTargetListOrigins(pstate, defineClause);
+
+	/* mark all nodes in the DEFINE clause tree with collation information */
+	assign_expr_collations(pstate, (Node *) defineClause);
+
+	return defineClause;
+}
+
+/*
+ * transformPatternClause
+ *		Process PATTERN clause and return PATTERN clause in the raw parse tree
+ */
+static void
+transformPatternClause(ParseState *pstate, WindowClause *wc,
+					   WindowDef *windef)
+{
+	ListCell   *lc;
+
+	/*
+	 * Row Pattern Common Syntax clause exists?
+	 */
+	if (windef->rpCommonSyntax == NULL)
+		return;
+
+	wc->patternVariable = NIL;
+	wc->patternRegexp = NIL;
+	foreach(lc, windef->rpCommonSyntax->rpPatterns)
+	{
+		A_Expr	   *a;
+		char	   *name;
+		char	   *regexp;
+
+		if (!IsA(lfirst(lc), A_Expr))
+			ereport(ERROR,
+					errmsg("node type is not A_Expr"));
+
+		a = (A_Expr *) lfirst(lc);
+		name = strVal(a->lexpr);
+
+		wc->patternVariable = lappend(wc->patternVariable, makeString(pstrdup(name)));
+		regexp = strVal(lfirst(list_head(a->name)));
+
+		wc->patternRegexp = lappend(wc->patternRegexp, makeString(pstrdup(regexp)));
+	}
+}
+
+/*
+ * transformMeasureClause
+ *		Process MEASURE clause
+ *	XXX MEASURE clause is not supported yet
+ */
+static List *
+transformMeasureClause(ParseState *pstate, WindowClause *wc,
+					   WindowDef *windef)
+{
+	if (windef->rowPatternMeasures == NIL)
+		return NIL;
+
+	ereport(ERROR,
+			(errcode(ERRCODE_SYNTAX_ERROR),
+			 errmsg("%s", "MEASURE clause is not supported yet"),
+			 parser_errposition(pstate, exprLocation((Node *) windef->rowPatternMeasures))));
+	return NIL;
+}
diff --git a/src/backend/parser/parse_expr.c b/src/backend/parser/parse_expr.c
index 9300c7b9ab..da4f42677b 100644
--- a/src/backend/parser/parse_expr.c
+++ b/src/backend/parser/parse_expr.c
@@ -557,6 +557,7 @@ transformColumnRef(ParseState *pstate, ColumnRef *cref)
 		case EXPR_KIND_COPY_WHERE:
 		case EXPR_KIND_GENERATED_COLUMN:
 		case EXPR_KIND_CYCLE_MARK:
+		case EXPR_KIND_RPR_DEFINE:
 			/* okay */
 			break;
 
@@ -1770,6 +1771,7 @@ transformSubLink(ParseState *pstate, SubLink *sublink)
 		case EXPR_KIND_VALUES:
 		case EXPR_KIND_VALUES_SINGLE:
 		case EXPR_KIND_CYCLE_MARK:
+		case EXPR_KIND_RPR_DEFINE:
 			/* okay */
 			break;
 		case EXPR_KIND_CHECK_CONSTRAINT:
@@ -3149,6 +3151,8 @@ ParseExprKindName(ParseExprKind exprKind)
 			return "GENERATED AS";
 		case EXPR_KIND_CYCLE_MARK:
 			return "CYCLE";
+		case EXPR_KIND_RPR_DEFINE:
+			return "DEFINE";
 
 			/*
 			 * There is intentionally no default: case here, so that the
diff --git a/src/backend/parser/parse_func.c b/src/backend/parser/parse_func.c
index fdb3e6df33..bf07085a15 100644
--- a/src/backend/parser/parse_func.c
+++ b/src/backend/parser/parse_func.c
@@ -2656,6 +2656,9 @@ check_srf_call_placement(ParseState *pstate, Node *last_srf, int location)
 		case EXPR_KIND_CYCLE_MARK:
 			errkind = true;
 			break;
+		case EXPR_KIND_RPR_DEFINE:
+			errkind = true;
+			break;
 
 			/*
 			 * There is intentionally no default: case here, so that the
-- 
2.25.1

