From a5397819a34ddc68dc464aa835ffb924dc113f55 Mon Sep 17 00:00:00 2001 From: Tatsuo Ishii Date: Sat, 13 Jun 2026 16:46:30 +0900 Subject: [PATCH v48 1/9] Row pattern recognition patch for raw parser v48. This patch series implements the row pattern recognition (SQL/RPR) feature. Currently the implementation is a subset of SQL/RPR (ISO/IEC 19075-2:2016). Namely, it implements some features of R020 (WINDOW clause). R010 (MATCH_RECOGNIZE) is out of the scope of the patches. Currently the following features are implemented in the patches. - PATTERN - PATTERN regular expressions (+, *, ?) alternation (|), grouping (), {n}, {n,}, {n,m}, {,m} reluctant quantifiers (*? etc.), Empty patterns ("PATTERN ()") are not permitted by the standard Anchors (^, $) are not permitted in R020 by the standard - DEFINE - INITIAL - AFTER MATCH SKIP PAST LAST ROW - AFTER MATCH SKIP TO NEXT ROW - Row pattern navigation (FIRST, LAST, PREV, NEXT and their compound forms) Currently the following features are not implemented in the patches. - MEASURES - Pattern variable name qualified column reference (e.g. A.price) - SUBSET - SEEK - AFTER MATCH SKIP TO pattern_variable - AFTER MATCH SKIP TO FIRST pattern_variable - AFTER MATCH SKIP TO LAST pattern_variable - PATTERN regular expression {- and -} - PERMUTE - CLASSIFIER Author: Tatsuo Ishii Author: Henson Choi Reviewed-by: Vik Fearing Reviewed-by: Jacob Champion Reviewed-by: Peter Eisentraut Reviewed-by: NINGWEI CHEN Reviewed-by: "David G. Johnston" Reviewed-by: Chao Li Reviewed-by: SungJun Jang Reviewed-by: Zsolt Parragi Reviewed-by: jian he Discussion: https://postgr.es/m/20230625.210509.1276733411677577841.t-ishii%40sranhm.sra.co.jp Major changes since v47 include: - Move the check prohibiting volatile expressions in the DEFINE clause from parse analysis to the planner. 
- Reject empty pattern "PATTERN ()" per the standard - Now up to 240 pattern variables are allowed (previously 252) - Fix multiple corner case bugs - Prevent unsafe AST optimization --- src/backend/parser/gram.y | 614 +++++++++++++++++++++++++++++++- src/include/nodes/parsenodes.h | 91 +++++ src/include/parser/kwlist.h | 5 + src/include/parser/parse_node.h | 2 + 4 files changed, 696 insertions(+), 16 deletions(-) diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index ff4e1388c55..4ae951aaeba 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -210,6 +210,11 @@ static void preprocess_pub_all_objtype_list(List *all_objects_list, static void preprocess_pubobj_list(List *pubobjspec_list, core_yyscan_t yyscanner); static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); +static RPRPatternNode *makeRPRSeqOrSingle(List *children, int location); +static RPRPatternNode *splitRPRTrailingAlt(RPRPatternNode *node, core_yyscan_t yyscanner); +static RPRPatternNode *makeRPRQuantifier(int32 min, int32 max, bool reluctant, + int location); +static const char *rpr_invalid_quantifier_token(const char *tok); %} @@ -718,6 +723,15 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); label_term %type opt_colid +%type row_pattern_definition +%type opt_row_pattern_common_syntax + row_pattern row_pattern_alt row_pattern_seq + row_pattern_term row_pattern_primary + row_pattern_quantifier_opt +%type row_pattern_definition_list +%type opt_row_pattern_skip_to +%type opt_row_pattern_initial_or_seek + /* * Non-keyword token types. These are hard-wired into the "flex" lexer. 
* They must be listed first so that their numeric codes do not depend on @@ -760,7 +774,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); CURRENT_TIME CURRENT_TIMESTAMP CURRENT_USER CURSOR CYCLE DATA_P DATABASE DAY_P DEALLOCATE DEC DECIMAL_P DECLARE DEFAULT DEFAULTS - DEFERRABLE DEFERRED DEFINER DELETE_P DELIMITER DELIMITERS DEPENDS DEPTH DESC DESTINATION + DEFERRABLE DEFERRED DEFINE DEFINER DELETE_P DELIMITER DELIMITERS DEPENDS DEPTH DESC DESTINATION DETACH DICTIONARY DISABLE_P DISCARD DISTINCT DO DOCUMENT_P DOMAIN_P DOUBLE_P DROP @@ -776,7 +790,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); HANDLER HAVING HEADER_P HOLD HOUR_P IDENTITY_P IF_P IGNORE_P ILIKE IMMEDIATE IMMUTABLE IMPLICIT_P IMPORT_P IN_P INCLUDE - INCLUDING INCREMENT INDENT INDEX INDEXES INHERIT INHERITS INITIALLY INLINE_P + INCLUDING INCREMENT INDENT INDEX INDEXES INHERIT INHERITS INITIAL INITIALLY INLINE_P INNER_P INOUT INPUT_P INSENSITIVE INSERT INSTEAD INT_P INTEGER INTERSECT INTERVAL INTO INVOKER IS ISNULL ISOLATION @@ -801,8 +815,8 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); ORDER ORDINALITY OTHERS OUT_P OUTER_P OVER OVERLAPS OVERLAY OVERRIDING OWNED OWNER - PARALLEL PARAMETER PARSER PARTIAL PARTITION PARTITIONS PASSING PASSWORD PATH - PERIOD PLACING PLAN PLANS POLICY PORTION + PARALLEL PARAMETER PARSER PARTIAL PARTITION PARTITIONS PASSING PASSWORD PAST PATH + PATTERN_P PERIOD PLACING PLAN PLANS POLICY PORTION POSITION PRECEDING PRECISION PRESERVE PREPARE PREPARED PRIMARY PRIOR PRIVILEGES PROCEDURAL PROCEDURE PROCEDURES PROGRAM PROPERTIES PROPERTY PUBLICATION @@ -813,7 +827,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); RESET RESPECT_P RESTART RESTRICT RETURN RETURNING RETURNS REVOKE RIGHT ROLE ROLLBACK ROLLUP ROUTINE ROUTINES ROW ROWS RULE - SAVEPOINT SCALAR SCHEMA SCHEMAS SCROLL SEARCH SECOND_P SECURITY SELECT + SAVEPOINT SCALAR SCHEMA 
SCHEMAS SCROLL SEARCH SECOND_P SECURITY SEEK SELECT SEQUENCE SEQUENCES SERIALIZABLE SERVER SESSION SESSION_USER SET SETS SETOF SHARE SHOW SIMILAR SIMPLE SKIP SMALLINT SNAPSHOT SOME SPLIT SOURCE SQL_P STABLE STANDALONE_P @@ -896,8 +910,8 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); * reference point for a precedence level that we can assign to other * keywords that lack a natural precedence level. * - * We need to do this for PARTITION, RANGE, ROWS, and GROUPS to support - * opt_existing_window_name (see comment there). + * We need to do this for PARTITION, RANGE, ROWS, GROUPS, AFTER, INITIAL, + * SEEK, PATTERN_P to support opt_existing_window_name (see comment there). * * The frame_bound productions UNBOUNDED PRECEDING and UNBOUNDED FOLLOWING * are even messier: since UNBOUNDED is an unreserved keyword (per spec!), @@ -930,6 +944,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); %nonassoc UNBOUNDED NESTED /* ideally would have same precedence as IDENT */ %nonassoc IDENT PARTITION RANGE ROWS GROUPS PRECEDING FOLLOWING CUBE ROLLUP SET KEYS OBJECT_P SCALAR TO USING VALUE_P WITH WITHOUT PATH + AFTER INITIAL SEEK PATTERN_P %left Op OPERATOR RIGHT_ARROW '|' /* multi-character ops and user-defined operators */ %left '+' '-' %left '*' '/' '%' @@ -17335,6 +17350,8 @@ over_clause: OVER window_specification n->startOffset = NULL; n->endOffset = NULL; n->location = @2; + n->frameLocation = -1; + n->excludeLocation = -1; $$ = n; } | /*EMPTY*/ @@ -17342,7 +17359,8 @@ over_clause: OVER window_specification ; window_specification: '(' opt_existing_window_name opt_partition_clause - opt_sort_clause opt_frame_clause ')' + opt_sort_clause opt_frame_clause + opt_row_pattern_common_syntax ')' { WindowDef *n = makeNode(WindowDef); @@ -17354,20 +17372,23 @@ window_specification: '(' opt_existing_window_name opt_partition_clause n->frameOptions = $5->frameOptions; n->startOffset = $5->startOffset; n->endOffset = 
$5->endOffset; + n->frameLocation = $5->frameLocation; + n->excludeLocation = $5->excludeLocation; + n->rpCommonSyntax = (RPCommonSyntax *)$6; n->location = @1; $$ = n; } ; /* - * If we see PARTITION, RANGE, ROWS or GROUPS as the first token after the '(' - * of a window_specification, we want the assumption to be that there is - * no existing_window_name; but those keywords are unreserved and so could - * be ColIds. We fix this by making them have the same precedence as IDENT - * and giving the empty production here a slightly higher precedence, so - * that the shift/reduce conflict is resolved in favor of reducing the rule. - * These keywords are thus precluded from being an existing_window_name but - * are not reserved for any other purpose. + * If we see PARTITION, RANGE, ROWS, GROUPS, AFTER, INITIAL, SEEK or PATTERN_P + * as the first token after the '(' of a window_specification, we want the + * assumption to be that there is no existing_window_name; but those keywords + * are unreserved and so could be ColIds. We fix this by making them have the + * same precedence as IDENT and giving the empty production here a slightly + * higher precedence, so that the shift/reduce conflict is resolved in favor + * of reducing the rule. These keywords are thus precluded from being an + * existing_window_name but are not reserved for any other purpose. */ opt_existing_window_name: ColId { $$ = $1; } | /*EMPTY*/ %prec Op { $$ = NULL; } @@ -17388,6 +17409,9 @@ opt_frame_clause: n->frameOptions |= FRAMEOPTION_NONDEFAULT | FRAMEOPTION_RANGE; n->frameOptions |= $3; + n->frameLocation = @1; + /* -1 when no EXCLUDE clause (opt_window_exclusion_clause returns 0) */ + n->excludeLocation = ($3 != 0) ? 
@3 : -1; $$ = n; } | ROWS frame_extent opt_window_exclusion_clause @@ -17396,6 +17420,9 @@ opt_frame_clause: n->frameOptions |= FRAMEOPTION_NONDEFAULT | FRAMEOPTION_ROWS; n->frameOptions |= $3; + n->frameLocation = @1; + /* -1 when no EXCLUDE clause (opt_window_exclusion_clause returns 0) */ + n->excludeLocation = ($3 != 0) ? @3 : -1; $$ = n; } | GROUPS frame_extent opt_window_exclusion_clause @@ -17404,6 +17431,9 @@ opt_frame_clause: n->frameOptions |= FRAMEOPTION_NONDEFAULT | FRAMEOPTION_GROUPS; n->frameOptions |= $3; + n->frameLocation = @1; + /* -1 when no EXCLUDE clause (opt_window_exclusion_clause returns 0) */ + n->excludeLocation = ($3 != 0) ? @3 : -1; $$ = n; } | /*EMPTY*/ @@ -17413,6 +17443,8 @@ opt_frame_clause: n->frameOptions = FRAMEOPTION_DEFAULTS; n->startOffset = NULL; n->endOffset = NULL; + n->frameLocation = -1; + n->excludeLocation = -1; $$ = n; } ; @@ -17488,6 +17520,8 @@ frame_bound: n->frameOptions = FRAMEOPTION_START_UNBOUNDED_PRECEDING; n->startOffset = NULL; n->endOffset = NULL; + n->frameLocation = -1; + n->excludeLocation = -1; $$ = n; } | UNBOUNDED FOLLOWING @@ -17497,6 +17531,8 @@ frame_bound: n->frameOptions = FRAMEOPTION_START_UNBOUNDED_FOLLOWING; n->startOffset = NULL; n->endOffset = NULL; + n->frameLocation = -1; + n->excludeLocation = -1; $$ = n; } | CURRENT_P ROW @@ -17506,6 +17542,8 @@ frame_bound: n->frameOptions = FRAMEOPTION_START_CURRENT_ROW; n->startOffset = NULL; n->endOffset = NULL; + n->frameLocation = -1; + n->excludeLocation = -1; $$ = n; } | a_expr PRECEDING @@ -17515,6 +17553,8 @@ frame_bound: n->frameOptions = FRAMEOPTION_START_OFFSET_PRECEDING; n->startOffset = $1; n->endOffset = NULL; + n->frameLocation = -1; + n->excludeLocation = -1; $$ = n; } | a_expr FOLLOWING @@ -17524,6 +17564,8 @@ frame_bound: n->frameOptions = FRAMEOPTION_START_OFFSET_FOLLOWING; n->startOffset = $1; n->endOffset = NULL; + n->frameLocation = -1; + n->excludeLocation = -1; $$ = n; } ; @@ -17536,6 +17578,414 @@ opt_window_exclusion_clause: | 
/*EMPTY*/ { $$ = 0; } ; +opt_row_pattern_common_syntax: +opt_row_pattern_skip_to opt_row_pattern_initial_or_seek + PATTERN_P '(' row_pattern ')' + DEFINE row_pattern_definition_list + { + RPCommonSyntax *n = makeNode(RPCommonSyntax); + n->rpSkipTo = $1; + n->initial = $2; + n->rpPattern = (RPRPatternNode *) $5; + n->rpDefs = $8; + n->location = @3; + $$ = (Node *) n; + } + | /*EMPTY*/ { $$ = NULL; } + ; + +opt_row_pattern_skip_to: + AFTER MATCH SKIP TO NEXT ROW + { + $$ = ST_NEXT_ROW; + } + | AFTER MATCH SKIP PAST LAST_P ROW + { + $$ = ST_PAST_LAST_ROW; + } + | /*EMPTY*/ + { + $$ = ST_PAST_LAST_ROW; + } + ; + +opt_row_pattern_initial_or_seek: + INITIAL { $$ = true; } + | SEEK + { + ereport(ERROR, + errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("SEEK is not supported"), + errhint("Use INITIAL instead."), + parser_errposition(@1)); + } + | /*EMPTY*/ { $$ = true; } + ; + +row_pattern: + row_pattern_alt { $$ = $1; } + ; + +row_pattern_alt: + row_pattern_seq + { + $$ = (Node *) splitRPRTrailingAlt((RPRPatternNode *) $1, + yyscanner); + } + | row_pattern_alt '|' row_pattern_seq + { + RPRPatternNode *n; + RPRPatternNode *rhs = splitRPRTrailingAlt((RPRPatternNode *) $3, + yyscanner); + + /* If left side is already ALT, append to it */ + if (IsA($1, RPRPatternNode) && + ((RPRPatternNode *) $1)->nodeType == RPR_PATTERN_ALT) + { + n = (RPRPatternNode *) $1; + n->children = lappend(n->children, rhs); + $$ = (Node *) n; + } + else + { + n = makeNode(RPRPatternNode); + n->nodeType = RPR_PATTERN_ALT; + n->children = list_make2($1, rhs); + n->min = 1; + n->max = 1; + n->reluctant = false; + n->location = @1; + $$ = (Node *) n; + } + } + ; + +row_pattern_seq: + row_pattern_term { $$ = $1; } + | row_pattern_seq row_pattern_term + { + RPRPatternNode *n; + + /* + * If left side is already SEQ, append to it. A glued + * quantifier's trailing_alt stays on the child term; + * row_pattern_alt splits on it once the seq is complete. 
+ */ + if (IsA($1, RPRPatternNode) && + ((RPRPatternNode *) $1)->nodeType == RPR_PATTERN_SEQ) + { + n = (RPRPatternNode *) $1; + n->children = lappend(n->children, $2); + $$ = (Node *) n; + } + else + { + n = makeNode(RPRPatternNode); + n->nodeType = RPR_PATTERN_SEQ; + n->children = list_make2($1, $2); + n->min = 1; + n->max = 1; + n->reluctant = false; + n->location = @1; + $$ = (Node *) n; + } + } + ; + +row_pattern_term: + row_pattern_primary row_pattern_quantifier_opt + { + RPRPatternNode *n = (RPRPatternNode *) $1; + RPRPatternNode *q = (RPRPatternNode *) $2; + + n->min = q->min; + n->max = q->max; + n->reluctant = q->reluctant; + n->trailing_alt = q->trailing_alt; + $$ = (Node *) n; + } + ; + +row_pattern_primary: + ColId + { + RPRPatternNode *n = makeNode(RPRPatternNode); + n->nodeType = RPR_PATTERN_VAR; + n->varName = $1; + n->min = 1; + n->max = 1; + n->reluctant = false; + n->children = NIL; + n->location = @1; + $$ = (Node *) n; + } + | '(' row_pattern ')' + { + RPRPatternNode *inner = (RPRPatternNode *) $2; + RPRPatternNode *n = makeNode(RPRPatternNode); + n->nodeType = RPR_PATTERN_GROUP; + n->children = list_make1(inner); + n->min = 1; + n->max = 1; + n->reluctant = false; + n->location = @1; + $$ = (Node *) n; + } + ; + +row_pattern_quantifier_opt: + /* EMPTY - no quantifier means exactly once; @$ is unused since + * min=max=1 never produces an error */ + { + $$ = (Node *) makeRPRQuantifier(1, 1, false, @$); + } + | '*' + { + $$ = (Node *) makeRPRQuantifier(0, PG_INT32_MAX, false, @1); + } + | '+' + { + $$ = (Node *) makeRPRQuantifier(1, PG_INT32_MAX, false, @1); + } + | Op + { + /* Handle single Op: ? or reluctant quantifiers *?, +?, ?? 
*/ + if (strcmp($1, "?") == 0) + $$ = (Node *) makeRPRQuantifier(0, 1, false, @1); + else if (strcmp($1, "*?") == 0) + $$ = (Node *) makeRPRQuantifier(0, PG_INT32_MAX, true, @1); + else if (strcmp($1, "+?") == 0) + $$ = (Node *) makeRPRQuantifier(1, PG_INT32_MAX, true, @1); + else if (strcmp($1, "??") == 0) + $$ = (Node *) makeRPRQuantifier(0, 1, true, @1); + else if (strcmp($1, "*|") == 0) + { + $$ = (Node *) makeRPRQuantifier(0, PG_INT32_MAX, false, @1); + ((RPRPatternNode *) $$)->trailing_alt = true; + } + else if (strcmp($1, "+|") == 0) + { + $$ = (Node *) makeRPRQuantifier(1, PG_INT32_MAX, false, @1); + ((RPRPatternNode *) $$)->trailing_alt = true; + } + else if (strcmp($1, "?|") == 0) + { + $$ = (Node *) makeRPRQuantifier(0, 1, false, @1); + ((RPRPatternNode *) $$)->trailing_alt = true; + } + else if (strcmp($1, "*?|") == 0) + { + $$ = (Node *) makeRPRQuantifier(0, PG_INT32_MAX, true, @1); + ((RPRPatternNode *) $$)->trailing_alt = true; + } + else if (strcmp($1, "+?|") == 0) + { + $$ = (Node *) makeRPRQuantifier(1, PG_INT32_MAX, true, @1); + ((RPRPatternNode *) $$)->trailing_alt = true; + } + else if (strcmp($1, "??|") == 0) + { + $$ = (Node *) makeRPRQuantifier(0, 1, true, @1); + ((RPRPatternNode *) $$)->trailing_alt = true; + } + else + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("unsupported quantifier \"%s\"", $1), + errhint("Valid quantifiers are: *, +, ?, *?, +?, ??, {n}, {n,}, {,m}, {n,m} and their reluctant versions."), + parser_errposition(@1)); + } + /* RELUCTANT quantifiers (when lexer separates tokens) */ + | '*' Op + { + if (strcmp($2, "?") == 0) + $$ = (Node *) makeRPRQuantifier(0, PG_INT32_MAX, true, @1); + else if (strcmp($2, "?|") == 0) + { + /* "A* ?|B" = reluctant "A*?" 
plus alternation */ + $$ = (Node *) makeRPRQuantifier(0, PG_INT32_MAX, true, @1); + ((RPRPatternNode *) $$)->trailing_alt = true; + } + else + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("invalid token \"%s\" after \"*\" quantifier", rpr_invalid_quantifier_token($2)), + errhint("Did you mean \"*?\" for reluctant quantifier?"), + parser_errposition(@2)); + } + | '+' Op + { + if (strcmp($2, "?") == 0) + $$ = (Node *) makeRPRQuantifier(1, PG_INT32_MAX, true, @1); + else if (strcmp($2, "?|") == 0) + { + /* "A+ ?|B" = reluctant "A+?" plus alternation */ + $$ = (Node *) makeRPRQuantifier(1, PG_INT32_MAX, true, @1); + ((RPRPatternNode *) $$)->trailing_alt = true; + } + else + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("invalid token \"%s\" after \"+\" quantifier", rpr_invalid_quantifier_token($2)), + errhint("Did you mean \"+?\" for reluctant quantifier?"), + parser_errposition(@2)); + } + | Op Op + { + if (strcmp($1, "?") != 0) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("invalid quantifier combination: \"%s%s\"", $1, $2), + errhint("Did you mean \"??\" for reluctant quantifier?"), + parser_errposition(@1)); + if (strcmp($2, "?") == 0) + $$ = (Node *) makeRPRQuantifier(0, 1, true, @1); + else if (strcmp($2, "?|") == 0) + { + /* "A? ?|B" = reluctant "A??" 
plus alternation */ + $$ = (Node *) makeRPRQuantifier(0, 1, true, @1); + ((RPRPatternNode *) $$)->trailing_alt = true; + } + else + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("invalid quantifier combination"), + errhint("Did you mean \"??\" for reluctant quantifier?"), + parser_errposition(@1)); + } + /* {n}, {n,}, {,m}, {n,m} quantifiers */ + | '{' Iconst '}' + { + if ($2 <= 0 || $2 >= PG_INT32_MAX) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("quantifier bound must be between 1 and %d", PG_INT32_MAX - 1), + parser_errposition(@2)); + $$ = (Node *) makeRPRQuantifier($2, $2, false, @1); + } + | '{' Iconst ',' '}' + { + if ($2 < 0 || $2 >= PG_INT32_MAX) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("quantifier bound must be between 0 and %d", PG_INT32_MAX - 1), + parser_errposition(@2)); + $$ = (Node *) makeRPRQuantifier($2, PG_INT32_MAX, false, @1); + } + | '{' ',' Iconst '}' + { + if ($3 <= 0 || $3 >= PG_INT32_MAX) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("quantifier bound must be between 1 and %d", PG_INT32_MAX - 1), + parser_errposition(@3)); + $$ = (Node *) makeRPRQuantifier(0, $3, false, @1); + } + | '{' Iconst ',' Iconst '}' + { + if ($2 < 0 || $4 <= 0 || $2 >= PG_INT32_MAX || $4 >= PG_INT32_MAX) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("quantifier bounds must be between 0 and %d with max >= 1", PG_INT32_MAX - 1), + parser_errposition(@2)); + if ($2 > $4) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("quantifier minimum bound must not exceed maximum"), + parser_errposition(@2)); + $$ = (Node *) makeRPRQuantifier($2, $4, false, @1); + } + /* Reluctant versions: {n}?, {n,}?, {,m}?, {n,m}? 
*/ + | '{' Iconst '}' Op + { + if (strcmp($4, "?") != 0 && strcmp($4, "?|") != 0) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("invalid token \"%s\" after range quantifier", rpr_invalid_quantifier_token($4)), + errhint("Only \"?\" is allowed after {n} to make it reluctant."), + parser_errposition(@4)); + if ($2 <= 0 || $2 >= PG_INT32_MAX) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("quantifier bound must be between 1 and %d", PG_INT32_MAX - 1), + parser_errposition(@2)); + $$ = (Node *) makeRPRQuantifier($2, $2, true, @1); + if (strcmp($4, "?|") == 0) + ((RPRPatternNode *) $$)->trailing_alt = true; + } + | '{' Iconst ',' '}' Op + { + if (strcmp($5, "?") != 0 && strcmp($5, "?|") != 0) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("invalid token \"%s\" after range quantifier", rpr_invalid_quantifier_token($5)), + errhint("Only \"?\" is allowed after {n,} or {,m} to make it reluctant."), + parser_errposition(@5)); + if ($2 < 0 || $2 >= PG_INT32_MAX) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("quantifier bound must be between 0 and %d", PG_INT32_MAX - 1), + parser_errposition(@2)); + $$ = (Node *) makeRPRQuantifier($2, PG_INT32_MAX, true, @1); + if (strcmp($5, "?|") == 0) + ((RPRPatternNode *) $$)->trailing_alt = true; + } + | '{' ',' Iconst '}' Op + { + if (strcmp($5, "?") != 0 && strcmp($5, "?|") != 0) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("invalid token \"%s\" after range quantifier", rpr_invalid_quantifier_token($5)), + errhint("Only \"?\" is allowed after {n,} or {,m} to make it reluctant."), + parser_errposition(@5)); + if ($3 <= 0 || $3 >= PG_INT32_MAX) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("quantifier bound must be between 1 and %d", PG_INT32_MAX - 1), + parser_errposition(@3)); + $$ = (Node *) makeRPRQuantifier(0, $3, true, @1); + if (strcmp($5, "?|") == 0) + ((RPRPatternNode *) $$)->trailing_alt = true; + } + | '{' Iconst ',' Iconst '}' Op + { + if (strcmp($6, 
"?") != 0 && strcmp($6, "?|") != 0) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("invalid token \"%s\" after range quantifier", rpr_invalid_quantifier_token($6)), + errhint("Only \"?\" is allowed after {n,m} to make it reluctant."), + parser_errposition(@6)); + if ($2 < 0 || $4 <= 0 || $2 >= PG_INT32_MAX || $4 >= PG_INT32_MAX) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("quantifier bounds must be between 0 and %d with max >= 1", PG_INT32_MAX - 1), + parser_errposition(@2)); + if ($2 > $4) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("quantifier minimum bound must not exceed maximum"), + parser_errposition(@2)); + $$ = (Node *) makeRPRQuantifier($2, $4, true, @1); + if (strcmp($6, "?|") == 0) + ((RPRPatternNode *) $$)->trailing_alt = true; + } + ; + +row_pattern_definition_list: + row_pattern_definition { $$ = list_make1($1); } + | row_pattern_definition_list ',' row_pattern_definition { $$ = lappend($1, $3); } + ; + +row_pattern_definition: + ColId AS a_expr + { + $$ = makeNode(ResTarget); + $$->name = $1; + $$->indirection = NIL; + $$->val = (Node *) $3; + $$->location = @1; + } + ; /* * Supporting nonterminals for expressions. 
@@ -18885,6 +19335,7 @@ unreserved_keyword: | DECLARE | DEFAULTS | DEFERRED + | DEFINE | DEFINER | DELETE_P | DELIMITER @@ -18953,6 +19404,7 @@ unreserved_keyword: | INDEXES | INHERIT | INHERITS + | INITIAL | INLINE_P | INPUT_P | INSENSITIVE @@ -19029,7 +19481,9 @@ unreserved_keyword: | PARTITIONS | PASSING | PASSWORD + | PAST | PATH + | PATTERN_P | PERIOD | PLAN | PLANS @@ -19088,6 +19542,7 @@ unreserved_keyword: | SEARCH | SECOND_P | SECURITY + | SEEK | SEQUENCE | SEQUENCES | SERIALIZABLE @@ -19477,6 +19932,7 @@ bare_label_keyword: | DEFAULTS | DEFERRABLE | DEFERRED + | DEFINE | DEFINER | DELETE_P | DELIMITER @@ -19559,6 +20015,7 @@ bare_label_keyword: | INDEXES | INHERIT | INHERITS + | INITIAL | INITIALLY | INLINE_P | INNER_P @@ -19673,7 +20130,9 @@ bare_label_keyword: | PARTITIONS | PASSING | PASSWORD + | PAST | PATH + | PATTERN_P | PERIOD | PLACING | PLAN @@ -19737,6 +20196,7 @@ bare_label_keyword: | SCROLL | SEARCH | SECURITY + | SEEK | SELECT | SEQUENCE | SEQUENCES @@ -20931,6 +21391,128 @@ makeRecursiveViewSelect(char *relname, List *aliases, Node *query) return (Node *) s; } +/* + * makeRPRQuantifier + * Create an RPRPatternNode with specified quantifier bounds. + */ +static RPRPatternNode * +makeRPRQuantifier(int32 min, int32 max, bool reluctant, int location) +{ + RPRPatternNode *n = makeNode(RPRPatternNode); + + n->min = min; + n->max = max; + n->reluctant = reluctant; + n->location = location; + return n; +} + +/* + * Build a SEQ node from children, or return the lone child unchanged. 
+ */ +static RPRPatternNode * +makeRPRSeqOrSingle(List *children, int location) +{ + RPRPatternNode *n; + + if (list_length(children) == 1) + return (RPRPatternNode *) linitial(children); + + n = makeNode(RPRPatternNode); + n->nodeType = RPR_PATTERN_SEQ; + n->children = children; + n->min = 1; + n->max = 1; + n->reluctant = false; + n->location = location; + return n; +} + +/* + * A glued quantifier such as "A*|" leaves trailing_alt set on its term while + * the enclosing sequence is built. Once the sequence is complete, split it at + * the flagged term into alt(left, right), where the right operand is the whole + * remaining sequence -- this keeps "|" as the lowest-precedence operator, so + * "A*|B C" parses as "A* | (B C)", identical to the spaced form. A flag with + * nothing to its right is a dangling "|" and is rejected. + */ +static RPRPatternNode * +splitRPRTrailingAlt(RPRPatternNode *node, core_yyscan_t yyscanner) +{ + ListCell *lc; + int i = 0; + + if (node->nodeType != RPR_PATTERN_SEQ) + { + if (node->trailing_alt) + { + node->trailing_alt = false; + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("alternation operator \"|\" requires a pattern on both sides"), + parser_errposition(node->location)); + } + return node; + } + + foreach(lc, node->children) + { + RPRPatternNode *child = (RPRPatternNode *) lfirst(lc); + + if (child->trailing_alt) + { + List *lefthalf = list_copy_head(node->children, i + 1); + List *righthalf = list_copy_tail(node->children, i + 1); + RPRPatternNode *altn; + RPRPatternNode *rightnode; + + child->trailing_alt = false; + if (righthalf == NIL) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("alternation operator \"|\" requires a pattern on both sides"), + parser_errposition(node->location)); + + /* the right branch starts at its own first element, not the seq start */ + rightnode = splitRPRTrailingAlt(makeRPRSeqOrSingle(righthalf, + ((RPRPatternNode *) linitial(righthalf))->location), + yyscanner); + altn = 
makeNode(RPRPatternNode); + altn->nodeType = RPR_PATTERN_ALT; + altn->children = list_make2(makeRPRSeqOrSingle(lefthalf, node->location), + rightnode); + altn->min = 1; + altn->max = 1; + altn->reluctant = false; + altn->location = node->location; + return altn; + } + i++; + } + return node; +} + +/* + * rpr_invalid_quantifier_token + * Return the offending part of an invalid token following a quantifier. + * + * The lexer glues a quantifier and a trailing alternation operator into a + * single token (for example "*|"). When such a glued token appears in an + * invalid position, drop the trailing '|': it is the alternation operator, + * not part of the offending quantifier, so "*|" reports '*' and "*?|" + * reports "*?". Tokens without a trailing '|' (such as "??" or "?+") are + * reported unchanged. + */ +static const char * +rpr_invalid_quantifier_token(const char *tok) +{ + size_t len = strlen(tok); + + if (len > 1 && tok[len - 1] == '|') + return pnstrdup(tok, len - 1); + return tok; +} + /* parser_init() * Initialize to parse one query string */ diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index 91377a6cde3..3266c11a711 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -581,6 +581,81 @@ typedef struct SortBy ParseLoc location; /* operator location, or -1 if none/unknown */ } SortBy; +/* + * AFTER MATCH row pattern skip to types in row pattern common syntax + */ +typedef enum RPSkipTo +{ + ST_NONE, /* no AFTER MATCH clause; default for non-RPR + * windows */ + ST_NEXT_ROW, /* SKIP TO NEXT ROW */ + ST_PAST_LAST_ROW, /* SKIP PAST LAST ROW */ +} RPSkipTo; + +/* + * RPRNavOffsetKind - status of navigation offset for tuplestore trim. + * + * The planner computes navMaxOffset/navFirstOffset for tuplestore mark + * optimization. This enum tracks whether the value is a resolved constant, + * needs runtime evaluation, or cannot be determined (retain all rows). 
+ */ +typedef enum RPRNavOffsetKind +{ + RPR_NAV_OFFSET_FIXED, /* resolved constant; use the offset value */ + RPR_NAV_OFFSET_NEEDS_EVAL, /* non-constant offset; evaluate at executor + * init */ + RPR_NAV_OFFSET_RETAIN_ALL, /* cannot determine; retain all rows (no trim) */ +} RPRNavOffsetKind; + +/* + * RPRPatternNodeType - Row Pattern Recognition pattern node types + */ +typedef enum RPRPatternNodeType +{ + RPR_PATTERN_VAR, /* variable reference */ + RPR_PATTERN_SEQ, /* sequence (concatenation) */ + RPR_PATTERN_ALT, /* alternation (|) */ + RPR_PATTERN_GROUP, /* group (parentheses) */ +} RPRPatternNodeType; + +/* + * RPRPatternNode - Row Pattern Recognition pattern AST node + */ +typedef struct RPRPatternNode +{ + NodeTag type; /* T_RPRPatternNode */ + RPRPatternNodeType nodeType; /* VAR, SEQ, ALT, GROUP */ + int32 min; /* minimum repetitions (0 for *, ?) */ + int32 max; /* maximum repetitions (PG_INT32_MAX for *, +) */ + bool reluctant; /* true for reluctant (non-greedy) */ + ParseLoc location; /* token location, or -1 */ + char *varName; /* VAR: variable name */ + List *children; /* SEQ, ALT, GROUP: child nodes */ + + /* + * Transient parse flag, cleared by splitRPRTrailingAlt before the tree is + * finalized: a glued quantifier such as "*|" is immediately followed by + * the alternation operator '|'. It is always false in a finalized tree, + * so it has no effect on the pg_stat_statements queryid. 
+ */ + bool trailing_alt; +} RPRPatternNode; + +/* + * RPCommonSyntax - raw representation of row pattern common syntax + */ +typedef struct RPCommonSyntax +{ + NodeTag type; + RPSkipTo rpSkipTo; /* Row Pattern AFTER MATCH SKIP type */ + bool initial; /* true for INITIAL (explicit or by + * default); SEEK is rejected */ + RPRPatternNode *rpPattern; /* PATTERN clause AST */ + List *rpDefs; /* row pattern definitions clause (list of + * ResTarget) */ + ParseLoc location; /* PATTERN keyword location, or -1 */ +} RPCommonSyntax; + /* * WindowDef - raw representation of WINDOW and OVER clauses * @@ -596,10 +671,13 @@ typedef struct WindowDef char *refname; /* referenced window name, if any */ List *partitionClause; /* PARTITION BY expression list */ List *orderClause; /* ORDER BY (list of SortBy) */ + RPCommonSyntax *rpCommonSyntax; /* row pattern common syntax */ int frameOptions; /* frame_clause options, see below */ Node *startOffset; /* expression for starting bound, if any */ Node *endOffset; /* expression for ending bound, if any */ ParseLoc location; /* parse location, or -1 if none/unknown */ + ParseLoc frameLocation; /* ROWS/RANGE/GROUPS location, or -1 */ + ParseLoc excludeLocation; /* EXCLUDE location, or -1 */ } WindowDef; /* @@ -1648,6 +1726,11 @@ typedef struct GroupingSet * the orderClause might or might not be copied (see copiedOrder); the framing * options are never copied, per spec. * + * "defineClause" is Row Pattern Recognition DEFINE clause (list of + * TargetEntry). TargetEntry.resname represents row pattern definition + * variable name. "rpPattern" represents PATTERN clause as an AST tree + * (RPRPatternNode). + * * The information relevant for the query jumbling is the partition clause * type and its bounds. */ @@ -1677,6 +1760,14 @@ typedef struct WindowClause Index winref; /* ID referenced by window functions */ /* did we copy orderClause from refname? 
*/ bool copiedOrder pg_node_attr(query_jumble_ignore); + /* Row Pattern AFTER MATCH SKIP clause */ + RPSkipTo rpSkipTo; /* Row Pattern Skip To type */ + bool initial; /* true if the match mode is + * INITIAL */ + /* Row Pattern DEFINE clause (list of TargetEntry) */ + List *defineClause; + /* Row Pattern PATTERN clause AST */ + RPRPatternNode *rpPattern; } WindowClause; /* diff --git a/src/include/parser/kwlist.h b/src/include/parser/kwlist.h index 51ead54f015..3894fad9023 100644 --- a/src/include/parser/kwlist.h +++ b/src/include/parser/kwlist.h @@ -129,6 +129,7 @@ PG_KEYWORD("default", DEFAULT, RESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("defaults", DEFAULTS, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("deferrable", DEFERRABLE, RESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("deferred", DEFERRED, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("define", DEFINE, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("definer", DEFINER, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("delete", DELETE_P, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("delimiter", DELIMITER, UNRESERVED_KEYWORD, BARE_LABEL) @@ -221,6 +222,7 @@ PG_KEYWORD("index", INDEX, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("indexes", INDEXES, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("inherit", INHERIT, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("inherits", INHERITS, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("initial", INITIAL, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("initially", INITIALLY, RESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("inline", INLINE_P, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("inner", INNER_P, TYPE_FUNC_NAME_KEYWORD, BARE_LABEL) @@ -347,7 +349,9 @@ PG_KEYWORD("partition", PARTITION, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("partitions", PARTITIONS, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("passing", PASSING, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("password", PASSWORD, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("past", PAST, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("path", PATH, UNRESERVED_KEYWORD, BARE_LABEL) 
+PG_KEYWORD("pattern", PATTERN_P, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("period", PERIOD, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("placing", PLACING, RESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("plan", PLAN, UNRESERVED_KEYWORD, BARE_LABEL) @@ -415,6 +419,7 @@ PG_KEYWORD("scroll", SCROLL, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("search", SEARCH, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("second", SECOND_P, UNRESERVED_KEYWORD, AS_LABEL) PG_KEYWORD("security", SECURITY, UNRESERVED_KEYWORD, BARE_LABEL) +PG_KEYWORD("seek", SEEK, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("select", SELECT, RESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("sequence", SEQUENCE, UNRESERVED_KEYWORD, BARE_LABEL) PG_KEYWORD("sequences", SEQUENCES, UNRESERVED_KEYWORD, BARE_LABEL) diff --git a/src/include/parser/parse_node.h b/src/include/parser/parse_node.h index f7f4ba6c2a8..cb9d02c00c4 100644 --- a/src/include/parser/parse_node.h +++ b/src/include/parser/parse_node.h @@ -51,6 +51,7 @@ typedef enum ParseExprKind EXPR_KIND_WINDOW_FRAME_RANGE, /* window frame clause with RANGE */ EXPR_KIND_WINDOW_FRAME_ROWS, /* window frame clause with ROWS */ EXPR_KIND_WINDOW_FRAME_GROUPS, /* window frame clause with GROUPS */ + EXPR_KIND_RPR_DEFINE, /* DEFINE */ EXPR_KIND_SELECT_TARGET, /* SELECT target list item */ EXPR_KIND_INSERT_TARGET, /* INSERT target list item */ EXPR_KIND_UPDATE_SOURCE, /* UPDATE assignment source item */ @@ -230,6 +231,7 @@ struct ParseState ParseNamespaceItem *p_grouping_nsitem; /* NSItem for grouping, or NULL */ List *p_windowdefs; /* raw representations of window clauses */ ParseExprKind p_expr_kind; /* what kind of expression we're parsing */ + List *p_rpr_pattern_vars; /* RPR variable names for DEFINE clause */ int p_next_resno; /* next targetlist resno to assign */ List *p_multiassign_exprs; /* junk tlist entries for multiassign */ List *p_locking_clause; /* raw FOR UPDATE/FOR SHARE info */ -- 2.43.0