From 6be166670fbdaa233d343125cbf81b3af82d83bc Mon Sep 17 00:00:00 2001 From: Henson Choi Date: Fri, 6 Feb 2026 14:15:45 +0900 Subject: [PATCH] Fix RPR pattern compilation crash and refactor EXPLAIN deparse Fix several bugs in RPR (Row Pattern Recognition) NFA bytecode compilation, pattern optimization, and EXPLAIN output. Bugs fixed: - Fix server crash in fillRPRPatternAlt() when an inner ALT is the last element of an outer ALT's branch. Both alternations tried to set the 'next' pointer on the same endpoint element, triggering Assert(pat->elements[endPos].next == RPR_ELEMIDX_INVALID). Fix by redirecting all elements in the branch that share the old target to point to the outer ALT's afterAlt instead. (Test: PATTERN (C (A | B) | D) -- inner ALT at end of outer branch) - Add BEGIN element to fillRPRPatternGroup(). Previously only END was emitted for quantified groups, so groups with min=0 had no skip path to bypass the group content. Change to BEGIN/END pairs where BEGIN.jump points past END (skip) and END.jump points to the first child (loop-back). Add nfa_advance_begin() to the NFA executor for group entry and skip handling. (Test: PATTERN (A ((B | C) (D | E))* F?) -- * group matching 0 times) - Fix deparse output producing wrong parenthesization for nested ALT patterns. The flat-loop approach could not correctly suppress double parentheses for single-ALT groups or handle depth transitions around alternation separators. Refactor into mutual recursion: deparse_rpr_elements, deparse_rpr_group, deparse_rpr_alt, and deparse_rpr_var. - Fix deparse separator ordering for nested ALT: the ' | ' separator was emitted before closing inner parentheses, producing output like '(c (a | b | )d)' instead of '(c (a | b) | d)'. Close parens to match separator depth before emitting ' | '. - Fix missing ALT separator registration in deparse_rpr_alt() when an ALT is the first element of an outer ALT's branch. The code only checked elem->next (always -1 for ALT markers) but not elem->jump, which carries the outer branch's separator position. - Fix altEndPositions/altBranchStarts length mismatch caused by the 'if (*idx > branchStart)' guard that skipped empty branches. Remove the guard so both lists always have matching entries. - Fix RPRPatternNode.reluctant initialization in gram.y. ALT, SEQ, VAR, and GROUP primary productions used false (0) or left it uninitialized (defaulting to 0 from palloc0), but 0 is a valid ParseLoc meaning "location at offset 0". Change all four creation sites to use -1 (no location), matching the convention used by ParseLoc throughout the parser. - Fix reluctant quantifier display in both explain.c and ruleutils.c. A bare variable with reluctant ? (e.g. A?) was displayed as just 'a' since there was no quantifier to attach ? to. Now emit '{1}?' to make the reluctant marker unambiguous. New optimization: - Add mergeConsecutiveAlts() to the SEQ optimization pipeline. After GROUP{1,1} unwrap, bare alternations like (A | B) become ALT nodes in the SEQ. This step detects consecutive identical ALT nodes and wraps them in a GROUP with the appropriate quantifier, e.g. (A | B) (A | B) (A | B) -> (A | B){3}. Combined with the existing mergeGroupPrefixSuffix, patterns like (A | B) (A | B)+ (A | B) further reduce to (A | B){3,}. - Extend tryMultiplyQuantifiers() to fold nested GROUP quantifiers (e.g. ((A B)+)* -> (A B)*) in addition to VAR quantifiers. Other changes: - Add RPR_VARID_BEGIN (252) to rpr.h for the new control element type. Reduce RPR_VARID_MAX from 252 to 251 accordingly and update the maximum-variables boundary tests. - Add RPR_DEPTH_NONE (255) as sentinel for top-level elements that have no enclosing group. Reduce RPR_DEPTH_MAX to 254 accordingly. - Add BEGIN handling to computeAbsorbabilityRecursive() so that absorbability flags propagate correctly through group boundaries. - Extract show_rpr_nfa_stats() from show_windowagg_info() for NFA statistics display. - Replace defensive NULL check in collectPatternVariables() with Assert, since the caller guarantees a non-NULL pattern. - Pair each of the 82 EXPLAIN test queries in rpr_explain.sql with a pg_get_viewdef() check via CREATE TEMP VIEW, verifying that both the ruleutils (parse tree) and explain (bytecode) deparse paths produce consistent PATTERN output. Also add EXPLAIN test cases for nested ALT patterns, consecutive ALT merging, and ALT prefix/suffix merging. --- doc/src/sgml/advanced.sgml | 58 +- doc/src/sgml/ref/select.sgml | 52 +- src/backend/commands/explain.c | 529 +++-- src/backend/executor/nodeWindowAgg.c | 39 + src/backend/optimizer/plan/rpr.c | 224 ++- src/backend/parser/gram.y | 6 +- src/backend/utils/adt/ruleutils.c | 6 +- src/include/optimizer/rpr.h | 15 +- src/test/regress/expected/rpr.out | 15 +- src/test/regress/expected/rpr_base.out | 87 +- src/test/regress/expected/rpr_explain.out | 2166 +++++++++++++++++++-- src/test/regress/sql/rpr.sql | 6 +- src/test/regress/sql/rpr_base.sql | 60 +- src/test/regress/sql/rpr_explain.sql | 1517 +++++++++++++-- 14 files changed, 4211 insertions(+), 569 deletions(-) diff --git a/doc/src/sgml/advanced.sgml b/doc/src/sgml/advanced.sgml index eec2a0a9346..241c7e03a5a 100644 --- a/doc/src/sgml/advanced.sgml +++ b/doc/src/sgml/advanced.sgml @@ -574,9 +574,15 @@ DEFINE conditions defined in the DEFINE clause. For example following PATTERN defines a sequence of rows starting with the a row satisfying "LOWPRICE", then one or more rows satisfying - "UP" and finally one or more rows satisfying "DOWN". Note that "+" means - one or more matches. Also you can use "*", which means zero or more - matches. If a sequence of rows which satisfies the PATTERN is found, in + "UP" and finally one or more rows satisfying "DOWN". Pattern variables + can be followed by quantifiers: "+" means one or more matches, + "*" means zero or more matches, "?" means zero or one match (optional), + "{n}" means exactly n matches, "{n,}" means at least n matches, + "{,m}" means at most m matches, and "{n,m}" means between n and m matches. + Patterns can be grouped using parentheses and combined using alternation + (the vertical bar "|" for OR). For example, "(UP DOWN)+" matches one or + more repetitions of UP followed by DOWN. + If a sequence of rows which satisfies the PATTERN is found, in the starting row all columns or functions are shown in the target list. Note that aggregations only look into the matched rows, rather than the whole frame. On the second or subsequent rows all window functions are @@ -622,6 +628,52 @@ FROM stock + + Row pattern recognition internally uses a nondeterministic finite + automaton (NFA) to match patterns. For patterns with unbounded + quantifiers (e.g., A+ or (A B)+), + the NFA may need to track many active matching contexts simultaneously, + which could potentially lead to O(n2) + complexity as the number of rows increases. + + + + Before execution, PostgreSQL automatically + optimizes patterns to simplify their structure. This includes flattening + nested sequences and alternations, merging consecutive identical variables + (e.g., A{2,3} A{1,2} becomes A{3,5}), + removing duplicate alternatives + (e.g., (A | B | A) becomes (A | B)), + and simplifying nested quantifiers + (e.g., (A*)* becomes A*). + These optimizations reduce pattern complexity and also decrease + nesting depth, making the 255-level depth limit rarely encountered. + They are applied transparently and can be observed + in EXPLAIN output. + + + + To mitigate this, PostgreSQL employs + a context absorption optimization. When a pattern starts with a greedy + unbounded element, newer matching contexts cannot produce longer matches + than older contexts. By detecting and eliminating these redundant + contexts, the matching complexity is reduced from + O(n2) to O(n) for many common patterns. + + + + When examining query plans for row pattern recognition with + EXPLAIN, the pattern output may include special + markers that indicate optimization opportunities. A double quote + " marks where pattern absorption can occur, + and a single quote ' marks absorbable elements + within a branch. For example, A+" indicates that + repeated matches of A can be absorbed, while (A' B')+" + shows that both A and B within the group are absorbable. + These markers are primarily useful for understanding internal + optimization behavior. + + When a query involves multiple window functions, it is possible to write out each one with a separate OVER clause, but this is diff --git a/doc/src/sgml/ref/select.sgml b/doc/src/sgml/ref/select.sgml index 49b3c00d9f2..7ec7760f472 100644 --- a/doc/src/sgml/ref/select.sgml +++ b/doc/src/sgml/ref/select.sgml @@ -979,8 +979,8 @@ WINDOW window_name AS ( frame_clause can be one of -{ RANGE | ROWS | GROUPS } frame_start [ frame_exclusion ] [row_pattern_common_syntax] -{ RANGE | ROWS | GROUPS } BETWEEN frame_start AND frame_end [ frame_exclusion ] [row_pattern_common_syntax] +{ RANGE | ROWS | GROUPS } frame_start [ frame_exclusion ] [ row_pattern_common_syntax ] +{ RANGE | ROWS | GROUPS } BETWEEN frame_start AND frame_end [ frame_exclusion ] [ row_pattern_common_syntax ] where frame_start @@ -1098,15 +1098,15 @@ EXCLUDE NO OTHERS [ { AFTER MATCH SKIP PAST LAST ROW | AFTER MATCH SKIP TO NEXT ROW } ] [ INITIAL | SEEK ] -PATTERN ( pattern_variable_name[*|+|?] [, ...] ) -DEFINE definition_varible_name AS expression [, ...] +PATTERN ( pattern_variable_name [ quantifier ] [, ...] ) +DEFINE definition_variable_name AS expression [, ...] AFTER MATCH SKIP PAST LAST ROW or AFTER MATCH SKIP TO NEXT ROW controls how to proceed to next row position after a match found. With AFTER MATCH SKIP PAST LAST ROW (the default) next row position is next to the last row of previous match. On the other hand, with AFTER MATCH SKIP TO NEXT - ROW next row position is always next to the last row of previous + ROW next row position is next to the first row of previous match. INITIAL or SEEK defines how a successful pattern matching starts from which row in a frame. If INITIAL is specified, the match must start @@ -1117,16 +1117,48 @@ DEFINE definition_varible_name AS < defines definition variables along with a boolean expression. PATTERN defines a sequence of rows that satisfies certain conditions using variables defined - in DEFINE clause. If the variable is not defined in - the DEFINE clause, it is implicitly assumed following - is defined in the DEFINE clause. + in DEFINE clause. Each pattern variable can be + followed by a quantifier to specify how many times it should match: + * (zero or more), + + (one or more), + ? (zero or one), + {n} (exactly n times), + {n,} (at least n times), + {,m} (at most m times), or + {n,m} + (between n and m times). + Reluctant quantifiers (e.g., *?, +?, + ??, {n,m}?) + are not supported. + Patterns can be grouped using parentheses, and alternation (OR) can be + expressed using the vertical bar |. + For example, (A B)+ matches one or more repetitions + of the sequence A followed by B, and A | B matches + either A or B. + If a pattern variable is not defined in + the DEFINE clause, it is not automatically added + to the DEFINE clause. Instead, the executor evaluates + the variable as TRUE at execution time, behaving as if + the following definition existed. variable_name AS TRUE - Note that the maximum number of variables defined - in DEFINE clause is 26. + Conversely, variables defined in the DEFINE clause + but not used in the PATTERN clause are filtered out + during query planning. + + + + Note that the maximum number of unique pattern variables + used in PATTERN clause is 251. + If this limit is exceeded, an error will be raised. + Additionally, the maximum nesting depth of pattern groups + (parentheses) is 255 levels. + However, pattern optimizations such as flattening nested sequences + and simplifying nested quantifiers may reduce the effective depth, + so this limit is rarely reached in practice. diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index cc762d7b21b..96542e9538d 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -120,6 +120,18 @@ static void show_window_keys(StringInfo buf, PlanState *planstate, List *ancestors, ExplainState *es); static void append_rpr_quantifier(StringInfo buf, RPRPatternElement *elem); static char *deparse_rpr_pattern(RPRPattern *pattern); +static void deparse_rpr_elements(RPRPattern *pattern, int *idx, + StringInfoData *buf, RPRDepth groupDepth, + RPRDepth *prevDepth, bool *needSpace); +static void deparse_rpr_group(RPRPattern *pattern, int *idx, + StringInfoData *buf, RPRDepth *prevDepth, + bool *needSpace); +static void deparse_rpr_alt(RPRPattern *pattern, int *idx, + StringInfoData *buf, RPRDepth *prevDepth, + bool *needSpace, List **altSeps); +static void deparse_rpr_var(RPRPattern *pattern, int *idx, + StringInfoData *buf, RPRDepth *prevDepth, + bool *needSpace, List **altSeps); static void show_storage_info(char *maxStorageType, int64 maxSpaceUsed, ExplainState *es); static void show_tablesample(TableSampleClause *tsc, PlanState *planstate, @@ -130,6 +142,7 @@ static void show_incremental_sort_info(IncrementalSortState *incrsortstate, static void show_hash_info(HashState *hashstate, ExplainState *es); static void show_material_info(MaterialState *mstate, ExplainState *es); static void show_windowagg_info(WindowAggState *winstate, ExplainState *es); +static void show_rpr_nfa_stats(WindowAggState *winstate, ExplainState *es); static void show_ctescan_info(CteScanState *ctescanstate, ExplainState *es); static void show_table_func_scan_info(TableFuncScanState *tscanstate, ExplainState *es); @@ -2913,126 +2926,261 @@ append_rpr_quantifier(StringInfo buf, RPRPatternElement *elem) appendStringInfo(buf, "{%d,%d}", elem->min, elem->max); if (RPRElemIsReluctant(elem)) + { + if (elem->min == 1 && elem->max == 1) + appendStringInfo(buf, "{1}"); /* make reluctant ? unambiguous */ appendStringInfoChar(buf, '?'); + } /* Append absorption markers: " for judgment point, ' for branch only */ if (RPRElemIsAbsorbable(elem)) + { + Assert(elem->max == RPR_QUANTITY_INF); appendStringInfoChar(buf, '"'); + } else if (RPRElemIsAbsorbableBranch(elem)) appendStringInfoChar(buf, '\''); } /* * Deparse a compiled RPRPattern (bytecode) back to pattern string. - * Simple approach: output parens for each depth level as-is. + * + * Walks the flat bytecode array using mutual recursion: deparse_rpr_elements + * processes sequential elements, and deparse_rpr_group handles BEGIN...END + * groups by recursing back into deparse_rpr_elements for the group content. */ static char * deparse_rpr_pattern(RPRPattern *pattern) { StringInfoData buf; - int i; + int idx = 0; RPRDepth prevDepth = 0; bool needSpace = false; - List *altSepList = NIL; /* list of pending ALT separator positions */ - if (pattern == NULL || pattern->numElements == 0) - return NULL; + Assert(pattern != NULL && pattern->numElements >= 2); initStringInfo(&buf); - for (i = 0; i < pattern->numElements; i++) + deparse_rpr_elements(pattern, &idx, &buf, RPR_DEPTH_NONE, + &prevDepth, &needSpace); + + /* Close remaining open parens */ + while (prevDepth > 0) + { + appendStringInfoChar(&buf, ')'); + prevDepth--; + } + + return buf.data; +} + +/* + * Process pattern elements sequentially until FIN or END at groupDepth. + * + * When groupDepth >= 0, stops at the matching END element (leaving idx + * pointing to it) so the caller (deparse_rpr_group) can consume it. + * When groupDepth < 0, processes until FIN (top-level call). + */ +static void +deparse_rpr_elements(RPRPattern *pattern, int *idx, StringInfoData *buf, + RPRDepth groupDepth, RPRDepth *prevDepth, + bool *needSpace) +{ + List *altSeps = NIL; /* pending alternation separator indices */ + + while (*idx < pattern->numElements) { - RPRPatternElement *elem = &pattern->elements[i]; + RPRPatternElement *elem = &pattern->elements[*idx]; if (RPRElemIsFin(elem)) break; - /* Alternation separator - check if current position is in the list */ - if (list_member_int(altSepList, i)) - { - appendStringInfoString(&buf, " | "); - needSpace = false; - altSepList = list_delete_int(altSepList, i); - } + /* Stop at END matching our group depth; caller handles it */ + if (RPRElemIsEnd(elem) && elem->depth == groupDepth) + break; - if (RPRElemIsAlt(elem)) + /* Alternation separator */ + if (list_member_int(altSeps, *idx)) { - /* Open parens up to ALT's depth (content is at depth+1) */ - while (prevDepth < elem->depth) + /* Close parens to match separator depth first */ + while (*prevDepth > elem->depth) { - if (needSpace) - appendStringInfoChar(&buf, ' '); - appendStringInfoChar(&buf, '('); - prevDepth++; - needSpace = false; + appendStringInfoChar(buf, ')'); + (*prevDepth)--; } - /* ALT's first element may have jump to next alternative */ - if (elem->next != RPR_ELEMIDX_INVALID) - { - RPRPatternElement *firstElem = &pattern->elements[elem->next]; - - if (firstElem->jump != RPR_ELEMIDX_INVALID) - altSepList = list_append_unique_int(altSepList, firstElem->jump); - } - continue; + appendStringInfoString(buf, " | "); + *needSpace = false; + altSeps = list_delete_int(altSeps, *idx); } - if (RPRElemIsEnd(elem)) + /* Dispatch to element-type handlers */ + if (RPRElemIsAlt(elem)) + deparse_rpr_alt(pattern, idx, buf, prevDepth, + needSpace, &altSeps); + else if (RPRElemIsBegin(elem)) + deparse_rpr_group(pattern, idx, buf, prevDepth, + needSpace); + else if (RPRElemIsVar(elem)) + deparse_rpr_var(pattern, idx, buf, prevDepth, + needSpace, &altSeps); + } + list_free(altSeps); +} + +/* + * Process a BEGIN...END group. + * + * Consumes BEGIN, recurses into deparse_rpr_elements for group content, + * then consumes END and outputs the group quantifier. + * + * When the group wraps a single ALT with no siblings, the group-level + * parenthesis is suppressed since the ALT-to-children depth transition + * already provides it (avoids double parens like "((a | b))+"). + */ +static void +deparse_rpr_group(RPRPattern *pattern, int *idx, StringInfoData *buf, + RPRDepth *prevDepth, bool *needSpace) +{ + RPRPatternElement *begin = &pattern->elements[*idx]; + RPRDepth childDepth = begin->depth + 1; + bool singleAlt = false; + RPRPatternElement *end; + + /* + * Check if this group wraps a single ALT with no siblings. + * Scan from after ALT to END: if no element at childDepth exists, + * the ALT is the sole child. + */ + if (*idx + 1 < pattern->numElements && + RPRElemIsAlt(&pattern->elements[*idx + 1])) + { + int j; + + singleAlt = true; + for (j = *idx + 2; j < pattern->numElements; j++) { - /* Close down to END's depth, output quantifier */ - while (prevDepth > elem->depth + 1) + RPRPatternElement *e = &pattern->elements[j]; + + if (RPRElemIsEnd(e) && e->depth == begin->depth) + break; + if (e->depth <= childDepth) { - appendStringInfoChar(&buf, ')'); - prevDepth--; + singleAlt = false; + break; } - appendStringInfoChar(&buf, ')'); - append_rpr_quantifier(&buf, elem); - prevDepth = elem->depth; - needSpace = true; - continue; } + } - if (RPRElemIsVar(elem)) - { - /* Open parens for depth increase */ - while (prevDepth < elem->depth) - { - if (needSpace) - appendStringInfoChar(&buf, ' '); - appendStringInfoChar(&buf, '('); - prevDepth++; - needSpace = false; - } + /* Open group paren (unless single ALT provides it) */ + if (!singleAlt) + { + if (*needSpace) + appendStringInfoChar(buf, ' '); + appendStringInfoChar(buf, '('); + *needSpace = false; + } + *prevDepth = childDepth; + (*idx)++; /* consume BEGIN */ - /* Close parens for depth decrease */ - while (prevDepth > elem->depth) - { - appendStringInfoChar(&buf, ')'); - prevDepth--; - } + /* Process group children; stops at matching END */ + deparse_rpr_elements(pattern, idx, buf, begin->depth, + prevDepth, needSpace); - if (needSpace) - appendStringInfoChar(&buf, ' '); + /* Consume END and output quantifier */ + Assert(*idx < pattern->numElements); + end = &pattern->elements[*idx]; + Assert(RPRElemIsEnd(end) && end->depth == begin->depth); - appendStringInfoString(&buf, pattern->varNames[elem->varId]); - append_rpr_quantifier(&buf, elem); - needSpace = true; + while (*prevDepth > end->depth + 1) + { + appendStringInfoChar(buf, ')'); + (*prevDepth)--; + } + if (!singleAlt) + appendStringInfoChar(buf, ')'); + append_rpr_quantifier(buf, end); + *prevDepth = end->depth; + *needSpace = true; + (*idx)++; /* consume END */ +} - if (elem->jump != RPR_ELEMIDX_INVALID) - altSepList = list_append_unique_int(altSepList, elem->jump); - } +/* + * Process an ALT element: adjust depth parens and register separator positions. + */ +static void +deparse_rpr_alt(RPRPattern *pattern, int *idx, StringInfoData *buf, + RPRDepth *prevDepth, bool *needSpace, List **altSeps) +{ + RPRPatternElement *elem = &pattern->elements[*idx]; + + /* Close parens for depth decrease */ + while (*prevDepth > elem->depth) + { + appendStringInfoChar(buf, ')'); + (*prevDepth)--; + *needSpace = true; } - list_free(altSepList); + /* Open parens up to ALT's depth */ + while (*prevDepth < elem->depth) + { + if (*needSpace) + appendStringInfoChar(buf, ' '); + appendStringInfoChar(buf, '('); + (*prevDepth)++; + *needSpace = false; + } - /* Close remaining */ - while (prevDepth > 0) + /* Register next alternation separator position */ + if (elem->next != RPR_ELEMIDX_INVALID) { - appendStringInfoChar(&buf, ')'); - prevDepth--; + RPRPatternElement *firstElem = &pattern->elements[elem->next]; + + if (firstElem->jump != RPR_ELEMIDX_INVALID) + *altSeps = lappend_int(*altSeps, firstElem->jump); + } + if (elem->jump != RPR_ELEMIDX_INVALID) + *altSeps = lappend_int(*altSeps, elem->jump); + (*idx)++; +} + +/* + * Process a VAR element: adjust depth parens and output variable name. + */ +static void +deparse_rpr_var(RPRPattern *pattern, int *idx, StringInfoData *buf, + RPRDepth *prevDepth, bool *needSpace, List **altSeps) +{ + RPRPatternElement *elem = &pattern->elements[*idx]; + + /* Open parens for depth increase */ + while (*prevDepth < elem->depth) + { + if (*needSpace) + appendStringInfoChar(buf, ' '); + appendStringInfoChar(buf, '('); + (*prevDepth)++; + *needSpace = false; } - return buf.data; + /* Close parens for depth decrease */ + while (*prevDepth > elem->depth) + { + appendStringInfoChar(buf, ')'); + (*prevDepth)--; + } + + if (*needSpace) + appendStringInfoChar(buf, ' '); + + Assert(elem->varId < pattern->numVars); + appendStringInfoString(buf, pattern->varNames[elem->varId]); + append_rpr_quantifier(buf, elem); + *needSpace = true; + + if (elem->jump != RPR_ELEMIDX_INVALID) + *altSeps = lappend_int(*altSeps, elem->jump); + (*idx)++; } /* @@ -3673,140 +3821,155 @@ show_windowagg_info(WindowAggState *winstate, ExplainState *es) /* Show NFA statistics for Row Pattern Recognition */ if (wagg->rpPattern != NULL) + show_rpr_nfa_stats(winstate, es); +} + +/* + * Show NFA statistics for Row Pattern Recognition on WindowAgg node. + */ +static void +show_rpr_nfa_stats(WindowAggState *winstate, ExplainState *es) +{ + if (es->format != EXPLAIN_FORMAT_TEXT) { - if (es->format != EXPLAIN_FORMAT_TEXT) + /* State and context counters */ + ExplainPropertyInteger("NFA States Peak", NULL, winstate->nfaStatesMax, es); + ExplainPropertyInteger("NFA States Total", NULL, winstate->nfaStatesTotalCreated, es); + ExplainPropertyInteger("NFA States Merged", NULL, winstate->nfaStatesMerged, es); + ExplainPropertyInteger("NFA Contexts Peak", NULL, winstate->nfaContextsMax, es); + ExplainPropertyInteger("NFA Contexts Total", NULL, winstate->nfaContextsTotalCreated, es); + ExplainPropertyInteger("NFA Contexts Absorbed", NULL, winstate->nfaContextsAbsorbed, es); + ExplainPropertyInteger("NFA Contexts Skipped", NULL, winstate->nfaContextsSkipped, es); + ExplainPropertyInteger("NFA Contexts Pruned", NULL, winstate->nfaContextsPruned, es); + + /* Match/mismatch counts and length statistics */ + ExplainPropertyInteger("NFA Matched", NULL, winstate->nfaMatchesSucceeded, es); + ExplainPropertyInteger("NFA Mismatched", NULL, winstate->nfaMatchesFailed, es); + if (winstate->nfaMatchesSucceeded > 0) { - ExplainPropertyInteger("NFA States Peak", NULL, winstate->nfaStatesMax, es); - ExplainPropertyInteger("NFA States Total", NULL, winstate->nfaStatesTotalCreated, es); - ExplainPropertyInteger("NFA States Merged", NULL, winstate->nfaStatesMerged, es); - ExplainPropertyInteger("NFA Contexts Peak", NULL, winstate->nfaContextsMax, es); - ExplainPropertyInteger("NFA Contexts Total", NULL, winstate->nfaContextsTotalCreated, es); - ExplainPropertyInteger("NFA Contexts Absorbed", NULL, winstate->nfaContextsAbsorbed, es); - ExplainPropertyInteger("NFA Contexts Skipped", NULL, winstate->nfaContextsSkipped, es); - ExplainPropertyInteger("NFA Contexts Pruned", NULL, winstate->nfaContextsPruned, es); - ExplainPropertyInteger("NFA Matched", NULL, winstate->nfaMatchesSucceeded, es); - ExplainPropertyInteger("NFA Mismatched", NULL, winstate->nfaMatchesFailed, es); - if (winstate->nfaMatchesSucceeded > 0) - { - ExplainPropertyInteger("NFA Match Length Min", NULL, winstate->nfaMatchLen.min, es); - ExplainPropertyInteger("NFA Match Length Max", NULL, winstate->nfaMatchLen.max, es); - ExplainPropertyFloat("NFA Match Length Avg", NULL, - (double) winstate->nfaMatchLen.total / winstate->nfaMatchesSucceeded, 1, - es); - } - if (winstate->nfaMatchesFailed > 0) - { - ExplainPropertyInteger("NFA Mismatch Length Min", NULL, winstate->nfaFailLen.min, es); - ExplainPropertyInteger("NFA Mismatch Length Max", NULL, winstate->nfaFailLen.max, es); - ExplainPropertyFloat("NFA Mismatch Length Avg", NULL, - (double) winstate->nfaFailLen.total / winstate->nfaMatchesFailed, 1, - es); - } - if (winstate->nfaContextsAbsorbed > 0) - { - ExplainPropertyInteger("NFA Absorbed Length Min", NULL, winstate->nfaAbsorbedLen.min, es); - ExplainPropertyInteger("NFA Absorbed Length Max", NULL, winstate->nfaAbsorbedLen.max, es); - ExplainPropertyFloat("NFA Absorbed Length Avg", NULL, - (double) winstate->nfaAbsorbedLen.total / winstate->nfaContextsAbsorbed, 1, - es); - } - if (winstate->nfaContextsSkipped > 0) - { - ExplainPropertyInteger("NFA Skipped Length Min", NULL, winstate->nfaSkippedLen.min, es); - ExplainPropertyInteger("NFA Skipped Length Max", NULL, winstate->nfaSkippedLen.max, es); - ExplainPropertyFloat("NFA Skipped Length Avg", NULL, - (double) winstate->nfaSkippedLen.total / winstate->nfaContextsSkipped, 1, - es); - } + ExplainPropertyInteger("NFA Match Length Min", NULL, winstate->nfaMatchLen.min, es); + ExplainPropertyInteger("NFA Match Length Max", NULL, winstate->nfaMatchLen.max, es); + ExplainPropertyFloat("NFA Match Length Avg", NULL, + (double) winstate->nfaMatchLen.total / winstate->nfaMatchesSucceeded, 1, + es); } - else + if (winstate->nfaMatchesFailed > 0) { - ExplainIndentText(es); + ExplainPropertyInteger("NFA Mismatch Length Min", NULL, winstate->nfaFailLen.min, es); + ExplainPropertyInteger("NFA Mismatch Length Max", NULL, winstate->nfaFailLen.max, es); + ExplainPropertyFloat("NFA Mismatch Length Avg", NULL, + (double) winstate->nfaFailLen.total / winstate->nfaMatchesFailed, 1, + es); + } + + /* Absorbed/skipped context length statistics */ + if (winstate->nfaContextsAbsorbed > 0) + { + ExplainPropertyInteger("NFA Absorbed Length Min", NULL, winstate->nfaAbsorbedLen.min, es); + ExplainPropertyInteger("NFA Absorbed Length Max", NULL, winstate->nfaAbsorbedLen.max, es); + ExplainPropertyFloat("NFA Absorbed Length Avg", NULL, + (double) winstate->nfaAbsorbedLen.total / winstate->nfaContextsAbsorbed, 1, + es); + } + if (winstate->nfaContextsSkipped > 0) + { + ExplainPropertyInteger("NFA Skipped Length Min", NULL, winstate->nfaSkippedLen.min, es); + ExplainPropertyInteger("NFA Skipped Length Max", NULL, winstate->nfaSkippedLen.max, es); + ExplainPropertyFloat("NFA Skipped Length Avg", NULL, + (double) winstate->nfaSkippedLen.total / winstate->nfaContextsSkipped, 1, + es); + } + } + else + { + /* State and context counters */ + ExplainIndentText(es); + appendStringInfo(es->str, + "NFA States: " INT64_FORMAT " peak, " INT64_FORMAT " total, " INT64_FORMAT " merged\n", + winstate->nfaStatesMax, + winstate->nfaStatesTotalCreated, + winstate->nfaStatesMerged); + ExplainIndentText(es); + appendStringInfo(es->str, + "NFA Contexts: " INT64_FORMAT " peak, " INT64_FORMAT " total, " INT64_FORMAT " pruned\n", + winstate->nfaContextsMax, + winstate->nfaContextsTotalCreated, + winstate->nfaContextsPruned); + + /* Match/mismatch counts with length min/max/avg */ + ExplainIndentText(es); + appendStringInfo(es->str, "NFA: "); + if (winstate->nfaMatchesSucceeded > 0) + { + double avgLen = (double) winstate->nfaMatchLen.total / winstate->nfaMatchesSucceeded; + appendStringInfo(es->str, - "NFA States: " INT64_FORMAT " peak, " INT64_FORMAT " total, " INT64_FORMAT " merged\n", - winstate->nfaStatesMax, - winstate->nfaStatesTotalCreated, - winstate->nfaStatesMerged); - ExplainIndentText(es); + INT64_FORMAT " matched (len " INT64_FORMAT "/" INT64_FORMAT "/%.1f)", + winstate->nfaMatchesSucceeded, + winstate->nfaMatchLen.min, + winstate->nfaMatchLen.max, + avgLen); + } + else + { + appendStringInfo(es->str, "0 matched"); + } + if (winstate->nfaMatchesFailed > 0) + { + double avgFail = (double) winstate->nfaFailLen.total / winstate->nfaMatchesFailed; + appendStringInfo(es->str, - "NFA Contexts: " INT64_FORMAT " peak, " INT64_FORMAT " total, " INT64_FORMAT " pruned\n", - winstate->nfaContextsMax, - winstate->nfaContextsTotalCreated, - winstate->nfaContextsPruned); + ", " INT64_FORMAT " mismatched (len " INT64_FORMAT "/" INT64_FORMAT "/%.1f)", + winstate->nfaMatchesFailed, + winstate->nfaFailLen.min, + winstate->nfaFailLen.max, + avgFail); + } + else + { + appendStringInfo(es->str, ", 0 mismatched"); + } + appendStringInfoChar(es->str, '\n'); + + /* Absorbed/skipped context length statistics */ + if (winstate->nfaContextsAbsorbed > 0 || winstate->nfaContextsSkipped > 0) + { ExplainIndentText(es); appendStringInfo(es->str, "NFA: "); - if (winstate->nfaMatchesSucceeded > 0) + + if (winstate->nfaContextsAbsorbed > 0) { - double avgLen = (double) winstate->nfaMatchLen.total / winstate->nfaMatchesSucceeded; + double avgAbsorbed = (double) winstate->nfaAbsorbedLen.total / winstate->nfaContextsAbsorbed; appendStringInfo(es->str, - INT64_FORMAT " matched (len " INT64_FORMAT "/" INT64_FORMAT "/%.1f)", - winstate->nfaMatchesSucceeded, - winstate->nfaMatchLen.min, - winstate->nfaMatchLen.max, - avgLen); + INT64_FORMAT " absorbed (len " INT64_FORMAT "/" INT64_FORMAT "/%.1f)", + winstate->nfaContextsAbsorbed, + winstate->nfaAbsorbedLen.min, + winstate->nfaAbsorbedLen.max, + avgAbsorbed); } else { - appendStringInfo(es->str, "0 matched"); + appendStringInfo(es->str, "0 absorbed"); } - if (winstate->nfaMatchesFailed > 0) + + if (winstate->nfaContextsSkipped > 0) { - double avgFail = (double) winstate->nfaFailLen.total / winstate->nfaMatchesFailed; + double avgSkipped = (double) winstate->nfaSkippedLen.total / winstate->nfaContextsSkipped; appendStringInfo(es->str, - ", " INT64_FORMAT " mismatched (len " INT64_FORMAT "/" INT64_FORMAT "/%.1f)", - winstate->nfaMatchesFailed, - winstate->nfaFailLen.min, - winstate->nfaFailLen.max, - avgFail); + ", " INT64_FORMAT " skipped (len " INT64_FORMAT "/" INT64_FORMAT "/%.1f)", + winstate->nfaContextsSkipped, + winstate->nfaSkippedLen.min, + winstate->nfaSkippedLen.max, + avgSkipped); } else { - appendStringInfo(es->str, ", 0 mismatched"); + appendStringInfo(es->str, ", 0 skipped"); } - appendStringInfoChar(es->str, '\n'); - - /* Show absorbed and skipped context length statistics */ - if (winstate->nfaContextsAbsorbed > 0 || winstate->nfaContextsSkipped > 0) - { - ExplainIndentText(es); - appendStringInfo(es->str, "NFA: "); - - if (winstate->nfaContextsAbsorbed > 0) - { - double avgAbsorbed = (double) winstate->nfaAbsorbedLen.total / winstate->nfaContextsAbsorbed; - - appendStringInfo(es->str, - INT64_FORMAT " absorbed (len " INT64_FORMAT "/" INT64_FORMAT "/%.1f)", - winstate->nfaContextsAbsorbed, - winstate->nfaAbsorbedLen.min, - winstate->nfaAbsorbedLen.max, - avgAbsorbed); - } - else - { - appendStringInfo(es->str, "0 absorbed"); - } - - if (winstate->nfaContextsSkipped > 0) - { - double avgSkipped = (double) winstate->nfaSkippedLen.total / winstate->nfaContextsSkipped; - - appendStringInfo(es->str, - ", " INT64_FORMAT " skipped (len " INT64_FORMAT "/" INT64_FORMAT "/%.1f)", - winstate->nfaContextsSkipped, - winstate->nfaSkippedLen.min, - winstate->nfaSkippedLen.max, - avgSkipped); - } - else - { - appendStringInfo(es->str, ", 0 skipped"); - } - appendStringInfoChar(es->str, '\n'); - } + appendStringInfoChar(es->str, '\n'); } } } diff --git a/src/backend/executor/nodeWindowAgg.c b/src/backend/executor/nodeWindowAgg.c index 1e088615d19..b2874bf6e9d 100644 --- a/src/backend/executor/nodeWindowAgg.c +++ b/src/backend/executor/nodeWindowAgg.c @@ -6048,6 +6048,41 @@ nfa_advance_alt(WindowAggState *winstate, RPRNFAContext *ctx, nfa_state_free(winstate, state); } +/* + * nfa_advance_begin + * + * Handle BEGIN element: group entry logic. + * BEGIN is only visited at initial group entry (count is always 0). + * If min=0, creates a skip path past the group. + * Loop-back from END goes directly to first child, bypassing BEGIN. + */ +static void +nfa_advance_begin(WindowAggState *winstate, RPRNFAContext *ctx, + RPRNFAState *state, RPRPatternElement *elem, + int64 currentPos, bool initialAdvance) +{ + RPRPattern *pattern = winstate->rpPattern; + RPRPatternElement *elements = pattern->elements; + + state->counts[elem->depth] = 0; + + /* Optional group: create skip path */ + if (elem->min == 0) + { + RPRNFAState *skipState; + + skipState = nfa_state_clone(winstate, elem->jump, state->altPriority, + state->counts, state->isAbsorbable); + nfa_route_to_elem(winstate, ctx, skipState, + &elements[elem->jump], currentPos, initialAdvance); + } + + /* Enter group: route to first child */ + state->elemIdx = elem->next; + nfa_route_to_elem(winstate, ctx, state, + &elements[state->elemIdx], currentPos, initialAdvance); +} + /* * nfa_advance_end * @@ -6233,6 +6268,10 @@ nfa_advance_state(WindowAggState *winstate, RPRNFAContext *ctx, nfa_advance_alt(winstate, ctx, state, elem, currentPos, initialAdvance); break; + case RPR_VARID_BEGIN: + nfa_advance_begin(winstate, ctx, state, elem, currentPos, initialAdvance); + break; + case RPR_VARID_END: nfa_advance_end(winstate, ctx, state, elem, currentPos, initialAdvance); break; diff --git a/src/backend/optimizer/plan/rpr.c b/src/backend/optimizer/plan/rpr.c index 50043c416c6..67710a94a0d 100644 --- a/src/backend/optimizer/plan/rpr.c +++ b/src/backend/optimizer/plan/rpr.c @@ -51,6 +51,7 @@ static RPRPatternNode *tryUnwrapSingleChild(RPRPatternNode *pattern); static List *flattenSeqChildren(List *children); static List *mergeConsecutiveVars(List *children); static List *mergeConsecutiveGroups(List *children); +static List *mergeConsecutiveAlts(List *children); static List *mergeGroupPrefixSuffix(List *children); static RPRPatternNode *optimizeSeqPattern(RPRPatternNode *pattern); @@ -363,6 +364,110 @@ mergeConsecutiveGroups(List *children) return mergedChildren; } +/* + * mergeConsecutiveAlts + * Merge consecutive identical ALT nodes into a GROUP. + * + * Example: + * (A | B) (A | B) (A | B) -> (A | B){3} + * + * After GROUP{1,1} unwrap, bare alternations like (A | B) become ALT nodes + * in the SEQ. This step detects consecutive identical ALT nodes and wraps + * them in a GROUP with the appropriate quantifier. + */ +static List * +mergeConsecutiveAlts(List *children) +{ + ListCell *lc; + List *mergedChildren = NIL; + RPRPatternNode *prev = NULL; + int count = 0; + + foreach(lc, children) + { + RPRPatternNode *child = (RPRPatternNode *) lfirst(lc); + + if (child->nodeType == RPR_PATTERN_ALT && child->reluctant < 0) + { + if (prev != NULL && + rprPatternChildrenEqual(prev->children, child->children)) + { + /* Same ALT as prev - accumulate */ + count++; + } + else + { + /* Different ALT or first ALT - flush previous */ + if (prev != NULL) + { + if (count > 1) + { + /* Wrap in GROUP{count,count}(ALT) */ + RPRPatternNode *group = makeNode(RPRPatternNode); + + group->nodeType = RPR_PATTERN_GROUP; + group->min = count; + group->max = count; + group->reluctant = -1; + group->location = -1; + group->children = list_make1(prev); + mergedChildren = lappend(mergedChildren, group); + } + else + mergedChildren = lappend(mergedChildren, prev); + } + prev = child; + count = 1; + } + } + else + { + /* Non-ALT - flush previous */ + if (prev != NULL) + { + if (count > 1) + { + RPRPatternNode *group = makeNode(RPRPatternNode); + + group->nodeType = RPR_PATTERN_GROUP; + group->min = count; + group->max = count; + group->reluctant = -1; + group->location = -1; + group->children = list_make1(prev); + mergedChildren = lappend(mergedChildren, group); + } + else + mergedChildren = lappend(mergedChildren, prev); + } + mergedChildren = lappend(mergedChildren, child); + prev = NULL; + count = 0; + } + } + + /* Flush remaining */ + if (prev != NULL) + { + if (count > 1) + { + RPRPatternNode *group = makeNode(RPRPatternNode); + + group->nodeType = RPR_PATTERN_GROUP; + group->min = count; + group->max = count; + group->reluctant = -1; + group->location = -1; + group->children = list_make1(prev); + mergedChildren = lappend(mergedChildren, group); + } + else + mergedChildren = lappend(mergedChildren, prev); + } + + return mergedChildren; +} + /* * mergeGroupPrefixSuffix * Merge sequence prefix/suffix into GROUP with matching children. @@ -529,8 +634,9 @@ mergeGroupPrefixSuffix(List *children) * 1. Flatten nested SEQ and GROUP{1,1} * 2. Merge consecutive identical VAR nodes * 3. Merge consecutive identical GROUP nodes - * 4. Merge prefix/suffix into GROUP with matching children - * 5. Unwrap single-item SEQ + * 4. Merge consecutive identical ALT nodes into GROUP + * 5. Merge prefix/suffix into GROUP with matching children + * 6. Unwrap single-item SEQ */ static RPRPatternNode * optimizeSeqPattern(RPRPatternNode *pattern) @@ -544,6 +650,9 @@ optimizeSeqPattern(RPRPatternNode *pattern) /* Merge consecutive identical GROUP nodes */ pattern->children = mergeConsecutiveGroups(pattern->children); + /* Merge consecutive identical ALT nodes into GROUP */ + pattern->children = mergeConsecutiveAlts(pattern->children); + /* Merge prefix/suffix into GROUP with matching children */ pattern->children = mergeGroupPrefixSuffix(pattern->children); @@ -665,12 +774,17 @@ tryMultiplyQuantifiers(RPRPatternNode *pattern) int64 new_min_64; int64 new_max_64; - if (list_length(pattern->children) != 1 || pattern->reluctant >= 0) + /* Parser always creates GROUP with exactly one child */ + Assert(list_length(pattern->children) == 1); + + if (pattern->reluctant >= 0) return pattern; child = (RPRPatternNode *) linitial(pattern->children); - if (child->nodeType != RPR_PATTERN_VAR || child->reluctant >= 0) + if ((child->nodeType != RPR_PATTERN_VAR && + child->nodeType != RPR_PATTERN_GROUP) || + child->reluctant >= 0) return pattern; /* Case 1: Both unbounded - (A*)* -> A*, (A+)+ -> A+ */ @@ -890,6 +1004,10 @@ scanRPRPatternRecursive(RPRPatternNode *node, char **varNames, int *numVars, break; case RPR_PATTERN_GROUP: + /* Add BEGIN element if group has non-trivial quantifier */ + if (node->min != 1 || node->max != 1) + (*numElements)++; + /* Recurse into children at increased depth */ foreach(lc, node->children) { @@ -1028,38 +1146,51 @@ fillRPRPatternGroup(RPRPatternNode *node, RPRPattern *pat, int *idx, RPRDepth de { ListCell *lc; int groupStartIdx = *idx; - bool altOnlyChild; - RPRDepth childDepth; - - /* - * Fill group content at increased depth. Exception: if the only child is - * ALT, don't increase depth since GROUP's parens already provide visual - * grouping. This avoids output like "((a | b))+" instead of "(a | b)+". - */ - altOnlyChild = (list_length(node->children) == 1 && - ((RPRPatternNode *) linitial(node->children))->nodeType == RPR_PATTERN_ALT); - childDepth = altOnlyChild ? depth : depth + 1; + int beginIdx = -1; - foreach(lc, node->children) - { - fillRPRPattern((RPRPatternNode *) lfirst(lc), pat, idx, childDepth); - } - - /* Add group end marker if group has non-trivial quantifier */ + /* Add BEGIN marker if group has non-trivial quantifier */ if (node->min != 1 || node->max != 1) { RPRPatternElement *elem = &pat->elements[*idx]; + beginIdx = *idx; memset(elem, 0, sizeof(RPRPatternElement)); - elem->varId = RPR_VARID_END; + elem->varId = RPR_VARID_BEGIN; elem->depth = depth; elem->min = node->min; elem->max = (node->max == INT_MAX) ? RPR_QUANTITY_INF : node->max; - elem->next = RPR_ELEMIDX_INVALID; - elem->jump = groupStartIdx; + elem->next = RPR_ELEMIDX_INVALID; /* set by finalize */ + elem->jump = RPR_ELEMIDX_INVALID; /* set after END */ if (node->reluctant >= 0) elem->flags |= RPR_ELEM_RELUCTANT; (*idx)++; + groupStartIdx = *idx; /* children start after BEGIN */ + } + + foreach(lc, node->children) + { + fillRPRPattern((RPRPatternNode *) lfirst(lc), pat, idx, depth + 1); + } + + /* Add group end marker if group has non-trivial quantifier */ + if (node->min != 1 || node->max != 1) + { + RPRPatternElement *beginElem = &pat->elements[beginIdx]; + RPRPatternElement *endElem = &pat->elements[*idx]; + + memset(endElem, 0, sizeof(RPRPatternElement)); + endElem->varId = RPR_VARID_END; + endElem->depth = depth; + endElem->min = node->min; + endElem->max = (node->max == INT_MAX) ? RPR_QUANTITY_INF : node->max; + endElem->next = RPR_ELEMIDX_INVALID; + endElem->jump = groupStartIdx; /* loop to first child */ + if (node->reluctant >= 0) + endElem->flags |= RPR_ELEM_RELUCTANT; + (*idx)++; + + /* Set BEGIN skip pointer (next is set by finalize) */ + beginElem->jump = *idx; /* skip: go to after END */ } } @@ -1097,9 +1228,7 @@ fillRPRPatternAlt(RPRPatternNode *node, RPRPattern *pat, int *idx, RPRDepth dept altBranchStarts = lappend_int(altBranchStarts, branchStart); fillRPRPattern(alt, pat, idx, depth + 1); - - if (*idx > branchStart) - altEndPositions = lappend_int(altEndPositions, *idx - 1); + altEndPositions = lappend_int(altEndPositions, *idx - 1); } /* Set jump on first element of each alternative to next alternative */ @@ -1114,13 +1243,35 @@ fillRPRPatternAlt(RPRPatternNode *node, RPRPattern *pat, int *idx, RPRDepth dept /* Set next on last element of each alternative to after the alternation */ { int afterAltIdx = *idx; + ListCell *lc2 = list_head(altBranchStarts); foreach(lc, altEndPositions) { int endPos = lfirst_int(lc); + int branchStart = lfirst_int(lc2); + + if (pat->elements[endPos].next != RPR_ELEMIDX_INVALID) + { + /* + * An inner ALT already set next on this element. Redirect + * all elements in this branch that share the same target to + * point to after this ALT instead. + */ + int oldTarget = pat->elements[endPos].next; + int j; + + for (j = branchStart; j <= endPos; j++) + { + if (pat->elements[j].next == oldTarget) + pat->elements[j].next = afterAltIdx; + } + } + else + { + pat->elements[endPos].next = afterAltIdx; + } - Assert(pat->elements[endPos].next == RPR_ELEMIDX_INVALID); - pat->elements[endPos].next = afterAltIdx; + lc2 = lnext(altBranchStarts, lc2); } } @@ -1451,9 +1602,18 @@ computeAbsorbabilityRecursive(RPRPattern *pattern, RPRElemIdx startIdx, if (*hasAbsorbable) elem->flags |= RPR_ELEM_ABSORBABLE_BRANCH; } + else if (RPRElemIsBegin(elem)) + { + /* BEGIN: skip to first child and check that */ + computeAbsorbabilityRecursive(pattern, elem->next, hasAbsorbable, parentDepth); + + /* Mark BEGIN element if contents are absorbable */ + if (*hasAbsorbable) + elem->flags |= RPR_ELEM_ABSORBABLE_BRANCH; + } else { - /* Non-ALT: check if unbounded start */ + /* Non-ALT, non-BEGIN: check if unbounded start */ if (isUnboundedStart(pattern, startIdx, parentDepth)) { *hasAbsorbable = true; @@ -1521,8 +1681,8 @@ collectPatternVariables(RPRPatternNode *pattern) { List *varNames = NIL; - if (pattern == NULL) - return NIL; + /* Caller ensures pattern is not NULL */ + Assert(pattern != NULL); collectPatternVariablesRecursive(pattern, &varNames); return varNames; diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index dbd667fc51a..ca7d0d25db3 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -16922,6 +16922,7 @@ row_pattern_alt: n->children = list_make2($1, $3); n->min = 1; n->max = 1; + n->reluctant = -1; n->location = @1; $$ = (Node *) n; } @@ -16948,6 +16949,7 @@ row_pattern_seq: n->children = list_make2($1, $2); n->min = 1; n->max = 1; + n->reluctant = -1; n->location = @1; $$ = (Node *) n; } @@ -16975,7 +16977,7 @@ row_pattern_primary: n->varName = $1; n->min = 1; n->max = 1; - n->reluctant = false; + n->reluctant = -1; n->children = NIL; n->location = @1; $$ = (Node *) n; @@ -16988,7 +16990,7 @@ row_pattern_primary: n->children = list_make1(inner); n->min = 1; n->max = 1; - n->reluctant = false; + n->reluctant = -1; n->location = @1; $$ = (Node *) n; } diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c index a7adba616e6..0d9e2e004e8 100644 --- a/src/backend/utils/adt/ruleutils.c +++ b/src/backend/utils/adt/ruleutils.c @@ -6776,8 +6776,12 @@ append_pattern_quantifier(StringInfo buf, RPRPatternNode *node) else appendStringInfo(buf, "{%d,%d}", node->min, node->max); - if (node->reluctant >= 0 && has_quantifier) + if (node->reluctant >= 0) + { + if (!has_quantifier) + appendStringInfo(buf, "{1}"); /* make reluctant ? unambiguous */ appendStringInfoChar(buf, '?'); + } } /* diff --git a/src/include/optimizer/rpr.h b/src/include/optimizer/rpr.h index 691c6c27e97..8e1bc47643c 100644 --- a/src/include/optimizer/rpr.h +++ b/src/include/optimizer/rpr.h @@ -17,16 +17,18 @@ #include "nodes/plannodes.h" /* Limits and special values */ -#define RPR_VARID_MAX 252 /* max pattern variables: 252 */ +#define RPR_VARID_MAX 251 /* max pattern variables: 251 */ #define RPR_QUANTITY_INF INT32_MAX /* unbounded quantifier */ #define RPR_COUNT_MAX INT32_MAX /* max runtime count (NFA state) */ #define RPR_ELEMIDX_MAX INT16_MAX /* max pattern elements */ #define RPR_ELEMIDX_INVALID ((RPRElemIdx) -1) /* invalid index */ -#define RPR_DEPTH_MAX UINT8_MAX /* max pattern nesting depth */ +#define RPR_DEPTH_MAX (UINT8_MAX - 1) /* max pattern nesting depth: 254 */ +#define RPR_DEPTH_NONE UINT8_MAX /* no enclosing group (top-level) */ -/* Special varId values for control elements (253-255) */ -#define RPR_VARID_ALT ((RPRVarId) 253) /* alternation start */ -#define RPR_VARID_END ((RPRVarId) 254) /* group end */ +/* Special varId values for control elements (252-255) */ +#define RPR_VARID_BEGIN ((RPRVarId) 252) /* group begin */ +#define RPR_VARID_END ((RPRVarId) 253) /* group end */ +#define RPR_VARID_ALT ((RPRVarId) 254) /* alternation start */ #define RPR_VARID_FIN ((RPRVarId) 255) /* pattern finish */ /* Element flags */ @@ -40,8 +42,9 @@ #define RPRElemIsAbsorbableBranch(e) ((e)->flags & RPR_ELEM_ABSORBABLE_BRANCH) #define RPRElemIsAbsorbable(e) ((e)->flags & RPR_ELEM_ABSORBABLE) #define RPRElemIsVar(e) ((e)->varId <= RPR_VARID_MAX) -#define RPRElemIsAlt(e) ((e)->varId == RPR_VARID_ALT) +#define RPRElemIsBegin(e) ((e)->varId == RPR_VARID_BEGIN) #define RPRElemIsEnd(e) ((e)->varId == RPR_VARID_END) +#define RPRElemIsAlt(e) ((e)->varId == RPR_VARID_ALT) #define RPRElemIsFin(e) ((e)->varId == RPR_VARID_FIN) #define RPRElemCanSkip(e) ((e)->min == 0) diff --git a/src/test/regress/expected/rpr.out b/src/test/regress/expected/rpr.out index d4298860865..8c8ff90e1cd 100644 --- a/src/test/regress/expected/rpr.out +++ b/src/test/regress/expected/rpr.out @@ -2724,8 +2724,8 @@ SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER ERROR: reluctant quantifiers are not yet supported LINE 9: PATTERN (START UP+? DOWN+) ^ --- Maximum pattern variables is 252 (RPR_VARID_MAX) --- Ok: 252 variables (maximum allowed) +-- Maximum pattern variables is 251 (RPR_VARID_MAX) +-- Error: 252 variables exceeds limit of 251 DO $$ DECLARE pattern_vars text; @@ -2745,7 +2745,14 @@ BEGIN EXECUTE query; END; $$; --- Error: 253 variables exceeds limit of 252 +ERROR: too many pattern variables +DETAIL: Maximum is 251. +CONTEXT: SQL statement "SELECT * FROM (SELECT 1 AS x) t WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (v001 v002 v003 v004 v005 v006 v007 v008 v009 v010 v011 v012 v013 v014 v015 v016 v017 v018 v019 v020 v021 v022 v023 v024 v025 v026 v027 v028 v029 v030 v031 v032 v033 v034 v035 v036 v037 v038 v039 v040 v041 v042 v043 v044 v045 v046 v047 v048 v049 v050 v051 v052 v053 v054 v055 v056 v057 v058 v059 v060 v061 v062 v063 v064 v065 v066 v067 v068 v069 v070 v071 v072 v073 v074 v075 v076 v077 v078 v079 v080 v081 v082 v083 v084 v085 v086 v087 v088 v089 v090 v091 v092 v093 v094 v095 v096 v097 v098 v099 v100 v101 v102 v103 v104 v105 v106 v107 v108 v109 v110 v111 v112 v113 v114 v115 v116 v117 v118 v119 v120 v121 v122 v123 v124 v125 v126 v127 v128 v129 v130 v131 v132 v133 v134 v135 v136 v137 v138 v139 v140 v141 v142 v143 v144 v145 v146 v147 v148 v149 v150 v151 v152 v153 v154 v155 v156 v157 v158 v159 v160 v161 v162 v163 v164 v165 v166 v167 v168 v169 v170 v171 v172 v173 v174 v175 v176 v177 v178 v179 v180 v181 v182 v183 v184 v185 v186 v187 v188 v189 v190 v191 v192 v193 v194 v195 v196 v197 v198 v199 v200 v201 v202 v203 v204 v205 v206 v207 v208 v209 v210 v211 v212 v213 v214 v215 v216 v217 v218 v219 v220 v221 v222 v223 v224 v225 v226 v227 v228 v229 v230 v231 v232 v233 v234 v235 v236 v237 v238 v239 v240 v241 v242 v243 v244 v245 v246 v247 v248 v249 v250 v251 v252) + DEFINE v001 AS TRUE, v002 AS TRUE, v003 AS TRUE, v004 AS TRUE, v005 AS TRUE, v006 AS TRUE, v007 AS TRUE, v008 AS TRUE, v009 AS TRUE, v010 AS TRUE, v011 AS TRUE, v012 AS TRUE, v013 AS TRUE, v014 AS TRUE, v015 AS TRUE, v016 AS TRUE, v017 AS TRUE, v018 AS TRUE, v019 AS TRUE, v020 AS TRUE, v021 AS TRUE, v022 AS TRUE, v023 AS TRUE, v024 AS TRUE, v025 AS TRUE, v026 AS TRUE, v027 AS TRUE, v028 AS TRUE, v029 AS TRUE, v030 AS TRUE, v031 AS TRUE, v032 AS TRUE, v033 AS TRUE, v034 AS TRUE, v035 AS TRUE, v036 AS TRUE, v037 AS TRUE, v038 AS TRUE, v039 AS TRUE, v040 AS TRUE, v041 AS TRUE, v042 AS TRUE, v043 AS TRUE, v044 AS TRUE, v045 AS TRUE, v046 AS TRUE, v047 AS TRUE, v048 AS TRUE, v049 AS TRUE, v050 AS TRUE, v051 AS TRUE, v052 AS TRUE, v053 AS TRUE, v054 AS TRUE, v055 AS TRUE, v056 AS TRUE, v057 AS TRUE, v058 AS TRUE, v059 AS TRUE, v060 AS TRUE, v061 AS TRUE, v062 AS TRUE, v063 AS TRUE, v064 AS TRUE, v065 AS TRUE, v066 AS TRUE, v067 AS TRUE, v068 AS TRUE, v069 AS TRUE, v070 AS TRUE, v071 AS TRUE, v072 AS TRUE, v073 AS TRUE, v074 AS TRUE, v075 AS TRUE, v076 AS TRUE, v077 AS TRUE, v078 AS TRUE, v079 AS TRUE, v080 AS TRUE, v081 AS TRUE, v082 AS TRUE, v083 AS TRUE, v084 AS TRUE, v085 AS TRUE, v086 AS TRUE, v087 AS TRUE, v088 AS TRUE, v089 AS TRUE, v090 AS TRUE, v091 AS TRUE, v092 AS TRUE, v093 AS TRUE, v094 AS TRUE, v095 AS TRUE, v096 AS TRUE, v097 AS TRUE, v098 AS TRUE, v099 AS TRUE, v100 AS TRUE, v101 AS TRUE, v102 AS TRUE, v103 AS TRUE, v104 AS TRUE, v105 AS TRUE, v106 AS TRUE, v107 AS TRUE, v108 AS TRUE, v109 AS TRUE, v110 AS TRUE, v111 AS TRUE, v112 AS TRUE, v113 AS TRUE, v114 AS TRUE, v115 AS TRUE, v116 AS TRUE, v117 AS TRUE, v118 AS TRUE, v119 AS TRUE, v120 AS TRUE, v121 AS TRUE, v122 AS TRUE, v123 AS TRUE, v124 AS TRUE, v125 AS TRUE, v126 AS TRUE, v127 AS TRUE, v128 AS TRUE, v129 AS TRUE, v130 AS TRUE, v131 AS TRUE, v132 AS TRUE, v133 AS TRUE, v134 AS TRUE, v135 AS TRUE, v136 AS TRUE, v137 AS TRUE, v138 AS TRUE, v139 AS TRUE, v140 AS TRUE, v141 AS TRUE, v142 AS TRUE, v143 AS TRUE, v144 AS TRUE, v145 AS TRUE, v146 AS TRUE, v147 AS TRUE, v148 AS TRUE, v149 AS TRUE, v150 AS TRUE, v151 AS TRUE, v152 AS TRUE, v153 AS TRUE, v154 AS TRUE, v155 AS TRUE, v156 AS TRUE, v157 AS TRUE, v158 AS TRUE, v159 AS TRUE, v160 AS TRUE, v161 AS TRUE, v162 AS TRUE, v163 AS TRUE, v164 AS TRUE, v165 AS TRUE, v166 AS TRUE, v167 AS TRUE, v168 AS TRUE, v169 AS TRUE, v170 AS TRUE, v171 AS TRUE, v172 AS TRUE, v173 AS TRUE, v174 AS TRUE, v175 AS TRUE, v176 AS TRUE, v177 AS TRUE, v178 AS TRUE, v179 AS TRUE, v180 AS TRUE, v181 AS TRUE, v182 AS TRUE, v183 AS TRUE, v184 AS TRUE, v185 AS TRUE, v186 AS TRUE, v187 AS TRUE, v188 AS TRUE, v189 AS TRUE, v190 AS TRUE, v191 AS TRUE, v192 AS TRUE, v193 AS TRUE, v194 AS TRUE, v195 AS TRUE, v196 AS TRUE, v197 AS TRUE, v198 AS TRUE, v199 AS TRUE, v200 AS TRUE, v201 AS TRUE, v202 AS TRUE, v203 AS TRUE, v204 AS TRUE, v205 AS TRUE, v206 AS TRUE, v207 AS TRUE, v208 AS TRUE, v209 AS TRUE, v210 AS TRUE, v211 AS TRUE, v212 AS TRUE, v213 AS TRUE, v214 AS TRUE, v215 AS TRUE, v216 AS TRUE, v217 AS TRUE, v218 AS TRUE, v219 AS TRUE, v220 AS TRUE, v221 AS TRUE, v222 AS TRUE, v223 AS TRUE, v224 AS TRUE, v225 AS TRUE, v226 AS TRUE, v227 AS TRUE, v228 AS TRUE, v229 AS TRUE, v230 AS TRUE, v231 AS TRUE, v232 AS TRUE, v233 AS TRUE, v234 AS TRUE, v235 AS TRUE, v236 AS TRUE, v237 AS TRUE, v238 AS TRUE, v239 AS TRUE, v240 AS TRUE, v241 AS TRUE, v242 AS TRUE, v243 AS TRUE, v244 AS TRUE, v245 AS TRUE, v246 AS TRUE, v247 AS TRUE, v248 AS TRUE, v249 AS TRUE, v250 AS TRUE, v251 AS TRUE, v252 AS TRUE)" +PL/pgSQL function inline_code_block line 17 at EXECUTE +-- Error: 253 variables exceeds limit of 251 DO $$ DECLARE pattern_vars text; @@ -2766,7 +2773,7 @@ BEGIN END; $$; ERROR: too many pattern variables -DETAIL: Maximum is 252. +DETAIL: Maximum is 251. CONTEXT: SQL statement "SELECT * FROM (SELECT 1 AS x) t WINDOW w AS ( ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING PATTERN (v001 v002 v003 v004 v005 v006 v007 v008 v009 v010 v011 v012 v013 v014 v015 v016 v017 v018 v019 v020 v021 v022 v023 v024 v025 v026 v027 v028 v029 v030 v031 v032 v033 v034 v035 v036 v037 v038 v039 v040 v041 v042 v043 v044 v045 v046 v047 v048 v049 v050 v051 v052 v053 v054 v055 v056 v057 v058 v059 v060 v061 v062 v063 v064 v065 v066 v067 v068 v069 v070 v071 v072 v073 v074 v075 v076 v077 v078 v079 v080 v081 v082 v083 v084 v085 v086 v087 v088 v089 v090 v091 v092 v093 v094 v095 v096 v097 v098 v099 v100 v101 v102 v103 v104 v105 v106 v107 v108 v109 v110 v111 v112 v113 v114 v115 v116 v117 v118 v119 v120 v121 v122 v123 v124 v125 v126 v127 v128 v129 v130 v131 v132 v133 v134 v135 v136 v137 v138 v139 v140 v141 v142 v143 v144 v145 v146 v147 v148 v149 v150 v151 v152 v153 v154 v155 v156 v157 v158 v159 v160 v161 v162 v163 v164 v165 v166 v167 v168 v169 v170 v171 v172 v173 v174 v175 v176 v177 v178 v179 v180 v181 v182 v183 v184 v185 v186 v187 v188 v189 v190 v191 v192 v193 v194 v195 v196 v197 v198 v199 v200 v201 v202 v203 v204 v205 v206 v207 v208 v209 v210 v211 v212 v213 v214 v215 v216 v217 v218 v219 v220 v221 v222 v223 v224 v225 v226 v227 v228 v229 v230 v231 v232 v233 v234 v235 v236 v237 v238 v239 v240 v241 v242 v243 v244 v245 v246 v247 v248 v249 v250 v251 v252 v253) diff --git a/src/test/regress/expected/rpr_base.out b/src/test/regress/expected/rpr_base.out index 23851c5a11c..c269ab99651 100644 --- a/src/test/regress/expected/rpr_base.out +++ b/src/test/regress/expected/rpr_base.out @@ -2833,6 +2833,23 @@ WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING -> Seq Scan on rpr_plan (6 rows) +-- Consecutive VAR merge: A A+ -> a{2,} +-- Tests line 251: child->max == RPR_QUANTITY_INF branch in mergeConsecutiveVars +-- prev: A{1,1} (finite), child: A+ (infinite) triggers line 251 evaluation +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A A+) DEFINE A AS val > 0); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a{2,}" + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(6 rows) + -- Consecutive GROUP merge with finite quantifiers: ((A B){5}) ((A B){10}) -> merged EXPLAIN (COSTS OFF) SELECT COUNT(*) OVER w FROM rpr_plan @@ -2863,6 +2880,23 @@ WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING -> Seq Scan on rpr_plan (6 rows) +-- Consecutive GROUP merge: (A B){2} (A B)+ -> (a b){3,} +-- Tests line 325: child->max == RPR_QUANTITY_INF branch in mergeConsecutiveGroups +-- prev: (A B){2,2} (finite), child: (A B)+ (infinite) triggers line 325 evaluation +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A B){2} (A B)+) DEFINE A AS val <= 50, B AS val > 50); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a' b'){3,}" + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(6 rows) + -- PREFIX merge: A B (A B)+ -> (a b){2,} EXPLAIN (COSTS OFF) SELECT COUNT(*) OVER w FROM rpr_plan @@ -5057,13 +5091,13 @@ WINDOW w AS ( 0 (2 rows) --- Test: 252 variables in PATTERN, 253 in DEFINE (boundary - should succeed) +-- Test: 251 variables in PATTERN, 252 in DEFINE (boundary - should succeed) -- Expected: Success - unused DEFINE variables are filtered out SELECT COUNT(*) OVER w FROM rpr_errors WINDOW w AS ( ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - PATTERN (V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20 V21 V22 V23 V24 V25 V26 V27 V28 V29 V30 V31 V32 V33 V34 V35 V36 V37 V38 V39 V40 V41 V42 V43 V44 V45 V46 V47 V48 V49 V50 V51 V52 V53 V54 V55 V56 V57 V58 V59 V60 V61 V62 V63 V64 V65 V66 V67 V68 V69 V70 V71 V72 V73 V74 V75 V76 V77 V78 V79 V80 V81 V82 V83 V84 V85 V86 V87 V88 V89 V90 V91 V92 V93 V94 V95 V96 V97 V98 V99 V100 V101 V102 V103 V104 V105 V106 V107 V108 V109 V110 V111 V112 V113 V114 V115 V116 V117 V118 V119 V120 V121 V122 V123 V124 V125 V126 V127 V128 V129 V130 V131 V132 V133 V134 V135 V136 V137 V138 V139 V140 V141 V142 V143 V144 V145 V146 V147 V148 V149 V150 V151 V152 V153 V154 V155 V156 V157 V158 V159 V160 V161 V162 V163 V164 V165 V166 V167 V168 V169 V170 V171 V172 V173 V174 V175 V176 V177 V178 V179 V180 V181 V182 V183 V184 V185 V186 V187 V188 V189 V190 V191 V192 V193 V194 V195 V196 V197 V198 V199 V200 V201 V202 V203 V204 V205 V206 V207 V208 V209 V210 V211 V212 V213 V214 V215 V216 V217 V218 V219 V220 V221 V222 V223 V224 V225 V226 V227 V228 V229 V230 V231 V232 V233 V234 V235 V236 V237 V238 V239 V240 V241 V242 V243 V244 V245 V246 V247 V248 V249 V250 V251 V252) + PATTERN (V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20 V21 V22 V23 V24 V25 V26 V27 V28 V29 V30 V31 V32 V33 V34 V35 V36 V37 V38 V39 V40 V41 V42 V43 V44 V45 V46 V47 V48 V49 V50 V51 V52 V53 V54 V55 V56 V57 V58 V59 V60 V61 V62 V63 V64 V65 V66 V67 V68 V69 V70 V71 V72 V73 V74 V75 V76 V77 V78 V79 V80 V81 V82 V83 V84 V85 V86 V87 V88 V89 V90 V91 V92 V93 V94 V95 V96 V97 V98 V99 V100 V101 V102 V103 V104 V105 V106 V107 V108 V109 V110 V111 V112 V113 V114 V115 V116 V117 V118 V119 V120 V121 V122 V123 V124 V125 V126 V127 V128 V129 V130 V131 V132 V133 V134 V135 V136 V137 V138 V139 V140 V141 V142 V143 V144 V145 V146 V147 V148 V149 V150 V151 V152 V153 V154 V155 V156 V157 V158 V159 V160 V161 V162 V163 V164 V165 V166 V167 V168 V169 V170 V171 V172 V173 V174 V175 V176 V177 V178 V179 V180 V181 V182 V183 V184 V185 V186 V187 V188 V189 V190 V191 V192 V193 V194 V195 V196 V197 V198 V199 V200 V201 V202 V203 V204 V205 V206 V207 V208 V209 V210 V211 V212 V213 V214 V215 V216 V217 V218 V219 V220 V221 V222 V223 V224 V225 V226 V227 V228 V229 V230 V231 V232 V233 V234 V235 V236 V237 V238 V239 V240 V241 V242 V243 V244 V245 V246 V247 V248 V249 V250 V251) DEFINE V1 AS val > 0, V2 AS val > 0, V3 AS val > 0, V4 AS val > 0, V5 AS val > 0, V6 AS val > 0, V7 AS val > 0, V8 AS val > 0, V9 AS val > 0, V10 AS val > 0, V11 AS val > 0, V12 AS val > 0, V13 AS val > 0, V14 AS val > 0, V15 AS val > 0, V16 AS val > 0, V17 AS val > 0, V18 AS val > 0, V19 AS val > 0, V20 AS val > 0, @@ -5090,7 +5124,7 @@ WINDOW w AS ( V221 AS val > 0, V222 AS val > 0, V223 AS val > 0, V224 AS val > 0, V225 AS val > 0, V226 AS val > 0, V227 AS val > 0, V228 AS val > 0, V229 AS val > 0, V230 AS val > 0, V231 AS val > 0, V232 AS val > 0, V233 AS val > 0, V234 AS val > 0, V235 AS val > 0, V236 AS val > 0, V237 AS val > 0, V238 AS val > 0, V239 AS val > 0, V240 AS val > 0, V241 AS val > 0, V242 AS val > 0, V243 AS val > 0, V244 AS val > 0, V245 AS val > 0, V246 AS val > 0, V247 AS val > 0, V248 AS val > 0, V249 AS val > 0, V250 AS val > 0, - V251 AS val > 0, V252 AS val > 0, V253 AS val > 0 + V251 AS val > 0, V252 AS val > 0 ); count ------- @@ -5098,13 +5132,13 @@ WINDOW w AS ( 0 (2 rows) --- Test: 253 variables in PATTERN, 252 in DEFINE (exceeds limit with implicit TRUE) --- Expected: ERROR - too many pattern variables (Maximum is 252) +-- Test: 252 variables in PATTERN, 251 in DEFINE (exceeds limit with implicit TRUE) +-- Expected: ERROR - too many pattern variables (Maximum is 251) SELECT COUNT(*) OVER w FROM rpr_errors WINDOW w AS ( ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - PATTERN (V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20 V21 V22 V23 V24 V25 V26 V27 V28 V29 V30 V31 V32 V33 V34 V35 V36 V37 V38 V39 V40 V41 V42 V43 V44 V45 V46 V47 V48 V49 V50 V51 V52 V53 V54 V55 V56 V57 V58 V59 V60 V61 V62 V63 V64 V65 V66 V67 V68 V69 V70 V71 V72 V73 V74 V75 V76 V77 V78 V79 V80 V81 V82 V83 V84 V85 V86 V87 V88 V89 V90 V91 V92 V93 V94 V95 V96 V97 V98 V99 V100 V101 V102 V103 V104 V105 V106 V107 V108 V109 V110 V111 V112 V113 V114 V115 V116 V117 V118 V119 V120 V121 V122 V123 V124 V125 V126 V127 V128 V129 V130 V131 V132 V133 V134 V135 V136 V137 V138 V139 V140 V141 V142 V143 V144 V145 V146 V147 V148 V149 V150 V151 V152 V153 V154 V155 V156 V157 V158 V159 V160 V161 V162 V163 V164 V165 V166 V167 V168 V169 V170 V171 V172 V173 V174 V175 V176 V177 V178 V179 V180 V181 V182 V183 V184 V185 V186 V187 V188 V189 V190 V191 V192 V193 V194 V195 V196 V197 V198 V199 V200 V201 V202 V203 V204 V205 V206 V207 V208 V209 V210 V211 V212 V213 V214 V215 V216 V217 V218 V219 V220 V221 V222 V223 V224 V225 V226 V227 V228 V229 V230 V231 V232 V233 V234 V235 V236 V237 V238 V239 V240 V241 V242 V243 V244 V245 V246 V247 V248 V249 V250 V251 V252 V253) + PATTERN (V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20 V21 V22 V23 V24 V25 V26 V27 V28 V29 V30 V31 V32 V33 V34 V35 V36 V37 V38 V39 V40 V41 V42 V43 V44 V45 V46 V47 V48 V49 V50 V51 V52 V53 V54 V55 V56 V57 V58 V59 V60 V61 V62 V63 V64 V65 V66 V67 V68 V69 V70 V71 V72 V73 V74 V75 V76 V77 V78 V79 V80 V81 V82 V83 V84 V85 V86 V87 V88 V89 V90 V91 V92 V93 V94 V95 V96 V97 V98 V99 V100 V101 V102 V103 V104 V105 V106 V107 V108 V109 V110 V111 V112 V113 V114 V115 V116 V117 V118 V119 V120 V121 V122 V123 V124 V125 V126 V127 V128 V129 V130 V131 V132 V133 V134 V135 V136 V137 V138 V139 V140 V141 V142 V143 V144 V145 V146 V147 V148 V149 V150 V151 V152 V153 V154 V155 V156 V157 V158 V159 V160 V161 V162 V163 V164 V165 V166 V167 V168 V169 V170 V171 V172 V173 V174 V175 V176 V177 V178 V179 V180 V181 V182 V183 V184 V185 V186 V187 V188 V189 V190 V191 V192 V193 V194 V195 V196 V197 V198 V199 V200 V201 V202 V203 V204 V205 V206 V207 V208 V209 V210 V211 V212 V213 V214 V215 V216 V217 V218 V219 V220 V221 V222 V223 V224 V225 V226 V227 V228 V229 V230 V231 V232 V233 V234 V235 V236 V237 V238 V239 V240 V241 V242 V243 V244 V245 V246 V247 V248 V249 V250 V251 V252) DEFINE V1 AS val > 0, V2 AS val > 0, V3 AS val > 0, V4 AS val > 0, V5 AS val > 0, V6 AS val > 0, V7 AS val > 0, V8 AS val > 0, V9 AS val > 0, V10 AS val > 0, V11 AS val > 0, V12 AS val > 0, V13 AS val > 0, V14 AS val > 0, V15 AS val > 0, V16 AS val > 0, V17 AS val > 0, V18 AS val > 0, V19 AS val > 0, V20 AS val > 0, @@ -5131,18 +5165,18 @@ WINDOW w AS ( V221 AS val > 0, V222 AS val > 0, V223 AS val > 0, V224 AS val > 0, V225 AS val > 0, V226 AS val > 0, V227 AS val > 0, V228 AS val > 0, V229 AS val > 0, V230 AS val > 0, V231 AS val > 0, V232 AS val > 0, V233 AS val > 0, V234 AS val > 0, V235 AS val > 0, V236 AS val > 0, V237 AS val > 0, V238 AS val > 0, V239 AS val > 0, V240 AS val > 0, V241 AS val > 0, V242 AS val > 0, V243 AS val > 0, V244 AS val > 0, V245 AS val > 0, V246 AS val > 0, V247 AS val > 0, V248 AS val > 0, V249 AS val > 0, V250 AS val > 0, - V251 AS val > 0, V252 AS val > 0 + V251 AS val > 0 ); ERROR: too many pattern variables -DETAIL: Maximum is 252. --- Test: Pattern nesting at maximum depth (depth 254) +DETAIL: Maximum is 251. +-- Test: Pattern nesting at maximum depth (depth 253) -- Expected: Should succeed --- Note: 254 nested GROUP{3,7} quantifiers produce depth 254 after optimization +-- Note: 253 nested GROUP{3,7} quantifiers produce depth 253 after optimization SELECT id, val, COUNT(*) OVER w FROM rpr_errors WINDOW w AS ( ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - PATTERN (((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((A{3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}) + PATTERN ((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((A{3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}) DEFINE A AS val > 0 ); id | val | count @@ -5151,18 +5185,18 @@ WINDOW w AS ( 2 | 20 | 0 (2 rows) --- Test: Pattern nesting depth exceeds maximum (depth 255) +-- Test: Pattern nesting depth exceeds maximum (depth 254) -- Expected: ERROR - pattern nesting too deep --- Note: 255 nested GROUP{3,7} quantifiers produce depth 255 after optimization +-- Note: 254 nested GROUP{3,7} quantifiers produce depth 254 after optimization SELECT id, val, COUNT(*) OVER w FROM rpr_errors WINDOW w AS ( ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - PATTERN ((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((A{3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}) + PATTERN (((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((A{3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}) DEFINE A AS val > 0 ); ERROR: pattern nesting too deep -DETAIL: Pattern nesting depth 255 exceeds maximum 254. +DETAIL: Pattern nesting depth 254 exceeds maximum 253. DROP TABLE rpr_errors; -- ============================================================ -- Jacob's Patterns @@ -5481,6 +5515,29 @@ WINDOW w AS ( 3 | 0 (3 rows) +-- Optional group with alternation: A ((B | C) (D | E))* F? +-- When only A matches, the * group matches 0 times and F? matches 0 times +SELECT id, val, match_len +FROM (SELECT id, val, + COUNT(*) OVER w AS match_len + FROM (VALUES (1, 1), (2, 99)) AS t(id, val) + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A ((B | C) (D | E))* F?) + DEFINE A AS val = 1, + B AS val = 2, C AS val = 3, + D AS val = 4, E AS val = 5, + F AS val = 6 + ) +) s; + id | val | match_len +----+-----+----------- + 1 | 1 | 1 + 2 | 99 | 0 +(2 rows) + DROP TABLE rpr_plan; -- ============================================================ -- End of rpr_base.sql diff --git a/src/test/regress/expected/rpr_explain.out b/src/test/regress/expected/rpr_explain.out index b5ceaae53b5..ea75d62718e 100644 --- a/src/test/regress/expected/rpr_explain.out +++ b/src/test/regress/expected/rpr_explain.out @@ -1,13 +1,42 @@ +-- ============================================================ +-- RPR EXPLAIN Tests +-- Tests for Row Pattern Recognition EXPLAIN output +-- ============================================================ -- --- Test: EXPLAIN ANALYZE output for Row Pattern Recognition NFA statistics +-- This test suite validates EXPLAIN output for RPR queries, +-- including NFA statistics shown in EXPLAIN ANALYZE: +-- - NFA States: peak, total, merged +-- - NFA Contexts: peak, total, absorbed, skipped +-- - NFA: matched (len min/max/avg), mismatched (len min/max/avg) +-- - Pattern deparse formatting +-- - Multiple output formats (text, JSON, XML) -- --- This file tests the NFA statistics shown in EXPLAIN ANALYZE output: --- - NFA States: peak, total, merged --- - NFA Contexts: peak, total, absorbed, skipped --- - NFA: matched (len min/max/avg), mismatched (len min/max/avg) --- --- Filter function to normalize Storage memory values only (not NFA statistics) --- Works for text, JSON, and XML formats +-- Test Coverage: +-- Basic NFA Statistics Tests +-- State Statistics Tests +-- Context Statistics Tests +-- Match Length Statistics Tests +-- Mismatch Length Statistics Tests +-- JSON Format Tests +-- XML Format Tests +-- Multiple Partitions Tests +-- Edge Cases +-- Complex Pattern Tests +-- Real-world Pattern Examples +-- Performance-oriented Tests +-- INITIAL vs no INITIAL comparison +-- Quantifier Variations +-- Regression Tests for Statistics Accuracy +-- Alternation Pattern Tests +-- Group Pattern Tests +-- Window Function Combinations +-- DEFINE Expression Variations +-- Large Scale Statistics Verification +-- ============================================================ +-- Filter function to normalize Storage memory values only (not NFA statistics). +-- NFA statistics should not change between platforms; if they do, it could +-- indicate issues such as uninitialized memory access. +-- Works for text, JSON, and XML formats. create function rpr_explain_filter(text) returns setof text language plpgsql as $$ @@ -69,10 +98,25 @@ VALUES (124, 'D'), (122, 'D'), (120, 'D'), (118, 'D'), (119, 'U'), (121, 'U'), (123, 'U'), (125, 'U'), (127, 'U'), (129, 'U'), (131, 'U'), (133, 'U'), (130, 'D'), (127, 'D'), (124, 'D'); --- --- Section 1: Basic NFA Statistics Tests --- --- Test 1.1: Simple pattern - should show basic statistics +-- ============================================================ +-- Basic NFA Statistics Tests +-- ============================================================ +-- Simple pattern - should show basic statistics +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM nfa_test +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B) + DEFINE A AS cat = 'A', B AS cat = 'B' +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------ + PATTERN (a b) +(1 row) + SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -95,7 +139,23 @@ WINDOW w AS ( -> Seq Scan on nfa_test (actual rows=100.00 loops=1) (8 rows) --- Test 1.2: Pattern with no matches - 0 matched +DROP VIEW rpr_v; +-- Pattern with no matches - 0 matched +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM nfa_test +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (X Y Z) + DEFINE X AS cat = 'X', Y AS cat = 'Y', Z AS cat = 'Z' +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +-------------------- + PATTERN (x y z) +(1 row) + SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -118,7 +178,23 @@ WINDOW w AS ( -> Seq Scan on nfa_test (actual rows=100.00 loops=1) (8 rows) --- Test 1.3: Pattern matching every row - high match count +DROP VIEW rpr_v; +-- Pattern matching every row - high match count +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM nfa_test +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (R) + DEFINE R AS TRUE +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +---------------- + PATTERN (r) +(1 row) + SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -141,10 +217,95 @@ WINDOW w AS ( -> Seq Scan on nfa_test (actual rows=100.00 loops=1) (8 rows) --- --- Section 2: State Statistics Tests (peak, total, merged) --- --- Test 2.1: Simple quantifier pattern - A+ with short matches (no merging) +DROP VIEW rpr_v; +-- Regression test: Space before parenthesis in pattern deparse +-- Verifies that "A (B | C)" correctly outputs as "a (b | c)" with space +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 20) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A (B | C)) + DEFINE A AS v % 3 = 1, B AS v % 3 = 2, C AS v % 3 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------------ + PATTERN (a (b | c)) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (COSTS OFF) +SELECT count(*) OVER w +FROM generate_series(1, 20) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A (B | C)) + DEFINE A AS v % 3 = 1, B AS v % 3 = 2, C AS v % 3 = 0 +);'); + rpr_explain_filter +------------------------------------------------------------------- + WindowAgg + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a (b | c) + -> Function Scan on generate_series s +(4 rows) + +DROP VIEW rpr_v; +-- Regression test: Sequential alternations at same depth +-- Verifies that "((B | C) (D | E))" correctly outputs as "(b | c) (d | e)" +-- Previously failed due to missing parentheses on ALT depth decrease +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 30) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A ((B | C) (D | E))*) + DEFINE A AS v % 5 = 1, B AS v % 5 = 2, C AS v % 5 = 3, D AS v % 5 = 4, E AS v % 5 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +----------------------------------- + PATTERN (a ((b | c) (d | e))*) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (COSTS OFF) +SELECT count(*) OVER w +FROM generate_series(1, 30) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A ((B | C) (D | E))*) + DEFINE A AS v % 5 = 1, B AS v % 5 = 2, C AS v % 5 = 3, D AS v % 5 = 4, E AS v % 5 = 0 +);'); + rpr_explain_filter +------------------------------------------------------------------- + WindowAgg + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a ((b | c) (d | e))* + -> Function Scan on generate_series s +(4 rows) + +DROP VIEW rpr_v; +-- ============================================================ +-- State Statistics Tests (peak, total, merged) +-- ============================================================ +-- Simple quantifier pattern - A+ with short matches (no merging) +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+) + DEFINE A AS v % 2 = 1 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +----------------- + PATTERN (a+) +(1 row) + SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -167,7 +328,25 @@ WINDOW w AS ( -> Function Scan on generate_series s (actual rows=50.00 loops=1) (8 rows) --- Test 2.2: Alternation pattern - multiple state branches +DROP VIEW rpr_v; +-- Alternation pattern - multiple state branches +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM nfa_test +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B | C) (D | E)) + DEFINE + A AS cat = 'A', B AS cat = 'B', C AS cat = 'C', + D AS cat = 'D', E AS cat = 'E' +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +---------------------------------- + PATTERN ((a | b | c) (d | e)) +(1 row) + SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -184,7 +363,7 @@ WINDOW w AS ( ------------------------------------------------------------------- WindowAgg (actual rows=100.00 loops=1) Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) - Pattern: (a | b | c d | e) + Pattern: (a | b | c) (d | e) Storage: Memory Maximum Storage: NkB NFA States: 5 peak, 363 total, 0 merged NFA Contexts: 3 peak, 101 total, 40 pruned @@ -192,7 +371,26 @@ WINDOW w AS ( -> Seq Scan on nfa_test (actual rows=100.00 loops=1) (8 rows) --- Test 2.3: Complex pattern with high state count +DROP VIEW rpr_v; +-- Complex pattern with high state count +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B* C+) + DEFINE + A AS v % 3 = 1, + B AS v % 3 = 2, + C AS v % 3 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +----------------------- + PATTERN (a+ b* c+) +(1 row) + SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -218,7 +416,23 @@ WINDOW w AS ( -> Function Scan on generate_series s (actual rows=100.00 loops=1) (8 rows) --- Test 2.4: Grouped pattern with quantifier - state merging +DROP VIEW rpr_v; +-- Grouped pattern with quantifier - state merging +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 60) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A B)+) + DEFINE A AS v % 2 = 1, B AS v % 2 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +--------------------- + PATTERN ((a b)+) +(1 row) + SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -242,8 +456,24 @@ WINDOW w AS ( -> Function Scan on generate_series s (actual rows=60.00 loops=1) (9 rows) --- Test 2.5: State explosion pattern - many alternations +DROP VIEW rpr_v; +-- State explosion pattern - many alternations -- Pattern (A|B)(A|B)(A|B)(A|B) can create many parallel states +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B) (A | B) (A | B) (A | B) (A | B) (A | B) (A | B) (A | B)) + DEFINE A AS v % 2 = 1, B AS v % 2 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------------------------------------------------------------------ + PATTERN ((a | b) (a | b) (a | b) (a | b) (a | b) (a | b) (a | b) (a | b)) +(1 row) + SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -258,7 +488,7 @@ WINDOW w AS ( ----------------------------------------------------------------------- WindowAgg (actual rows=100.00 loops=1) Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) - Pattern: (a | b a | b a | b a | b a | b a | b a | b a | b) + Pattern: (a | b){8} Storage: Memory Maximum Storage: NkB NFA States: 17 peak, 632 total, 0 merged NFA Contexts: 9 peak, 101 total, 1 pruned @@ -267,7 +497,130 @@ WINDOW w AS ( -> Function Scan on generate_series s (actual rows=100.00 loops=1) (9 rows) --- Test 2.6: High state merging - alternation with plus quantifier +DROP VIEW rpr_v; +-- Consecutive ALT merge followed by different ALT +-- Tests mergeConsecutiveAlts flush on ALT change: (A|B){2} (C|D) +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 40) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B) (A | B) (C | D)) + DEFINE A AS v % 4 = 0, B AS v % 4 = 1, C AS v % 4 = 2, D AS v % 4 = 3 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +-------------------------------------- + PATTERN ((a | b) (a | b) (c | d)) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (COSTS OFF) +SELECT count(*) OVER w +FROM generate_series(1, 40) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B) (A | B) (C | D)) + DEFINE A AS v % 4 = 0, B AS v % 4 = 1, C AS v % 4 = 2, D AS v % 4 = 3 +);'); + rpr_explain_filter +------------------------------------------------------------------- + WindowAgg + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a | b){2} (c | d) + -> Function Scan on generate_series s +(4 rows) + +DROP VIEW rpr_v; +-- Consecutive ALT merge followed by non-ALT element +-- Tests mergeConsecutiveAlts flush on non-ALT: (A|B){2} c +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 40) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B) (A | B) C) + DEFINE A AS v % 3 = 0, B AS v % 3 = 1, C AS v % 3 = 2 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +-------------------------------- + PATTERN ((a | b) (a | b) c) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (COSTS OFF) +SELECT count(*) OVER w +FROM generate_series(1, 40) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B) (A | B) C) + DEFINE A AS v % 3 = 0, B AS v % 3 = 1, C AS v % 3 = 2 +);'); + rpr_explain_filter +------------------------------------------------------------------- + WindowAgg + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a | b){2} c + -> Function Scan on generate_series s +(4 rows) + +DROP VIEW rpr_v; +-- ALT prefix/suffix absorbed into GROUP: (A|B) (A|B)+ (A|B) -> (A|B){3,} +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 40) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B) (A | B)+ (A | B)) + DEFINE A AS v % 2 = 0, B AS v % 2 = 1 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +--------------------------------------- + PATTERN ((a | b) (a | b)+ (a | b)) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (COSTS OFF) +SELECT count(*) OVER w +FROM generate_series(1, 40) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B) (A | B)+ (A | B)) + DEFINE A AS v % 2 = 0, B AS v % 2 = 1 +);'); + rpr_explain_filter +------------------------------------------------------------------- + WindowAgg + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a | b){3,} + -> Function Scan on generate_series s +(4 rows) + +DROP VIEW rpr_v; +-- High state merging - alternation with plus quantifier +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B | C)+ D) + DEFINE A AS v % 4 = 1, B AS v % 4 = 2, C AS v % 4 = 3, D AS v % 4 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +----------------------------- + PATTERN ((a | b | c)+ d) +(1 row) + SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -291,7 +644,23 @@ WINDOW w AS ( -> Function Scan on generate_series s (actual rows=100.00 loops=1) (9 rows) --- Test 2.7: Nested quantifiers causing state growth +DROP VIEW rpr_v; +-- Nested quantifiers causing state growth +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 1000) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (((A | B)+)+) + DEFINE A AS v % 3 = 1, B AS v % 3 = 2 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +-------------------------- + PATTERN (((a | b)+)+) +(1 row) + SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -306,19 +675,35 @@ WINDOW w AS ( ------------------------------------------------------------------------ WindowAgg (actual rows=1000.00 loops=1) Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) - Pattern: ((a | b)+)+ + Pattern: (a | b)+ Storage: Memory Maximum Storage: NkB - NFA States: 16 peak, 7334 total, 0 merged + NFA States: 8 peak, 4002 total, 0 merged NFA Contexts: 4 peak, 1001 total, 333 pruned NFA: 334 matched (len 1/2/2.0), 0 mismatched NFA: 0 absorbed, 333 skipped (len 2/2/2.0) -> Function Scan on generate_series s (actual rows=1000.00 loops=1) (9 rows) --- --- Section 3: Context Statistics Tests (peak, total, absorbed, skipped) --- --- Test 3.1: Context absorption with unbounded quantifier at start +DROP VIEW rpr_v; +-- ============================================================ +-- Context Statistics Tests (peak, total, absorbed, skipped) +-- ============================================================ +-- Context absorption with unbounded quantifier at start +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------- + PATTERN (a+ b) +(1 row) + SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -342,7 +727,23 @@ WINDOW w AS ( -> Function Scan on generate_series s (actual rows=50.00 loops=1) (9 rows) --- Test 3.2: No absorption - bounded quantifier +DROP VIEW rpr_v; +-- No absorption - bounded quantifier +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A{2,4} B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +----------------------- + PATTERN (a{2,4} b) +(1 row) + SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -366,7 +767,23 @@ WINDOW w AS ( -> Function Scan on generate_series s (actual rows=50.00 loops=1) (9 rows) --- Test 3.3: Contexts skipped by SKIP PAST LAST ROW +DROP VIEW rpr_v; +-- Contexts skipped by SKIP PAST LAST ROW +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C) + DEFINE A AS v % 10 = 1, B AS v % 10 = 2, C AS v % 10 = 3 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +-------------------- + PATTERN (a b c) +(1 row) + SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -389,7 +806,23 @@ WINDOW w AS ( -> Function Scan on generate_series s (actual rows=100.00 loops=1) (8 rows) --- Test 3.4: High context absorption - unbounded group +DROP VIEW rpr_v; +-- High context absorption - unbounded group +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A B)+ C) + DEFINE A AS v % 3 = 1, B AS v % 3 = 2, C AS v % 3 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +----------------------- + PATTERN ((a b)+ c) +(1 row) + SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -412,12 +845,12 @@ WINDOW w AS ( -> Function Scan on generate_series s (actual rows=100.00 loops=1) (8 rows) --- --- Section 4: Match Length Statistics Tests --- --- Test 4.1: Fixed length matches - all same length -SELECT rpr_explain_filter(' -EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +DROP VIEW rpr_v; +-- ============================================================ +-- Match Length Statistics Tests +-- ============================================================ +-- Fixed length matches - all same length +CREATE TEMP VIEW rpr_v AS SELECT count(*) OVER w FROM nfa_test WINDOW w AS ( @@ -425,12 +858,30 @@ WINDOW w AS ( AFTER MATCH SKIP PAST LAST ROW PATTERN (A B C D E) DEFINE - A AS cat = ''A'', B AS cat = ''B'', C AS cat = ''C'', - D AS cat = ''D'', E AS cat = ''E'' -);'); - rpr_explain_filter -------------------------------------------------------------------- - WindowAgg (actual rows=100.00 loops=1) + A AS cat = 'A', B AS cat = 'B', C AS cat = 'C', + D AS cat = 'D', E AS cat = 'E' +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------------ + PATTERN (a b c d e) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM nfa_test +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C D E) + DEFINE + A AS cat = ''A'', B AS cat = ''B'', C AS cat = ''C'', + D AS cat = ''D'', E AS cat = ''E'' +);'); + rpr_explain_filter +------------------------------------------------------------------- + WindowAgg (actual rows=100.00 loops=1) Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) Pattern: a b c d e Storage: Memory Maximum Storage: NkB @@ -440,7 +891,23 @@ WINDOW w AS ( -> Seq Scan on nfa_test (actual rows=100.00 loops=1) (8 rows) --- Test 4.2: Variable length matches - min/max/avg differ +DROP VIEW rpr_v; +-- Variable length matches - min/max/avg differ +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 10 <> 0, B AS v % 10 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------- + PATTERN (a+ b) +(1 row) + SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -464,7 +931,23 @@ WINDOW w AS ( -> Function Scan on generate_series s (actual rows=100.00 loops=1) (9 rows) --- Test 4.3: Very long matches +DROP VIEW rpr_v; +-- Very long matches +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 200) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v <= 195, B AS v > 195 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------- + PATTERN (a+ b) +(1 row) + SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -488,7 +971,25 @@ WINDOW w AS ( -> Function Scan on generate_series s (actual rows=200.00 loops=1) (9 rows) --- Test 4.4: Mix of short and long matches +DROP VIEW rpr_v; +-- Mix of short and long matches +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE + A AS (v % 20 <> 0) AND (v % 20 <= 10 OR v % 20 > 15), + B AS v % 20 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------- + PATTERN (a+ b) +(1 row) + SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -514,11 +1015,34 @@ WINDOW w AS ( -> Function Scan on generate_series s (actual rows=100.00 loops=1) (9 rows) --- --- Section 5: Mismatch Length Statistics Tests --- --- Test 5.1: Pattern that causes mismatches with length > 1 +DROP VIEW rpr_v; +-- ============================================================ +-- Mismatch Length Statistics Tests +-- ============================================================ +-- Pattern that causes mismatches with length > 1 -- Mismatch happens when partial match fails after processing multiple rows +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM ( + SELECT v, + CASE WHEN v % 10 IN (1,2,3) THEN 'A' + WHEN v % 10 IN (4,5) THEN 'B' + WHEN v % 10 = 6 THEN 'C' + ELSE 'X' END AS cat + FROM generate_series(1, 100) AS s(v) +) t +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B+ C) + DEFINE A AS cat = 'A', B AS cat = 'B', C AS cat = 'C' +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +---------------------- + PATTERN (a+ b+ c) +(1 row) + SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -549,7 +1073,35 @@ WINDOW w AS ( -> Function Scan on generate_series s (actual rows=100.00 loops=1) (9 rows) --- Test 5.2: Long partial matches that fail +DROP VIEW rpr_v; +-- Long partial matches that fail +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM ( + SELECT i AS v, + CASE + WHEN i <= 20 THEN 'A' + WHEN i <= 25 THEN 'B' + WHEN i = 26 THEN 'X' -- breaks the pattern + WHEN i <= 50 THEN 'A' + WHEN i <= 55 THEN 'B' + WHEN i = 56 THEN 'C' -- completes pattern + ELSE 'Y' + END AS cat + FROM generate_series(1, 60) i +) t +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B+ C) + DEFINE A AS cat = 'A', B AS cat = 'B', C AS cat = 'C' +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +---------------------- + PATTERN (a+ b+ c) +(1 row) + SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -585,10 +1137,26 @@ WINDOW w AS ( -> Function Scan on generate_series i (actual rows=60.00 loops=1) (9 rows) --- --- Section 6: JSON Format Tests --- --- Test 6.1: JSON format output with all statistics +DROP VIEW rpr_v; +-- ============================================================ +-- JSON Format Tests +-- ============================================================ +-- JSON format output with all statistics +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B+) + DEFINE A AS v % 3 = 1, B AS v % 3 = 2 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +-------------------- + PATTERN (a+ b+) +(1 row) + SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF, FORMAT JSON) SELECT count(*) OVER w @@ -647,7 +1215,23 @@ WINDOW w AS ( ] (1 row) --- Test 6.2: JSON format with match length statistics +DROP VIEW rpr_v; +-- JSON format with match length statistics +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 10 <> 0, B AS v % 10 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------- + PATTERN (a+ b) +(1 row) + SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF, FORMAT JSON) SELECT count(*) OVER w @@ -709,10 +1293,26 @@ WINDOW w AS ( ] (1 row) --- --- Section 7: XML Format Tests --- --- Test 7.1: XML format output +DROP VIEW rpr_v; +-- ============================================================ +-- XML Format Tests +-- ============================================================ +-- XML format output +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 30) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B) + DEFINE A AS v % 2 = 1, B AS v % 2 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------ + PATTERN (a b) +(1 row) + SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF, FORMAT XML) SELECT count(*) OVER w @@ -771,10 +1371,191 @@ WINDOW w AS ( (1 row) --- --- Section 8: Multiple Partitions Tests --- --- Test 8.1: Statistics across multiple partitions +DROP VIEW rpr_v; +-- JSON format with mismatch statistics +-- Pattern A B C expects 1,2,3 but gets 1,2,4 twice causing mismatches +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM (VALUES (1),(2),(4), (1),(2),(4), (1),(2),(3)) AS t(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C) + DEFINE A AS v = 1, B AS v = 2, C AS v = 3 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +-------------------- + PATTERN (a b c) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF, FORMAT JSON) +SELECT count(*) OVER w +FROM (VALUES (1),(2),(4), (1),(2),(4), (1),(2),(3)) AS t(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C) + DEFINE A AS v = 1, B AS v = 2, C AS v = 3 +)'); + rpr_explain_filter +---------------------------------------------------------------------------- + [ + + { + + "Plan": { + + "Node Type": "WindowAgg", + + "Parallel Aware": false, + + "Async Capable": false, + + "Actual Rows": 9.00, + + "Actual Loops": 1, + + "Disabled": false, + + "Window": "w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING)",+ + "Pattern": "a b c", + + "Storage": "Memory", + + "Maximum Storage": 0, + + "NFA States Peak": 2, + + "NFA States Total": 10, + + "NFA States Merged": 0, + + "NFA Contexts Peak": 3, + + "NFA Contexts Total": 10, + + "NFA Contexts Absorbed": 0, + + "NFA Contexts Skipped": 0, + + "NFA Contexts Pruned": 6, + + "NFA Matched": 1, + + "NFA Mismatched": 2, + + "NFA Match Length Min": 3, + + "NFA Match Length Max": 3, + + "NFA Match Length Avg": 3.0, + + "NFA Mismatch Length Min": 3, + + "NFA Mismatch Length Max": 3, + + "NFA Mismatch Length Avg": 3.0, + + "Plans": [ + + { + + "Node Type": "Values Scan", + + "Parent Relationship": "Outer", + + "Parallel Aware": false, + + "Async Capable": false, + + "Alias": "*VALUES*", + + "Actual Rows": 9.00, + + "Actual Loops": 1, + + "Disabled": false + + } + + ] + + }, + + "Triggers": [ + + ] + + } + + ] +(1 row) + +DROP VIEW rpr_v; +-- JSON format with skipped context statistics +-- Alternation pattern with SKIP PAST LAST ROW causes many contexts to be skipped +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B) (A | B) (A | B) (A | B) (A | B) (A | B) (A | B) (A | B)) + DEFINE A AS v % 2 = 1, B AS v % 2 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------------------------------------------------------------------ + PATTERN ((a | b) (a | b) (a | b) (a | b) (a | b) (a | b) (a | b) (a | b)) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF, FORMAT JSON) +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B) (A | B) (A | B) (A | B) (A | B) (A | B) (A | B) (A | B)) + DEFINE A AS v % 2 = 1, B AS v % 2 = 0 +)'); + rpr_explain_filter +---------------------------------------------------------------------------- + [ + + { + + "Plan": { + + "Node Type": "WindowAgg", + + "Parallel Aware": false, + + "Async Capable": false, + + "Actual Rows": 100.00, + + "Actual Loops": 1, + + "Disabled": false, + + "Window": "w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING)",+ + "Pattern": "(a | b){8}", + + "Storage": "Memory", + + "Maximum Storage": 0, + + "NFA States Peak": 17, + + "NFA States Total": 632, + + "NFA States Merged": 0, + + "NFA Contexts Peak": 9, + + "NFA Contexts Total": 101, + + "NFA Contexts Absorbed": 0, + + "NFA Contexts Skipped": 84, + + "NFA Contexts Pruned": 1, + + "NFA Matched": 12, + + "NFA Mismatched": 3, + + "NFA Match Length Min": 8, + + "NFA Match Length Max": 8, + + "NFA Match Length Avg": 8.0, + + "NFA Mismatch Length Min": 2, + + "NFA Mismatch Length Max": 4, + + "NFA Mismatch Length Avg": 3.0, + + "NFA Skipped Length Min": 1, + + "NFA Skipped Length Max": 7, + + "NFA Skipped Length Avg": 4.0, + + "Plans": [ + + { + + "Node Type": "Function Scan", + + "Parent Relationship": "Outer", + + "Parallel Aware": false, + + "Async Capable": false, + + "Function Name": "generate_series", + + "Alias": "s", + + "Actual Rows": 100.00, + + "Actual Loops": 1, + + "Disabled": false + + } + + ] + + }, + + "Triggers": [ + + ] + + } + + ] +(1 row) + +DROP VIEW rpr_v; +-- ============================================================ +-- Multiple Partitions Tests +-- ============================================================ +-- Statistics across multiple partitions +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM ( + SELECT p, v + FROM generate_series(1, 3) p, + generate_series(1, 30) v +) t +WINDOW w AS ( + PARTITION BY p + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------- + PATTERN (a+ b) +(1 row) + SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -808,7 +1589,29 @@ WINDOW w AS ( -> Function Scan on generate_series v (actual rows=30.00 loops=3) (14 rows) --- Test 8.2: Different pattern behavior per partition +DROP VIEW rpr_v; +-- Different pattern behavior per partition +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM ( + SELECT + CASE WHEN v <= 25 THEN 1 ELSE 2 END AS p, + v % 10 AS val + FROM generate_series(1, 50) v +) t +WINDOW w AS ( + PARTITION BY p + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS val < 5, B AS val >= 5 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------- + PATTERN (a+ b) +(1 row) + SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -841,12 +1644,12 @@ WINDOW w AS ( -> Function Scan on generate_series v (actual rows=50.00 loops=1) (12 rows) --- --- Section 9: Edge Cases --- --- Test 9.1: Empty result set -SELECT rpr_explain_filter(' -EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +DROP VIEW rpr_v; +-- ============================================================ +-- Edge Cases +-- ============================================================ +-- Empty result set +CREATE TEMP VIEW rpr_v AS SELECT count(*) OVER w FROM generate_series(1, 0) AS s(v) WINDOW w AS ( @@ -854,7 +1657,23 @@ WINDOW w AS ( AFTER MATCH SKIP PAST LAST ROW PATTERN (A B) DEFINE A AS v = 1, B AS v = 2 -);'); +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------ + PATTERN (a b) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 0) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B) + DEFINE A AS v = 1, B AS v = 2 +);'); rpr_explain_filter --------------------------------------------------------------------- WindowAgg (actual rows=0.00 loops=1) @@ -863,7 +1682,23 @@ WINDOW w AS ( -> Function Scan on generate_series s (actual rows=0.00 loops=1) (4 rows) --- Test 9.2: Single row +DROP VIEW rpr_v; +-- Single row +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 1) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A) + DEFINE A AS TRUE +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +---------------- + PATTERN (a) +(1 row) + SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -886,7 +1721,25 @@ WINDOW w AS ( -> Function Scan on generate_series s (actual rows=1.00 loops=1) (8 rows) --- Test 9.3: Pattern longer than data +DROP VIEW rpr_v; +-- Pattern longer than data +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 5) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C D E F G H I J) + DEFINE + A AS v = 1, B AS v = 2, C AS v = 3, D AS v = 4, E AS v = 5, + F AS v = 6, G AS v = 7, H AS v = 8, I AS v = 9, J AS v = 10 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +---------------------------------- + PATTERN (a b c d e f g h i j) +(1 row) + SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -911,7 +1764,23 @@ WINDOW w AS ( -> Function Scan on generate_series s (actual rows=5.00 loops=1) (8 rows) --- Test 9.4: All rows match as single match +DROP VIEW rpr_v; +-- All rows match as single match +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+) + DEFINE A AS TRUE +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +----------------- + PATTERN (a+) +(1 row) + SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -935,10 +1804,26 @@ WINDOW w AS ( -> Function Scan on generate_series s (actual rows=50.00 loops=1) (9 rows) --- --- Section 10: Complex Pattern Tests --- --- Test 10.1: Nested groups +DROP VIEW rpr_v; +-- ============================================================ +-- Complex Pattern Tests +-- ============================================================ +-- Nested groups +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 60) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (((A B) C)+) + DEFINE A AS v % 3 = 1, B AS v % 3 = 2, C AS v % 3 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------------- + PATTERN (((a b) c)+) +(1 row) + SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -962,7 +1847,25 @@ WINDOW w AS ( -> Function Scan on generate_series s (actual rows=60.00 loops=1) (9 rows) --- Test 10.2: Multiple alternations +DROP VIEW rpr_v; +-- Multiple alternations +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM nfa_test +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B) (C | D | E)) + DEFINE + A AS cat = 'A', B AS cat = 'B', C AS cat = 'C', + D AS cat = 'D', E AS cat = 'E' +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +---------------------------------- + PATTERN ((a | b) (c | d | e)) +(1 row) + SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -979,7 +1882,7 @@ WINDOW w AS ( ------------------------------------------------------------------- WindowAgg (actual rows=100.00 loops=1) Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) - Pattern: (a | b c | d | e) + Pattern: (a | b) (c | d | e) Storage: Memory Maximum Storage: NkB NFA States: 5 peak, 282 total, 0 merged NFA Contexts: 3 peak, 101 total, 60 pruned @@ -987,7 +1890,23 @@ WINDOW w AS ( -> Seq Scan on nfa_test (actual rows=100.00 loops=1) (8 rows) --- Test 10.3: Optional elements +DROP VIEW rpr_v; +-- Optional elements +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B? C) + DEFINE A AS v % 4 = 1, B AS v % 4 = 2, C AS v % 4 = 3 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +--------------------- + PATTERN (a b? c) +(1 row) + SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -1010,7 +1929,23 @@ WINDOW w AS ( -> Function Scan on generate_series s (actual rows=50.00 loops=1) (8 rows) --- Test 10.4: Bounded quantifiers +DROP VIEW rpr_v; +-- Bounded quantifiers +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A{2,5} B) + DEFINE A AS v % 10 <> 0, B AS v % 10 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +----------------------- + PATTERN (a{2,5} b) +(1 row) + SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -1034,7 +1969,23 @@ WINDOW w AS ( -> Function Scan on generate_series s (actual rows=100.00 loops=1) (9 rows) --- Test 10.5: Star quantifier +DROP VIEW rpr_v; +-- Star quantifier +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B* C) + DEFINE A AS v % 10 = 1, B AS v % 10 IN (2,3,4,5,6,7,8), C AS v % 10 = 9 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +--------------------- + PATTERN (a b* c) +(1 row) + SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -1057,10 +2008,26 @@ WINDOW w AS ( -> Function Scan on generate_series s (actual rows=50.00 loops=1) (8 rows) --- --- Section 11: Real-world Pattern Examples --- --- Test 11.1: Stock price pattern - V-shape (down then up) +DROP VIEW rpr_v; +-- ============================================================ +-- Real-world Pattern Examples +-- ============================================================ +-- Stock price pattern - V-shape (down then up) +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM nfa_complex +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (D+ U+) + DEFINE D AS trend = 'D', U AS trend = 'U' +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +-------------------- + PATTERN (d+ u+) +(1 row) + SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -1084,7 +2051,23 @@ WINDOW w AS ( -> Seq Scan on nfa_complex (actual rows=30.00 loops=1) (9 rows) --- Test 11.2: Stock price pattern - peak (up, stable, down) +DROP VIEW rpr_v; +-- Stock price pattern - peak (up, stable, down) +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM nfa_complex +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (U+ S* D+) + DEFINE U AS trend = 'U', S AS trend = 'S', D AS trend = 'D' +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +----------------------- + PATTERN (u+ s* d+) +(1 row) + SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -1108,7 +2091,23 @@ WINDOW w AS ( -> Seq Scan on nfa_complex (actual rows=30.00 loops=1) (9 rows) --- Test 11.3: Consecutive increasing values (using PREV) +DROP VIEW rpr_v; +-- Consecutive increasing values (using PREV) +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A{3,}) + DEFINE A AS v > PREV(v) OR PREV(v) IS NULL +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +-------------------- + PATTERN (a{3,}) +(1 row) + SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -1132,10 +2131,26 @@ WINDOW w AS ( -> Function Scan on generate_series s (actual rows=50.00 loops=1) (9 rows) --- --- Section 12: Performance-oriented Tests --- --- Test 12.1: Large dataset with simple pattern +DROP VIEW rpr_v; +-- ============================================================ +-- Performance-oriented Tests +-- ============================================================ +-- Large dataset with simple pattern +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 1000) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B) + DEFINE A AS v % 2 = 1, B AS v % 2 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------ + PATTERN (a b) +(1 row) + SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -1158,7 +2173,23 @@ WINDOW w AS ( -> Function Scan on generate_series s (actual rows=1000.00 loops=1) (8 rows) --- Test 12.2: Large dataset with absorption +DROP VIEW rpr_v; +-- Large dataset with absorption +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 1000) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 100 <> 0, B AS v % 100 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------- + PATTERN (a+ b) +(1 row) + SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -1182,7 +2213,23 @@ WINDOW w AS ( -> Function Scan on generate_series s (actual rows=1000.00 loops=1) (9 rows) --- Test 12.3: High state merge ratio +DROP VIEW rpr_v; +-- High state merge ratio +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 500) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B)+ C) + DEFINE A AS v % 3 = 1, B AS v % 3 = 2, C AS v % 3 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------------- + PATTERN ((a | b)+ c) +(1 row) + SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -1206,10 +2253,27 @@ WINDOW w AS ( -> Function Scan on generate_series s (actual rows=500.00 loops=1) (9 rows) --- --- Section 13: INITIAL vs no INITIAL comparison --- --- Test 13.1: With INITIAL keyword +DROP VIEW rpr_v; +-- ============================================================ +-- INITIAL vs no INITIAL comparison +-- ============================================================ +-- With INITIAL keyword +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + INITIAL + PATTERN (A+ B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------- + PATTERN (a+ b) +(1 row) + SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -1234,7 +2298,23 @@ WINDOW w AS ( -> Function Scan on generate_series s (actual rows=50.00 loops=1) (9 rows) --- Test 13.2: Without INITIAL keyword (same behavior currently) +DROP VIEW rpr_v; +-- Without INITIAL keyword (same behavior currently) +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------- + PATTERN (a+ b) +(1 row) + SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -1258,10 +2338,26 @@ WINDOW w AS ( -> Function Scan on generate_series s (actual rows=50.00 loops=1) (9 rows) --- --- Section 14: Quantifier Variations --- --- Test 14.1: Plus quantifier +DROP VIEW rpr_v; +-- ============================================================ +-- Quantifier Variations +-- ============================================================ +-- Plus quantifier +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 40) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+) + DEFINE A AS v % 4 <> 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +----------------- + PATTERN (a+) +(1 row) + SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -1285,7 +2381,23 @@ WINDOW w AS ( -> Function Scan on generate_series s (actual rows=40.00 loops=1) (9 rows) --- Test 14.2: Star quantifier (zero or more) +DROP VIEW rpr_v; +-- Star quantifier (zero or more) +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 40) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A* B) + DEFINE A AS v % 4 IN (1, 2), B AS v % 4 = 3 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------- + PATTERN (a* b) +(1 row) + SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -1309,7 +2421,23 @@ WINDOW w AS ( -> Function Scan on generate_series s (actual rows=40.00 loops=1) (9 rows) --- Test 14.3: Question mark (zero or one) +DROP VIEW rpr_v; +-- Question mark (zero or one) +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 40) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A? B C) + DEFINE A AS v % 4 = 1, B AS v % 4 = 2, C AS v % 4 = 3 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +--------------------- + PATTERN (a? b c) +(1 row) + SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -1333,7 +2461,23 @@ WINDOW w AS ( -> Function Scan on generate_series s (actual rows=40.00 loops=1) (9 rows) --- Test 14.4: Exact count {n} +DROP VIEW rpr_v; +-- Exact count {n} +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A{3} B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +--------------------- + PATTERN (a{3} b) +(1 row) + SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -1356,7 +2500,23 @@ WINDOW w AS ( -> Function Scan on generate_series s (actual rows=50.00 loops=1) (8 rows) --- Test 14.5: Range {n,m} +DROP VIEW rpr_v; +-- Range {n,m} +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A{2,4} B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +----------------------- + PATTERN (a{2,4} b) +(1 row) + SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -1380,7 +2540,23 @@ WINDOW w AS ( -> Function Scan on generate_series s (actual rows=50.00 loops=1) (9 rows) --- Test 14.6: At least {n,} +DROP VIEW rpr_v; +-- At least {n,} +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A{3,} B) + DEFINE A AS v % 10 <> 0, B AS v % 10 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +---------------------- + PATTERN (a{3,} b) +(1 row) + SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -1404,11 +2580,27 @@ WINDOW w AS ( -> Function Scan on generate_series s (actual rows=50.00 loops=1) (9 rows) --- --- Section 15: Regression Tests for Statistics Accuracy --- --- Test 15.1: Verify state count accuracy +DROP VIEW rpr_v; +-- ============================================================ +-- Regression Tests for Statistics Accuracy +-- ============================================================ +-- Verify state count accuracy -- Pattern A+ B with 20 rows should show predictable state behavior +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 20) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------- + PATTERN (a+ b) +(1 row) + SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -1432,7 +2624,23 @@ WINDOW w AS ( -> Function Scan on generate_series s (actual rows=20.00 loops=1) (9 rows) --- Test 15.2: Verify context count with known absorption +DROP VIEW rpr_v; +-- Verify context count with known absorption +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 30) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B C) + DEFINE A AS v % 10 IN (1,2,3,4,5,6,7), B AS v % 10 = 8, C AS v % 10 = 9 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +--------------------- + PATTERN (a+ b c) +(1 row) + SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -1456,7 +2664,23 @@ WINDOW w AS ( -> Function Scan on generate_series s (actual rows=30.00 loops=1) (9 rows) --- Test 15.3: Verify match length with fixed-length pattern +DROP VIEW rpr_v; +-- Verify match length with fixed-length pattern +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 30) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C) + DEFINE A AS v % 3 = 1, B AS v % 3 = 2, C AS v % 3 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +-------------------- + PATTERN (a b c) +(1 row) + SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -1479,10 +2703,26 @@ WINDOW w AS ( -> Function Scan on generate_series s (actual rows=30.00 loops=1) (8 rows) --- --- Section 16: Alternation Pattern Tests --- --- Test 16.1: Simple alternation +DROP VIEW rpr_v; +-- ============================================================ +-- Alternation Pattern Tests +-- ============================================================ +-- Simple alternation +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM nfa_test +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B) C) + DEFINE A AS cat = 'A', B AS cat = 'B', C AS cat = 'C' +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------------ + PATTERN ((a | b) c) +(1 row) + SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -1505,7 +2745,25 @@ WINDOW w AS ( -> Seq Scan on nfa_test (actual rows=100.00 loops=1) (8 rows) --- Test 16.2: Multiple items in alternation +DROP VIEW rpr_v; +-- Multiple items in alternation +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM nfa_test +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B | C | D) E) + DEFINE + A AS cat = 'A', B AS cat = 'B', C AS cat = 'C', + D AS cat = 'D', E AS cat = 'E' +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +-------------------------------- + PATTERN ((a | b | c | d) e) +(1 row) + SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -1530,7 +2788,23 @@ WINDOW w AS ( -> Seq Scan on nfa_test (actual rows=100.00 loops=1) (8 rows) --- Test 16.3: Alternation with quantifiers +DROP VIEW rpr_v; +-- Alternation with quantifiers +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B)+ C) + DEFINE A AS v % 3 = 1, B AS v % 3 = 2, C AS v % 3 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------------- + PATTERN ((a | b)+ c) +(1 row) + SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -1554,10 +2828,259 @@ WINDOW w AS ( -> Function Scan on generate_series s (actual rows=50.00 loops=1) (9 rows) --- --- Section 17: Group Pattern Tests --- --- Test 17.1: Simple group +DROP VIEW rpr_v; +-- Multiple alternatives (4+) +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A | B | C | D | E) + DEFINE A AS v % 5 = 0, B AS v % 5 = 1, C AS v % 5 = 2, D AS v % 5 = 3, E AS v % 5 = 4 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +-------------------------------- + PATTERN (a | b | c | d | e) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (COSTS OFF) +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A | B | C | D | E) + DEFINE A AS v % 5 = 0, B AS v % 5 = 1, C AS v % 5 = 2, D AS v % 5 = 3, E AS v % 5 = 4 +);'); + rpr_explain_filter +------------------------------------------------------------------- + WindowAgg + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a | b | c | d | e) + -> Function Scan on generate_series s +(4 rows) + +DROP VIEW rpr_v; +-- Alternation at start +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 60) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A | B) C D) + DEFINE A AS v % 4 = 0, B AS v % 4 = 1, C AS v % 4 = 2, D AS v % 4 = 3 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +-------------------------- + PATTERN ((a | b) c d) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (COSTS OFF) +SELECT count(*) OVER w +FROM generate_series(1, 60) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A | B) C D) + DEFINE A AS v % 4 = 0, B AS v % 4 = 1, C AS v % 4 = 2, D AS v % 4 = 3 +);'); + rpr_explain_filter +------------------------------------------------------------------- + WindowAgg + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a | b) c d + -> Function Scan on generate_series s +(4 rows) + +DROP VIEW rpr_v; +-- Multiple sequential alternations +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A | B) C (D | E) F) + DEFINE A AS v % 6 = 0, B AS v % 6 = 1, C AS v % 6 = 2, D AS v % 6 = 3, E AS v % 6 = 4, F AS v % 6 = 5 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +---------------------------------- + PATTERN ((a | b) c (d | e) f) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (COSTS OFF) +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A | B) C (D | E) F) + DEFINE A AS v % 6 = 0, B AS v % 6 = 1, C AS v % 6 = 2, D AS v % 6 = 3, E AS v % 6 = 4, F AS v % 6 = 5 +);'); + rpr_explain_filter +------------------------------------------------------------------- + WindowAgg + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a | b) c (d | e) f + -> Function Scan on generate_series s +(4 rows) + +DROP VIEW rpr_v; +-- Quantified alternatives +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 60) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A+ | B+) C) + DEFINE A AS v % 3 = 0, B AS v % 3 = 1, C AS v % 3 = 2 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +-------------------------- + PATTERN ((a+ | b+) c) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (COSTS OFF) +SELECT count(*) OVER w +FROM generate_series(1, 60) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A+ | B+) C) + DEFINE A AS v % 3 = 0, B AS v % 3 = 1, C AS v % 3 = 2 +);'); + rpr_explain_filter +------------------------------------------------------------------- + WindowAgg + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a+" | b+") c + -> Function Scan on generate_series s +(4 rows) + +DROP VIEW rpr_v; +-- Alternation at end +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 60) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B (C | D)) + DEFINE A AS v % 4 = 0, B AS v % 4 = 1, C AS v % 4 = 2, D AS v % 4 = 3 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +-------------------------- + PATTERN (a b (c | d)) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (COSTS OFF) +SELECT count(*) OVER w +FROM generate_series(1, 60) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B (C | D)) + DEFINE A AS v % 4 = 0, B AS v % 4 = 1, C AS v % 4 = 2, D AS v % 4 = 3 +);'); + rpr_explain_filter +------------------------------------------------------------------- + WindowAgg + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a b (c | d) + -> Function Scan on generate_series s +(4 rows) + +DROP VIEW rpr_v; +-- Nested ALT at start of branch inside outer ALT +-- Pattern: (A ((B | C) D | E)) - preceding VAR + inner ALT as first branch element +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 20) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A ((B | C) D | E)) + DEFINE A AS v % 5 = 0, B AS v % 5 = 1, C AS v % 5 = 2, D AS v % 5 = 3, E AS v % 5 = 4 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +-------------------------------- + PATTERN (a ((b | c) d | e)) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (COSTS OFF) +SELECT count(*) OVER w +FROM generate_series(1, 20) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A ((B | C) D | E)) + DEFINE A AS v % 5 = 0, B AS v % 5 = 1, C AS v % 5 = 2, D AS v % 5 = 3, E AS v % 5 = 4 +);'); + rpr_explain_filter +------------------------------------------------------------------- + WindowAgg + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a ((b | c) d | e) + -> Function Scan on generate_series s +(4 rows) + +DROP VIEW rpr_v; +-- Nested ALT at end of branch inside outer ALT +-- Pattern: (C (A | B) | D) - inner ALT is last element in outer branch +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 20) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (C (A | B) | D) + DEFINE A AS v % 4 = 0, B AS v % 4 = 1, C AS v % 4 = 2, D AS v % 4 = 3 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +---------------------------- + PATTERN (c (a | b) | d) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (COSTS OFF) +SELECT count(*) OVER w +FROM generate_series(1, 20) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (C (A | B) | D) + DEFINE A AS v % 4 = 0, B AS v % 4 = 1, C AS v % 4 = 2, D AS v % 4 = 3 +);'); + rpr_explain_filter +------------------------------------------------------------------- + WindowAgg + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (c (a | b) | d) + -> Function Scan on generate_series s +(4 rows) + +DROP VIEW rpr_v; +-- ============================================================ +-- Group Pattern Tests +-- ============================================================ +-- Simple group +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 40) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A B)+) + DEFINE A AS v % 2 = 1, B AS v % 2 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +--------------------- + PATTERN ((a b)+) +(1 row) + SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -1581,7 +3104,23 @@ WINDOW w AS ( -> Function Scan on generate_series s (actual rows=40.00 loops=1) (9 rows) --- Test 17.2: Group with bounded quantifier +DROP VIEW rpr_v; +-- Group with bounded quantifier +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 40) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A B){2,4}) + DEFINE A AS v % 2 = 1, B AS v % 2 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------------- + PATTERN ((a b){2,4}) +(1 row) + SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -1605,7 +3144,23 @@ WINDOW w AS ( -> Function Scan on generate_series s (actual rows=40.00 loops=1) (9 rows) --- Test 17.3: Nested groups +DROP VIEW rpr_v; +-- Nested groups +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 60) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (((A B){2})+) + DEFINE A AS v % 2 = 1, B AS v % 2 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +-------------------------- + PATTERN (((a b){2})+) +(1 row) + SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -1629,10 +3184,158 @@ WINDOW w AS ( -> Function Scan on generate_series s (actual rows=60.00 loops=1) (9 rows) --- --- Section 18: Window Function Combinations --- --- Test 18.1: count(*) with pattern +DROP VIEW rpr_v; +-- Deep nesting (3+ levels) +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 40) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((((A | B)+)+)+) + DEFINE A AS v % 2 = 0, B AS v % 2 = 1 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +----------------------------- + PATTERN ((((a | b)+)+)+) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (COSTS OFF) +SELECT count(*) OVER w +FROM generate_series(1, 40) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((((A | B)+)+)+) + DEFINE A AS v % 2 = 0, B AS v % 2 = 1 +);'); + rpr_explain_filter +------------------------------------------------------------------- + WindowAgg + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a | b)+ + -> Function Scan on generate_series s +(4 rows) + +DROP VIEW rpr_v; +-- Bounded quantifier on alternation +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 60) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A | B){2,3} C) + DEFINE A AS v % 3 = 0, B AS v % 3 = 1, C AS v % 3 = 2 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +----------------------------- + PATTERN ((a | b){2,3} c) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (COSTS OFF) +SELECT count(*) OVER w +FROM generate_series(1, 60) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A | B){2,3} C) + DEFINE A AS v % 3 = 0, B AS v % 3 = 1, C AS v % 3 = 2 +);'); + rpr_explain_filter +------------------------------------------------------------------- + WindowAgg + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a | b){2,3} c + -> Function Scan on generate_series s +(4 rows) + +DROP VIEW rpr_v; +-- Nested groups with quantifiers +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 60) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (((A B)+ C)*) + DEFINE A AS v % 3 = 0, B AS v % 3 = 1, C AS v % 3 = 2 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +-------------------------- + PATTERN (((a b)+ c)*) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (COSTS OFF) +SELECT count(*) OVER w +FROM generate_series(1, 60) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (((A B)+ C)*) + DEFINE A AS v % 3 = 0, B AS v % 3 = 1, C AS v % 3 = 2 +);'); + rpr_explain_filter +------------------------------------------------------------------- + WindowAgg + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: ((a' b')+" c)* + -> Function Scan on generate_series s +(4 rows) + +DROP VIEW rpr_v; +-- Partial nested quantification +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 60) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A (B C)+)*) + DEFINE A AS v % 3 = 0, B AS v % 3 = 1, C AS v % 3 = 2 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +-------------------------- + PATTERN ((a (b c)+)*) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (COSTS OFF) +SELECT count(*) OVER w +FROM generate_series(1, 60) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A (B C)+)*) + DEFINE A AS v % 3 = 0, B AS v % 3 = 1, C AS v % 3 = 2 +);'); + rpr_explain_filter +------------------------------------------------------------------- + WindowAgg + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a (b c)+)* + -> Function Scan on generate_series s +(4 rows) + +DROP VIEW rpr_v; +-- ============================================================ +-- Window Function Combinations +-- ============================================================ +-- count(*) with pattern +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 30) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------- + PATTERN (a+ b) +(1 row) + SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -1656,7 +3359,23 @@ WINDOW w AS ( -> Function Scan on generate_series s (actual rows=30.00 loops=1) (9 rows) --- Test 18.2: first_value with pattern +DROP VIEW rpr_v; +-- first_value with pattern +CREATE TEMP VIEW rpr_v AS +SELECT first_value(v) OVER w +FROM generate_series(1, 30) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------- + PATTERN (a+ b) +(1 row) + SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT first_value(v) OVER w @@ -1680,7 +3399,23 @@ WINDOW w AS ( -> Function Scan on generate_series s (actual rows=30.00 loops=1) (9 rows) --- Test 18.3: last_value with pattern +DROP VIEW rpr_v; +-- last_value with pattern +CREATE TEMP VIEW rpr_v AS +SELECT last_value(v) OVER w +FROM generate_series(1, 30) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------- + PATTERN (a+ b) +(1 row) + SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT last_value(v) OVER w @@ -1704,7 +3439,26 @@ WINDOW w AS ( -> Function Scan on generate_series s (actual rows=30.00 loops=1) (9 rows) --- Test 18.4: Multiple window functions +DROP VIEW rpr_v; +-- Multiple window functions +CREATE TEMP VIEW rpr_v AS +SELECT + count(*) OVER w, + first_value(v) OVER w, + last_value(v) OVER w +FROM generate_series(1, 30) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------- + PATTERN (a+ b) +(1 row) + SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT @@ -1731,10 +3485,28 @@ WINDOW w AS ( -> Function Scan on generate_series s (actual rows=30.00 loops=1) (9 rows) --- --- Section 19: DEFINE Expression Variations --- --- Test 19.1: Complex boolean expressions +DROP VIEW rpr_v; +-- ============================================================ +-- DEFINE Expression Variations +-- ============================================================ +-- Complex boolean expressions +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE + A AS (v % 5 <> 0) AND (v % 3 <> 0), + B AS (v % 5 = 0) OR (v % 3 = 0) +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------- + PATTERN (a+ b) +(1 row) + SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -1760,7 +3532,26 @@ WINDOW w AS ( -> Function Scan on generate_series s (actual rows=50.00 loops=1) (9 rows) --- Test 19.2: Using PREV function +DROP VIEW rpr_v; +-- Using PREV function +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 30) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (S U+ D+) + DEFINE + S AS TRUE, + U AS v > PREV(v), + D AS v < PREV(v) +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +---------------------- + PATTERN (s u+ d+) +(1 row) + SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -1786,7 +3577,26 @@ WINDOW w AS ( -> Function Scan on generate_series s (actual rows=30.00 loops=1) (8 rows) --- Test 19.3: Using NULL comparisons +DROP VIEW rpr_v; +-- Using NULL comparisons +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM ( + SELECT CASE WHEN v % 5 = 0 THEN NULL ELSE v END AS v + FROM generate_series(1, 30) v +) t +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v IS NOT NULL, B AS v IS NULL +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------- + PATTERN (a+ b) +(1 row) + SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -1813,10 +3623,26 @@ WINDOW w AS ( -> Function Scan on generate_series v (actual rows=30.00 loops=1) (9 rows) --- --- Section 20: Large Scale Statistics Verification --- --- Test 20.1: 500 rows - verify statistics scale correctly +DROP VIEW rpr_v; +-- ============================================================ +-- Large Scale Statistics Verification +-- ============================================================ +-- 500 rows - verify statistics scale correctly +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 500) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B C) + DEFINE A AS v % 10 < 7, B AS v % 10 = 7, C AS v % 10 = 8 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +--------------------- + PATTERN (a+ b c) +(1 row) + SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -1840,7 +3666,23 @@ WINDOW w AS ( -> Function Scan on generate_series s (actual rows=500.00 loops=1) (9 rows) --- Test 20.2: High match count scenario +DROP VIEW rpr_v; +-- High match count scenario +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 500) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B) + DEFINE A AS v % 2 = 1, B AS v % 2 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------ + PATTERN (a b) +(1 row) + SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -1863,7 +3705,28 @@ WINDOW w AS ( -> Function Scan on generate_series s (actual rows=500.00 loops=1) (8 rows) --- Test 20.3: High skip count scenario +DROP VIEW rpr_v; +-- High skip count scenario +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 500) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C D E) + DEFINE + A AS v % 100 = 1, + B AS v % 100 = 2, + C AS v % 100 = 3, + D AS v % 100 = 4, + E AS v % 100 = 5 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------------ + PATTERN (a b c d e) +(1 row) + SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -1891,6 +3754,7 @@ WINDOW w AS ( -> Function Scan on generate_series s (actual rows=500.00 loops=1) (8 rows) +DROP VIEW rpr_v; -- Cleanup DROP TABLE nfa_test; DROP TABLE nfa_complex; diff --git a/src/test/regress/sql/rpr.sql b/src/test/regress/sql/rpr.sql index 788a77e5279..1071dacd687 100644 --- a/src/test/regress/sql/rpr.sql +++ b/src/test/regress/sql/rpr.sql @@ -1233,9 +1233,9 @@ SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER DOWN AS price < PREV(1) ); --- Maximum pattern variables is 252 (RPR_VARID_MAX) +-- Maximum pattern variables is 251 (RPR_VARID_MAX) --- Ok: 252 variables (maximum allowed) +-- Error: 252 variables exceeds limit of 251 DO $$ DECLARE pattern_vars text; @@ -1256,7 +1256,7 @@ BEGIN END; $$; --- Error: 253 variables exceeds limit of 252 +-- Error: 253 variables exceeds limit of 251 DO $$ DECLARE pattern_vars text; diff --git a/src/test/regress/sql/rpr_base.sql b/src/test/regress/sql/rpr_base.sql index a5a66d2ca81..6f38cd1ae92 100644 --- a/src/test/regress/sql/rpr_base.sql +++ b/src/test/regress/sql/rpr_base.sql @@ -2019,6 +2019,14 @@ SELECT COUNT(*) OVER w FROM rpr_plan WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING PATTERN (A+ A*) DEFINE A AS val > 0); +-- Consecutive VAR merge: A A+ -> a{2,} +-- Tests line 251: child->max == RPR_QUANTITY_INF branch in mergeConsecutiveVars +-- prev: A{1,1} (finite), child: A+ (infinite) triggers line 251 evaluation +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A A+) DEFINE A AS val > 0); + -- Consecutive GROUP merge with finite quantifiers: ((A B){5}) ((A B){10}) -> merged EXPLAIN (COSTS OFF) SELECT COUNT(*) OVER w FROM rpr_plan @@ -2031,6 +2039,14 @@ SELECT COUNT(*) OVER w FROM rpr_plan WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING PATTERN ((A B)+ (A B)+) DEFINE A AS val <= 50, B AS val > 50); +-- Consecutive GROUP merge: (A B){2} (A B)+ -> (a b){3,} +-- Tests line 325: child->max == RPR_QUANTITY_INF branch in mergeConsecutiveGroups +-- prev: (A B){2,2} (finite), child: (A B)+ (infinite) triggers line 325 evaluation +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A B){2} (A B)+) DEFINE A AS val <= 50, B AS val > 50); + -- PREFIX merge: A B (A B)+ -> (a b){2,} EXPLAIN (COSTS OFF) SELECT COUNT(*) OVER w FROM rpr_plan @@ -3350,13 +3366,13 @@ WINDOW w AS ( V251 AS val > 0, V252 AS val > 0, V253 AS val > 0 ); --- Test: 252 variables in PATTERN, 253 in DEFINE (boundary - should succeed) +-- Test: 251 variables in PATTERN, 252 in DEFINE (boundary - should succeed) -- Expected: Success - unused DEFINE variables are filtered out SELECT COUNT(*) OVER w FROM rpr_errors WINDOW w AS ( ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - PATTERN (V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20 V21 V22 V23 V24 V25 V26 V27 V28 V29 V30 V31 V32 V33 V34 V35 V36 V37 V38 V39 V40 V41 V42 V43 V44 V45 V46 V47 V48 V49 V50 V51 V52 V53 V54 V55 V56 V57 V58 V59 V60 V61 V62 V63 V64 V65 V66 V67 V68 V69 V70 V71 V72 V73 V74 V75 V76 V77 V78 V79 V80 V81 V82 V83 V84 V85 V86 V87 V88 V89 V90 V91 V92 V93 V94 V95 V96 V97 V98 V99 V100 V101 V102 V103 V104 V105 V106 V107 V108 V109 V110 V111 V112 V113 V114 V115 V116 V117 V118 V119 V120 V121 V122 V123 V124 V125 V126 V127 V128 V129 V130 V131 V132 V133 V134 V135 V136 V137 V138 V139 V140 V141 V142 V143 V144 V145 V146 V147 V148 V149 V150 V151 V152 V153 V154 V155 V156 V157 V158 V159 V160 V161 V162 V163 V164 V165 V166 V167 V168 V169 V170 V171 V172 V173 V174 V175 V176 V177 V178 V179 V180 V181 V182 V183 V184 V185 V186 V187 V188 V189 V190 V191 V192 V193 V194 V195 V196 V197 V198 V199 V200 V201 V202 V203 V204 V205 V206 V207 V208 V209 V210 V211 V212 V213 V214 V215 V216 V217 V218 V219 V220 V221 V222 V223 V224 V225 V226 V227 V228 V229 V230 V231 V232 V233 V234 V235 V236 V237 V238 V239 V240 V241 V242 V243 V244 V245 V246 V247 V248 V249 V250 V251 V252) + PATTERN (V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20 V21 V22 V23 V24 V25 V26 V27 V28 V29 V30 V31 V32 V33 V34 V35 V36 V37 V38 V39 V40 V41 V42 V43 V44 V45 V46 V47 V48 V49 V50 V51 V52 V53 V54 V55 V56 V57 V58 V59 V60 V61 V62 V63 V64 V65 V66 V67 V68 V69 V70 V71 V72 V73 V74 V75 V76 V77 V78 V79 V80 V81 V82 V83 V84 V85 V86 V87 V88 V89 V90 V91 V92 V93 V94 V95 V96 V97 V98 V99 V100 V101 V102 V103 V104 V105 V106 V107 V108 V109 V110 V111 V112 V113 V114 V115 V116 V117 V118 V119 V120 V121 V122 V123 V124 V125 V126 V127 V128 V129 V130 V131 V132 V133 V134 V135 V136 V137 V138 V139 V140 V141 V142 V143 V144 V145 V146 V147 V148 V149 V150 V151 V152 V153 V154 V155 V156 V157 V158 V159 V160 V161 V162 V163 V164 V165 V166 V167 V168 V169 V170 V171 V172 V173 V174 V175 V176 V177 V178 V179 V180 V181 V182 V183 V184 V185 V186 V187 V188 V189 V190 V191 V192 V193 V194 V195 V196 V197 V198 V199 V200 V201 V202 V203 V204 V205 V206 V207 V208 V209 V210 V211 V212 V213 V214 V215 V216 V217 V218 V219 V220 V221 V222 V223 V224 V225 V226 V227 V228 V229 V230 V231 V232 V233 V234 V235 V236 V237 V238 V239 V240 V241 V242 V243 V244 V245 V246 V247 V248 V249 V250 V251) DEFINE V1 AS val > 0, V2 AS val > 0, V3 AS val > 0, V4 AS val > 0, V5 AS val > 0, V6 AS val > 0, V7 AS val > 0, V8 AS val > 0, V9 AS val > 0, V10 AS val > 0, V11 AS val > 0, V12 AS val > 0, V13 AS val > 0, V14 AS val > 0, V15 AS val > 0, V16 AS val > 0, V17 AS val > 0, V18 AS val > 0, V19 AS val > 0, V20 AS val > 0, @@ -3383,17 +3399,17 @@ WINDOW w AS ( V221 AS val > 0, V222 AS val > 0, V223 AS val > 0, V224 AS val > 0, V225 AS val > 0, V226 AS val > 0, V227 AS val > 0, V228 AS val > 0, V229 AS val > 0, V230 AS val > 0, V231 AS val > 0, V232 AS val > 0, V233 AS val > 0, V234 AS val > 0, V235 AS val > 0, V236 AS val > 0, V237 AS val > 0, V238 AS val > 0, V239 AS val > 0, V240 AS val > 0, V241 AS val > 0, V242 AS val > 0, V243 AS val > 0, V244 AS val > 0, V245 AS val > 0, V246 AS val > 0, V247 AS val > 0, V248 AS val > 0, V249 AS val > 0, V250 AS val > 0, - V251 AS val > 0, V252 AS val > 0, V253 AS val > 0 + V251 AS val > 0, V252 AS val > 0 ); --- Test: 253 variables in PATTERN, 252 in DEFINE (exceeds limit with implicit TRUE) --- Expected: ERROR - too many pattern variables (Maximum is 252) +-- Test: 252 variables in PATTERN, 251 in DEFINE (exceeds limit with implicit TRUE) +-- Expected: ERROR - too many pattern variables (Maximum is 251) SELECT COUNT(*) OVER w FROM rpr_errors WINDOW w AS ( ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - PATTERN (V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20 V21 V22 V23 V24 V25 V26 V27 V28 V29 V30 V31 V32 V33 V34 V35 V36 V37 V38 V39 V40 V41 V42 V43 V44 V45 V46 V47 V48 V49 V50 V51 V52 V53 V54 V55 V56 V57 V58 V59 V60 V61 V62 V63 V64 V65 V66 V67 V68 V69 V70 V71 V72 V73 V74 V75 V76 V77 V78 V79 V80 V81 V82 V83 V84 V85 V86 V87 V88 V89 V90 V91 V92 V93 V94 V95 V96 V97 V98 V99 V100 V101 V102 V103 V104 V105 V106 V107 V108 V109 V110 V111 V112 V113 V114 V115 V116 V117 V118 V119 V120 V121 V122 V123 V124 V125 V126 V127 V128 V129 V130 V131 V132 V133 V134 V135 V136 V137 V138 V139 V140 V141 V142 V143 V144 V145 V146 V147 V148 V149 V150 V151 V152 V153 V154 V155 V156 V157 V158 V159 V160 V161 V162 V163 V164 V165 V166 V167 V168 V169 V170 V171 V172 V173 V174 V175 V176 V177 V178 V179 V180 V181 V182 V183 V184 V185 V186 V187 V188 V189 V190 V191 V192 V193 V194 V195 V196 V197 V198 V199 V200 V201 V202 V203 V204 V205 V206 V207 V208 V209 V210 V211 V212 V213 V214 V215 V216 V217 V218 V219 V220 V221 V222 V223 V224 V225 V226 V227 V228 V229 V230 V231 V232 V233 V234 V235 V236 V237 V238 V239 V240 V241 V242 V243 V244 V245 V246 V247 V248 V249 V250 V251 V252 V253) + PATTERN (V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20 V21 V22 V23 V24 V25 V26 V27 V28 V29 V30 V31 V32 V33 V34 V35 V36 V37 V38 V39 V40 V41 V42 V43 V44 V45 V46 V47 V48 V49 V50 V51 V52 V53 V54 V55 V56 V57 V58 V59 V60 V61 V62 V63 V64 V65 V66 V67 V68 V69 V70 V71 V72 V73 V74 V75 V76 V77 V78 V79 V80 V81 V82 V83 V84 V85 V86 V87 V88 V89 V90 V91 V92 V93 V94 V95 V96 V97 V98 V99 V100 V101 V102 V103 V104 V105 V106 V107 V108 V109 V110 V111 V112 V113 V114 V115 V116 V117 V118 V119 V120 V121 V122 V123 V124 V125 V126 V127 V128 V129 V130 V131 V132 V133 V134 V135 V136 V137 V138 V139 V140 V141 V142 V143 V144 V145 V146 V147 V148 V149 V150 V151 V152 V153 V154 V155 V156 V157 V158 V159 V160 V161 V162 V163 V164 V165 V166 V167 V168 V169 V170 V171 V172 V173 V174 V175 V176 V177 V178 V179 V180 V181 V182 V183 V184 V185 V186 V187 V188 V189 V190 V191 V192 V193 V194 V195 V196 V197 V198 V199 V200 V201 V202 V203 V204 V205 V206 V207 V208 V209 V210 V211 V212 V213 V214 V215 V216 V217 V218 V219 V220 V221 V222 V223 V224 V225 V226 V227 V228 V229 V230 V231 V232 V233 V234 V235 V236 V237 V238 V239 V240 V241 V242 V243 V244 V245 V246 V247 V248 V249 V250 V251 V252) DEFINE V1 AS val > 0, V2 AS val > 0, V3 AS val > 0, V4 AS val > 0, V5 AS val > 0, V6 AS val > 0, V7 AS val > 0, V8 AS val > 0, V9 AS val > 0, V10 AS val > 0, V11 AS val > 0, V12 AS val > 0, V13 AS val > 0, V14 AS val > 0, V15 AS val > 0, V16 AS val > 0, V17 AS val > 0, V18 AS val > 0, V19 AS val > 0, V20 AS val > 0, @@ -3420,28 +3436,28 @@ WINDOW w AS ( V221 AS val > 0, V222 AS val > 0, V223 AS val > 0, V224 AS val > 0, V225 AS val > 0, V226 AS val > 0, V227 AS val > 0, V228 AS val > 0, V229 AS val > 0, V230 AS val > 0, V231 AS val > 0, V232 AS val > 0, V233 AS val > 0, V234 AS val > 0, V235 AS val > 0, V236 AS val > 0, V237 AS val > 0, V238 AS val > 0, V239 AS val > 0, V240 AS val > 0, V241 AS val > 0, V242 AS val > 0, V243 AS val > 0, V244 AS val > 0, V245 AS val > 0, V246 AS val > 0, V247 AS val > 0, V248 AS val > 0, V249 AS val > 0, V250 AS val > 0, - V251 AS val > 0, V252 AS val > 0 + V251 AS val > 0 ); --- Test: Pattern nesting at maximum depth (depth 254) +-- Test: Pattern nesting at maximum depth (depth 253) -- Expected: Should succeed --- Note: 254 nested GROUP{3,7} quantifiers produce depth 254 after optimization +-- Note: 253 nested GROUP{3,7} quantifiers produce depth 253 after optimization SELECT id, val, COUNT(*) OVER w FROM rpr_errors WINDOW w AS ( ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - PATTERN (((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((A{3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}) + PATTERN ((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((A{3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}) DEFINE A AS val > 0 ); --- Test: Pattern nesting depth exceeds maximum (depth 255) +-- Test: Pattern nesting depth exceeds maximum (depth 254) -- Expected: ERROR - pattern nesting too deep --- Note: 255 nested GROUP{3,7} quantifiers produce depth 255 after optimization +-- Note: 254 nested GROUP{3,7} quantifiers produce depth 254 after optimization SELECT id, val, COUNT(*) OVER w FROM rpr_errors WINDOW w AS ( ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - PATTERN ((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((A{3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}) + PATTERN (((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((A{3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}) DEFINE A AS val > 0 ); @@ -3611,6 +3627,24 @@ WINDOW w AS ( DEFINE A AS TRUE ); +-- Optional group with alternation: A ((B | C) (D | E))* F? +-- When only A matches, the * group matches 0 times and F? matches 0 times +SELECT id, val, match_len +FROM (SELECT id, val, + COUNT(*) OVER w AS match_len + FROM (VALUES (1, 1), (2, 99)) AS t(id, val) + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A ((B | C) (D | E))* F?) + DEFINE A AS val = 1, + B AS val = 2, C AS val = 3, + D AS val = 4, E AS val = 5, + F AS val = 6 + ) +) s; + DROP TABLE rpr_plan; -- ============================================================ diff --git a/src/test/regress/sql/rpr_explain.sql b/src/test/regress/sql/rpr_explain.sql index 8156121b3cd..80088ab18e2 100644 --- a/src/test/regress/sql/rpr_explain.sql +++ b/src/test/regress/sql/rpr_explain.sql @@ -1,14 +1,43 @@ +-- ============================================================ +-- RPR EXPLAIN Tests +-- Tests for Row Pattern Recognition EXPLAIN output +-- ============================================================ -- --- Test: EXPLAIN ANALYZE output for Row Pattern Recognition NFA statistics +-- This test suite validates EXPLAIN output for RPR queries, +-- including NFA statistics shown in EXPLAIN ANALYZE: +-- - NFA States: peak, total, merged +-- - NFA Contexts: peak, total, absorbed, skipped +-- - NFA: matched (len min/max/avg), mismatched (len min/max/avg) +-- - Pattern deparse formatting +-- - Multiple output formats (text, JSON, XML) -- --- This file tests the NFA statistics shown in EXPLAIN ANALYZE output: --- - NFA States: peak, total, merged --- - NFA Contexts: peak, total, absorbed, skipped --- - NFA: matched (len min/max/avg), mismatched (len min/max/avg) --- - --- Filter function to normalize Storage memory values only (not NFA statistics) --- Works for text, JSON, and XML formats +-- Test Coverage: +-- Basic NFA Statistics Tests +-- State Statistics Tests +-- Context Statistics Tests +-- Match Length Statistics Tests +-- Mismatch Length Statistics Tests +-- JSON Format Tests +-- XML Format Tests +-- Multiple Partitions Tests +-- Edge Cases +-- Complex Pattern Tests +-- Real-world Pattern Examples +-- Performance-oriented Tests +-- INITIAL vs no INITIAL comparison +-- Quantifier Variations +-- Regression Tests for Statistics Accuracy +-- Alternation Pattern Tests +-- Group Pattern Tests +-- Window Function Combinations +-- DEFINE Expression Variations +-- Large Scale Statistics Verification +-- ============================================================ + +-- Filter function to normalize Storage memory values only (not NFA statistics). +-- NFA statistics should not change between platforms; if they do, it could +-- indicate issues such as uninitialized memory access. +-- Works for text, JSON, and XML formats. create function rpr_explain_filter(text) returns setof text language plpgsql as $$ @@ -75,11 +104,21 @@ VALUES (121, 'U'), (123, 'U'), (125, 'U'), (127, 'U'), (129, 'U'), (131, 'U'), (133, 'U'), (130, 'D'), (127, 'D'), (124, 'D'); --- --- Section 1: Basic NFA Statistics Tests --- +-- ============================================================ +-- Basic NFA Statistics Tests +-- ============================================================ --- Test 1.1: Simple pattern - should show basic statistics +-- Simple pattern - should show basic statistics +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM nfa_test +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B) + DEFINE A AS cat = 'A', B AS cat = 'B' +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -90,8 +129,19 @@ WINDOW w AS ( PATTERN (A B) DEFINE A AS cat = ''A'', B AS cat = ''B'' )'); +DROP VIEW rpr_v; --- Test 1.2: Pattern with no matches - 0 matched +-- Pattern with no matches - 0 matched +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM nfa_test +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (X Y Z) + DEFINE X AS cat = 'X', Y AS cat = 'Y', Z AS cat = 'Z' +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -102,8 +152,19 @@ WINDOW w AS ( PATTERN (X Y Z) DEFINE X AS cat = ''X'', Y AS cat = ''Y'', Z AS cat = ''Z'' );'); +DROP VIEW rpr_v; --- Test 1.3: Pattern matching every row - high match count +-- Pattern matching every row - high match count +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM nfa_test +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (R) + DEFINE R AS TRUE +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -114,12 +175,68 @@ WINDOW w AS ( PATTERN (R) DEFINE R AS TRUE );'); +DROP VIEW rpr_v; --- --- Section 2: State Statistics Tests (peak, total, merged) --- +-- Regression test: Space before parenthesis in pattern deparse +-- Verifies that "A (B | C)" correctly outputs as "a (b | c)" with space +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 20) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A (B | C)) + DEFINE A AS v % 3 = 1, B AS v % 3 = 2, C AS v % 3 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (COSTS OFF) +SELECT count(*) OVER w +FROM generate_series(1, 20) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A (B | C)) + DEFINE A AS v % 3 = 1, B AS v % 3 = 2, C AS v % 3 = 0 +);'); +DROP VIEW rpr_v; + +-- Regression test: Sequential alternations at same depth +-- Verifies that "((B | C) (D | E))" correctly outputs as "(b | c) (d | e)" +-- Previously failed due to missing parentheses on ALT depth decrease +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 30) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A ((B | C) (D | E))*) + DEFINE A AS v % 5 = 1, B AS v % 5 = 2, C AS v % 5 = 3, D AS v % 5 = 4, E AS v % 5 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (COSTS OFF) +SELECT count(*) OVER w +FROM generate_series(1, 30) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A ((B | C) (D | E))*) + DEFINE A AS v % 5 = 1, B AS v % 5 = 2, C AS v % 5 = 3, D AS v % 5 = 4, E AS v % 5 = 0 +);'); +DROP VIEW rpr_v; + +-- ============================================================ +-- State Statistics Tests (peak, total, merged) +-- ============================================================ --- Test 2.1: Simple quantifier pattern - A+ with short matches (no merging) +-- Simple quantifier pattern - A+ with short matches (no merging) +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+) + DEFINE A AS v % 2 = 1 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -130,8 +247,21 @@ WINDOW w AS ( PATTERN (A+) DEFINE A AS v % 2 = 1 );'); +DROP VIEW rpr_v; --- Test 2.2: Alternation pattern - multiple state branches +-- Alternation pattern - multiple state branches +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM nfa_test +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B | C) (D | E)) + DEFINE + A AS cat = 'A', B AS cat = 'B', C AS cat = 'C', + D AS cat = 'D', E AS cat = 'E' +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -144,8 +274,22 @@ WINDOW w AS ( A AS cat = ''A'', B AS cat = ''B'', C AS cat = ''C'', D AS cat = ''D'', E AS cat = ''E'' );'); +DROP VIEW rpr_v; --- Test 2.3: Complex pattern with high state count +-- Complex pattern with high state count +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B* C+) + DEFINE + A AS v % 3 = 1, + B AS v % 3 = 2, + C AS v % 3 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -159,8 +303,19 @@ WINDOW w AS ( B AS v % 3 = 2, C AS v % 3 = 0 );'); +DROP VIEW rpr_v; --- Test 2.4: Grouped pattern with quantifier - state merging +-- Grouped pattern with quantifier - state merging +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 60) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A B)+) + DEFINE A AS v % 2 = 1, B AS v % 2 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -171,9 +326,20 @@ WINDOW w AS ( PATTERN ((A B)+) DEFINE A AS v % 2 = 1, B AS v % 2 = 0 );'); +DROP VIEW rpr_v; --- Test 2.5: State explosion pattern - many alternations +-- State explosion pattern - many alternations -- Pattern (A|B)(A|B)(A|B)(A|B) can create many parallel states +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B) (A | B) (A | B) (A | B) (A | B) (A | B) (A | B) (A | B)) + DEFINE A AS v % 2 = 1, B AS v % 2 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -184,8 +350,90 @@ WINDOW w AS ( PATTERN ((A | B) (A | B) (A | B) (A | B) (A | B) (A | B) (A | B) (A | B)) DEFINE A AS v % 2 = 1, B AS v % 2 = 0 );'); +DROP VIEW rpr_v; + +-- Consecutive ALT merge followed by different ALT +-- Tests mergeConsecutiveAlts flush on ALT change: (A|B){2} (C|D) +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 40) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B) (A | B) (C | D)) + DEFINE A AS v % 4 = 0, B AS v % 4 = 1, C AS v % 4 = 2, D AS v % 4 = 3 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (COSTS OFF) +SELECT count(*) OVER w +FROM generate_series(1, 40) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B) (A | B) (C | D)) + DEFINE A AS v % 4 = 0, B AS v % 4 = 1, C AS v % 4 = 2, D AS v % 4 = 3 +);'); +DROP VIEW rpr_v; + +-- Consecutive ALT merge followed by non-ALT element +-- Tests mergeConsecutiveAlts flush on non-ALT: (A|B){2} c +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 40) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B) (A | B) C) + DEFINE A AS v % 3 = 0, B AS v % 3 = 1, C AS v % 3 = 2 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (COSTS OFF) +SELECT count(*) OVER w +FROM generate_series(1, 40) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B) (A | B) C) + DEFINE A AS v % 3 = 0, B AS v % 3 = 1, C AS v % 3 = 2 +);'); +DROP VIEW rpr_v; + +-- ALT prefix/suffix absorbed into GROUP: (A|B) (A|B)+ (A|B) -> (A|B){3,} +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 40) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B) (A | B)+ (A | B)) + DEFINE A AS v % 2 = 0, B AS v % 2 = 1 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (COSTS OFF) +SELECT count(*) OVER w +FROM generate_series(1, 40) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B) (A | B)+ (A | B)) + DEFINE A AS v % 2 = 0, B AS v % 2 = 1 +);'); +DROP VIEW rpr_v; --- Test 2.6: High state merging - alternation with plus quantifier +-- High state merging - alternation with plus quantifier +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B | C)+ D) + DEFINE A AS v % 4 = 1, B AS v % 4 = 2, C AS v % 4 = 3, D AS v % 4 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -196,8 +444,19 @@ WINDOW w AS ( PATTERN ((A | B | C)+ D) DEFINE A AS v % 4 = 1, B AS v % 4 = 2, C AS v % 4 = 3, D AS v % 4 = 0 );'); +DROP VIEW rpr_v; --- Test 2.7: Nested quantifiers causing state growth +-- Nested quantifiers causing state growth +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 1000) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (((A | B)+)+) + DEFINE A AS v % 3 = 1, B AS v % 3 = 2 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -208,12 +467,23 @@ WINDOW w AS ( PATTERN (((A | B)+)+) DEFINE A AS v % 3 = 1, B AS v % 3 = 2 );'); +DROP VIEW rpr_v; --- --- Section 3: Context Statistics Tests (peak, total, absorbed, skipped) --- +-- ============================================================ +-- Context Statistics Tests (peak, total, absorbed, skipped) +-- ============================================================ --- Test 3.1: Context absorption with unbounded quantifier at start +-- Context absorption with unbounded quantifier at start +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -224,8 +494,19 @@ WINDOW w AS ( PATTERN (A+ B) DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 );'); +DROP VIEW rpr_v; --- Test 3.2: No absorption - bounded quantifier +-- No absorption - bounded quantifier +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A{2,4} B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -236,8 +517,19 @@ WINDOW w AS ( PATTERN (A{2,4} B) DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 );'); +DROP VIEW rpr_v; --- Test 3.3: Contexts skipped by SKIP PAST LAST ROW +-- Contexts skipped by SKIP PAST LAST ROW +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C) + DEFINE A AS v % 10 = 1, B AS v % 10 = 2, C AS v % 10 = 3 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -248,8 +540,19 @@ WINDOW w AS ( PATTERN (A B C) DEFINE A AS v % 10 = 1, B AS v % 10 = 2, C AS v % 10 = 3 );'); +DROP VIEW rpr_v; --- Test 3.4: High context absorption - unbounded group +-- High context absorption - unbounded group +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A B)+ C) + DEFINE A AS v % 3 = 1, B AS v % 3 = 2, C AS v % 3 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -260,12 +563,25 @@ WINDOW w AS ( PATTERN ((A B)+ C) DEFINE A AS v % 3 = 1, B AS v % 3 = 2, C AS v % 3 = 0 );'); +DROP VIEW rpr_v; --- --- Section 4: Match Length Statistics Tests --- +-- ============================================================ +-- Match Length Statistics Tests +-- ============================================================ --- Test 4.1: Fixed length matches - all same length +-- Fixed length matches - all same length +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM nfa_test +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C D E) + DEFINE + A AS cat = 'A', B AS cat = 'B', C AS cat = 'C', + D AS cat = 'D', E AS cat = 'E' +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -278,8 +594,19 @@ WINDOW w AS ( A AS cat = ''A'', B AS cat = ''B'', C AS cat = ''C'', D AS cat = ''D'', E AS cat = ''E'' );'); +DROP VIEW rpr_v; --- Test 4.2: Variable length matches - min/max/avg differ +-- Variable length matches - min/max/avg differ +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 10 <> 0, B AS v % 10 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -290,8 +617,19 @@ WINDOW w AS ( PATTERN (A+ B) DEFINE A AS v % 10 <> 0, B AS v % 10 = 0 );'); +DROP VIEW rpr_v; --- Test 4.3: Very long matches +-- Very long matches +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 200) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v <= 195, B AS v > 195 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -302,8 +640,21 @@ WINDOW w AS ( PATTERN (A+ B) DEFINE A AS v <= 195, B AS v > 195 );'); +DROP VIEW rpr_v; --- Test 4.4: Mix of short and long matches +-- Mix of short and long matches +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE + A AS (v % 20 <> 0) AND (v % 20 <= 10 OR v % 20 > 15), + B AS v % 20 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -316,13 +667,31 @@ WINDOW w AS ( A AS (v % 20 <> 0) AND (v % 20 <= 10 OR v % 20 > 15), B AS v % 20 = 0 );'); +DROP VIEW rpr_v; --- --- Section 5: Mismatch Length Statistics Tests --- +-- ============================================================ +-- Mismatch Length Statistics Tests +-- ============================================================ --- Test 5.1: Pattern that causes mismatches with length > 1 +-- Pattern that causes mismatches with length > 1 -- Mismatch happens when partial match fails after processing multiple rows +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM ( + SELECT v, + CASE WHEN v % 10 IN (1,2,3) THEN 'A' + WHEN v % 10 IN (4,5) THEN 'B' + WHEN v % 10 = 6 THEN 'C' + ELSE 'X' END AS cat + FROM generate_series(1, 100) AS s(v) +) t +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B+ C) + DEFINE A AS cat = 'A', B AS cat = 'B', C AS cat = 'C' +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -340,8 +709,31 @@ WINDOW w AS ( PATTERN (A+ B+ C) DEFINE A AS cat = ''A'', B AS cat = ''B'', C AS cat = ''C'' );'); +DROP VIEW rpr_v; --- Test 5.2: Long partial matches that fail +-- Long partial matches that fail +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM ( + SELECT i AS v, + CASE + WHEN i <= 20 THEN 'A' + WHEN i <= 25 THEN 'B' + WHEN i = 26 THEN 'X' -- breaks the pattern + WHEN i <= 50 THEN 'A' + WHEN i <= 55 THEN 'B' + WHEN i = 56 THEN 'C' -- completes pattern + ELSE 'Y' + END AS cat + FROM generate_series(1, 60) i +) t +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B+ C) + DEFINE A AS cat = 'A', B AS cat = 'B', C AS cat = 'C' +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -364,14 +756,14 @@ WINDOW w AS ( PATTERN (A+ B+ C) DEFINE A AS cat = ''A'', B AS cat = ''B'', C AS cat = ''C'' );'); +DROP VIEW rpr_v; --- --- Section 6: JSON Format Tests --- +-- ============================================================ +-- JSON Format Tests +-- ============================================================ --- Test 6.1: JSON format output with all statistics -SELECT rpr_explain_filter(' -EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF, FORMAT JSON) +-- JSON format output with all statistics +CREATE TEMP VIEW rpr_v AS SELECT count(*) OVER w FROM generate_series(1, 50) AS s(v) WINDOW w AS ( @@ -379,9 +771,31 @@ WINDOW w AS ( AFTER MATCH SKIP PAST LAST ROW PATTERN (A+ B+) DEFINE A AS v % 3 = 1, B AS v % 3 = 2 -)'); - --- Test 6.2: JSON format with match length statistics +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF, FORMAT JSON) +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B+) + DEFINE A AS v % 3 = 1, B AS v % 3 = 2 +)'); +DROP VIEW rpr_v; + +-- JSON format with match length statistics +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 10 <> 0, B AS v % 10 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF, FORMAT JSON) SELECT count(*) OVER w @@ -392,12 +806,23 @@ WINDOW w AS ( PATTERN (A+ B) DEFINE A AS v % 10 <> 0, B AS v % 10 = 0 )'); +DROP VIEW rpr_v; --- --- Section 7: XML Format Tests --- +-- ============================================================ +-- XML Format Tests +-- ============================================================ --- Test 7.1: XML format output +-- XML format output +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 30) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B) + DEFINE A AS v % 2 = 1, B AS v % 2 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF, FORMAT XML) SELECT count(*) OVER w @@ -408,12 +833,76 @@ WINDOW w AS ( PATTERN (A B) DEFINE A AS v % 2 = 1, B AS v % 2 = 0 )'); +DROP VIEW rpr_v; --- --- Section 8: Multiple Partitions Tests --- +-- JSON format with mismatch statistics +-- Pattern A B C expects 1,2,3 but gets 1,2,4 twice causing mismatches +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM (VALUES (1),(2),(4), (1),(2),(4), (1),(2),(3)) AS t(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C) + DEFINE A AS v = 1, B AS v = 2, C AS v = 3 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF, FORMAT JSON) +SELECT count(*) OVER w +FROM (VALUES (1),(2),(4), (1),(2),(4), (1),(2),(3)) AS t(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C) + DEFINE A AS v = 1, B AS v = 2, C AS v = 3 +)'); +DROP VIEW rpr_v; + +-- JSON format with skipped context statistics +-- Alternation pattern with SKIP PAST LAST ROW causes many contexts to be skipped +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B) (A | B) (A | B) (A | B) (A | B) (A | B) (A | B) (A | B)) + DEFINE A AS v % 2 = 1, B AS v % 2 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF, FORMAT JSON) +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B) (A | B) (A | B) (A | B) (A | B) (A | B) (A | B) (A | B)) + DEFINE A AS v % 2 = 1, B AS v % 2 = 0 +)'); +DROP VIEW rpr_v; --- Test 8.1: Statistics across multiple partitions +-- ============================================================ +-- Multiple Partitions Tests +-- ============================================================ + +-- Statistics across multiple partitions +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM ( + SELECT p, v + FROM generate_series(1, 3) p, + generate_series(1, 30) v +) t +WINDOW w AS ( + PARTITION BY p + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -429,8 +918,25 @@ WINDOW w AS ( PATTERN (A+ B) DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 );'); +DROP VIEW rpr_v; --- Test 8.2: Different pattern behavior per partition +-- Different pattern behavior per partition +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM ( + SELECT + CASE WHEN v <= 25 THEN 1 ELSE 2 END AS p, + v % 10 AS val + FROM generate_series(1, 50) v +) t +WINDOW w AS ( + PARTITION BY p + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS val < 5, B AS val >= 5 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -447,12 +953,23 @@ WINDOW w AS ( PATTERN (A+ B) DEFINE A AS val < 5, B AS val >= 5 );'); +DROP VIEW rpr_v; --- --- Section 9: Edge Cases --- +-- ============================================================ +-- Edge Cases +-- ============================================================ --- Test 9.1: Empty result set +-- Empty result set +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 0) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B) + DEFINE A AS v = 1, B AS v = 2 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -463,8 +980,19 @@ WINDOW w AS ( PATTERN (A B) DEFINE A AS v = 1, B AS v = 2 );'); +DROP VIEW rpr_v; --- Test 9.2: Single row +-- Single row +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 1) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A) + DEFINE A AS TRUE +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -475,8 +1003,21 @@ WINDOW w AS ( PATTERN (A) DEFINE A AS TRUE );'); +DROP VIEW rpr_v; --- Test 9.3: Pattern longer than data +-- Pattern longer than data +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 5) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C D E F G H I J) + DEFINE + A AS v = 1, B AS v = 2, C AS v = 3, D AS v = 4, E AS v = 5, + F AS v = 6, G AS v = 7, H AS v = 8, I AS v = 9, J AS v = 10 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -489,8 +1030,19 @@ WINDOW w AS ( A AS v = 1, B AS v = 2, C AS v = 3, D AS v = 4, E AS v = 5, F AS v = 6, G AS v = 7, H AS v = 8, I AS v = 9, J AS v = 10 );'); +DROP VIEW rpr_v; --- Test 9.4: All rows match as single match +-- All rows match as single match +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+) + DEFINE A AS TRUE +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -501,12 +1053,23 @@ WINDOW w AS ( PATTERN (A+) DEFINE A AS TRUE );'); +DROP VIEW rpr_v; --- --- Section 10: Complex Pattern Tests --- +-- ============================================================ +-- Complex Pattern Tests +-- ============================================================ --- Test 10.1: Nested groups +-- Nested groups +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 60) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (((A B) C)+) + DEFINE A AS v % 3 = 1, B AS v % 3 = 2, C AS v % 3 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -517,8 +1080,21 @@ WINDOW w AS ( PATTERN (((A B) C)+) DEFINE A AS v % 3 = 1, B AS v % 3 = 2, C AS v % 3 = 0 );'); +DROP VIEW rpr_v; --- Test 10.2: Multiple alternations +-- Multiple alternations +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM nfa_test +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B) (C | D | E)) + DEFINE + A AS cat = 'A', B AS cat = 'B', C AS cat = 'C', + D AS cat = 'D', E AS cat = 'E' +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -531,8 +1107,19 @@ WINDOW w AS ( A AS cat = ''A'', B AS cat = ''B'', C AS cat = ''C'', D AS cat = ''D'', E AS cat = ''E'' );'); +DROP VIEW rpr_v; --- Test 10.3: Optional elements +-- Optional elements +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B? C) + DEFINE A AS v % 4 = 1, B AS v % 4 = 2, C AS v % 4 = 3 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -543,8 +1130,19 @@ WINDOW w AS ( PATTERN (A B? C) DEFINE A AS v % 4 = 1, B AS v % 4 = 2, C AS v % 4 = 3 );'); +DROP VIEW rpr_v; --- Test 10.4: Bounded quantifiers +-- Bounded quantifiers +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A{2,5} B) + DEFINE A AS v % 10 <> 0, B AS v % 10 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -555,8 +1153,19 @@ WINDOW w AS ( PATTERN (A{2,5} B) DEFINE A AS v % 10 <> 0, B AS v % 10 = 0 );'); +DROP VIEW rpr_v; --- Test 10.5: Star quantifier +-- Star quantifier +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B* C) + DEFINE A AS v % 10 = 1, B AS v % 10 IN (2,3,4,5,6,7,8), C AS v % 10 = 9 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -567,12 +1176,23 @@ WINDOW w AS ( PATTERN (A B* C) DEFINE A AS v % 10 = 1, B AS v % 10 IN (2,3,4,5,6,7,8), C AS v % 10 = 9 );'); +DROP VIEW rpr_v; --- --- Section 11: Real-world Pattern Examples --- +-- ============================================================ +-- Real-world Pattern Examples +-- ============================================================ --- Test 11.1: Stock price pattern - V-shape (down then up) +-- Stock price pattern - V-shape (down then up) +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM nfa_complex +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (D+ U+) + DEFINE D AS trend = 'D', U AS trend = 'U' +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -583,8 +1203,19 @@ WINDOW w AS ( PATTERN (D+ U+) DEFINE D AS trend = ''D'', U AS trend = ''U'' );'); +DROP VIEW rpr_v; --- Test 11.2: Stock price pattern - peak (up, stable, down) +-- Stock price pattern - peak (up, stable, down) +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM nfa_complex +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (U+ S* D+) + DEFINE U AS trend = 'U', S AS trend = 'S', D AS trend = 'D' +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -595,8 +1226,19 @@ WINDOW w AS ( PATTERN (U+ S* D+) DEFINE U AS trend = ''U'', S AS trend = ''S'', D AS trend = ''D'' );'); +DROP VIEW rpr_v; --- Test 11.3: Consecutive increasing values (using PREV) +-- Consecutive increasing values (using PREV) +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A{3,}) + DEFINE A AS v > PREV(v) OR PREV(v) IS NULL +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -607,12 +1249,23 @@ WINDOW w AS ( PATTERN (A{3,}) DEFINE A AS v > PREV(v) OR PREV(v) IS NULL );'); +DROP VIEW rpr_v; --- --- Section 12: Performance-oriented Tests --- +-- ============================================================ +-- Performance-oriented Tests +-- ============================================================ --- Test 12.1: Large dataset with simple pattern +-- Large dataset with simple pattern +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 1000) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B) + DEFINE A AS v % 2 = 1, B AS v % 2 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -623,8 +1276,19 @@ WINDOW w AS ( PATTERN (A B) DEFINE A AS v % 2 = 1, B AS v % 2 = 0 );'); +DROP VIEW rpr_v; --- Test 12.2: Large dataset with absorption +-- Large dataset with absorption +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 1000) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 100 <> 0, B AS v % 100 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -635,8 +1299,19 @@ WINDOW w AS ( PATTERN (A+ B) DEFINE A AS v % 100 <> 0, B AS v % 100 = 0 );'); +DROP VIEW rpr_v; --- Test 12.3: High state merge ratio +-- High state merge ratio +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 500) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B)+ C) + DEFINE A AS v % 3 = 1, B AS v % 3 = 2, C AS v % 3 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -647,12 +1322,24 @@ WINDOW w AS ( PATTERN ((A | B)+ C) DEFINE A AS v % 3 = 1, B AS v % 3 = 2, C AS v % 3 = 0 );'); +DROP VIEW rpr_v; --- --- Section 13: INITIAL vs no INITIAL comparison --- +-- ============================================================ +-- INITIAL vs no INITIAL comparison +-- ============================================================ --- Test 13.1: With INITIAL keyword +-- With INITIAL keyword +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + INITIAL + PATTERN (A+ B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -664,8 +1351,19 @@ WINDOW w AS ( PATTERN (A+ B) DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 );'); +DROP VIEW rpr_v; --- Test 13.2: Without INITIAL keyword (same behavior currently) +-- Without INITIAL keyword (same behavior currently) +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -676,12 +1374,23 @@ WINDOW w AS ( PATTERN (A+ B) DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 );'); +DROP VIEW rpr_v; --- --- Section 14: Quantifier Variations --- +-- ============================================================ +-- Quantifier Variations +-- ============================================================ --- Test 14.1: Plus quantifier +-- Plus quantifier +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 40) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+) + DEFINE A AS v % 4 <> 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -692,10 +1401,10 @@ WINDOW w AS ( PATTERN (A+) DEFINE A AS v % 4 <> 0 );'); +DROP VIEW rpr_v; --- Test 14.2: Star quantifier (zero or more) -SELECT rpr_explain_filter(' -EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +-- Star quantifier (zero or more) +CREATE TEMP VIEW rpr_v AS SELECT count(*) OVER w FROM generate_series(1, 40) AS s(v) WINDOW w AS ( @@ -703,9 +1412,8 @@ WINDOW w AS ( AFTER MATCH SKIP PAST LAST ROW PATTERN (A* B) DEFINE A AS v % 4 IN (1, 2), B AS v % 4 = 3 -);'); - --- Test 14.3: Question mark (zero or one) +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -713,11 +1421,45 @@ FROM generate_series(1, 40) AS s(v) WINDOW w AS ( ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING AFTER MATCH SKIP PAST LAST ROW - PATTERN (A? B C) - DEFINE A AS v % 4 = 1, B AS v % 4 = 2, C AS v % 4 = 3 + PATTERN (A* B) + DEFINE A AS v % 4 IN (1, 2), B AS v % 4 = 3 );'); +DROP VIEW rpr_v; --- Test 14.4: Exact count {n} +-- Question mark (zero or one) +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 40) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A? B C) + DEFINE A AS v % 4 = 1, B AS v % 4 = 2, C AS v % 4 = 3 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 40) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A? B C) + DEFINE A AS v % 4 = 1, B AS v % 4 = 2, C AS v % 4 = 3 +);'); +DROP VIEW rpr_v; + +-- Exact count {n} +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A{3} B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -728,8 +1470,19 @@ WINDOW w AS ( PATTERN (A{3} B) DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 );'); +DROP VIEW rpr_v; --- Test 14.5: Range {n,m} +-- Range {n,m} +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A{2,4} B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -740,8 +1493,19 @@ WINDOW w AS ( PATTERN (A{2,4} B) DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 );'); +DROP VIEW rpr_v; --- Test 14.6: At least {n,} +-- At least {n,} +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A{3,} B) + DEFINE A AS v % 10 <> 0, B AS v % 10 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -752,13 +1516,24 @@ WINDOW w AS ( PATTERN (A{3,} B) DEFINE A AS v % 10 <> 0, B AS v % 10 = 0 );'); +DROP VIEW rpr_v; --- --- Section 15: Regression Tests for Statistics Accuracy --- +-- ============================================================ +-- Regression Tests for Statistics Accuracy +-- ============================================================ --- Test 15.1: Verify state count accuracy +-- Verify state count accuracy -- Pattern A+ B with 20 rows should show predictable state behavior +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 20) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -769,8 +1544,19 @@ WINDOW w AS ( PATTERN (A+ B) DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 );'); +DROP VIEW rpr_v; --- Test 15.2: Verify context count with known absorption +-- Verify context count with known absorption +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 30) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B C) + DEFINE A AS v % 10 IN (1,2,3,4,5,6,7), B AS v % 10 = 8, C AS v % 10 = 9 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -781,8 +1567,19 @@ WINDOW w AS ( PATTERN (A+ B C) DEFINE A AS v % 10 IN (1,2,3,4,5,6,7), B AS v % 10 = 8, C AS v % 10 = 9 );'); +DROP VIEW rpr_v; --- Test 15.3: Verify match length with fixed-length pattern +-- Verify match length with fixed-length pattern +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 30) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C) + DEFINE A AS v % 3 = 1, B AS v % 3 = 2, C AS v % 3 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -793,12 +1590,23 @@ WINDOW w AS ( PATTERN (A B C) DEFINE A AS v % 3 = 1, B AS v % 3 = 2, C AS v % 3 = 0 );'); +DROP VIEW rpr_v; --- --- Section 16: Alternation Pattern Tests --- +-- ============================================================ +-- Alternation Pattern Tests +-- ============================================================ --- Test 16.1: Simple alternation +-- Simple alternation +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM nfa_test +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B) C) + DEFINE A AS cat = 'A', B AS cat = 'B', C AS cat = 'C' +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -809,8 +1617,21 @@ WINDOW w AS ( PATTERN ((A | B) C) DEFINE A AS cat = ''A'', B AS cat = ''B'', C AS cat = ''C'' );'); +DROP VIEW rpr_v; --- Test 16.2: Multiple items in alternation +-- Multiple items in alternation +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM nfa_test +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B | C | D) E) + DEFINE + A AS cat = 'A', B AS cat = 'B', C AS cat = 'C', + D AS cat = 'D', E AS cat = 'E' +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -823,8 +1644,19 @@ WINDOW w AS ( A AS cat = ''A'', B AS cat = ''B'', C AS cat = ''C'', D AS cat = ''D'', E AS cat = ''E'' );'); +DROP VIEW rpr_v; --- Test 16.3: Alternation with quantifiers +-- Alternation with quantifiers +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A | B)+ C) + DEFINE A AS v % 3 = 1, B AS v % 3 = 2, C AS v % 3 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -835,12 +1667,172 @@ WINDOW w AS ( PATTERN ((A | B)+ C) DEFINE A AS v % 3 = 1, B AS v % 3 = 2, C AS v % 3 = 0 );'); +DROP VIEW rpr_v; --- --- Section 17: Group Pattern Tests --- +-- Multiple alternatives (4+) +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A | B | C | D | E) + DEFINE A AS v % 5 = 0, B AS v % 5 = 1, C AS v % 5 = 2, D AS v % 5 = 3, E AS v % 5 = 4 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (COSTS OFF) +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A | B | C | D | E) + DEFINE A AS v % 5 = 0, B AS v % 5 = 1, C AS v % 5 = 2, D AS v % 5 = 3, E AS v % 5 = 4 +);'); +DROP VIEW rpr_v; + +-- Alternation at start +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 60) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A | B) C D) + DEFINE A AS v % 4 = 0, B AS v % 4 = 1, C AS v % 4 = 2, D AS v % 4 = 3 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (COSTS OFF) +SELECT count(*) OVER w +FROM generate_series(1, 60) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A | B) C D) + DEFINE A AS v % 4 = 0, B AS v % 4 = 1, C AS v % 4 = 2, D AS v % 4 = 3 +);'); +DROP VIEW rpr_v; + +-- Multiple sequential alternations +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A | B) C (D | E) F) + DEFINE A AS v % 6 = 0, B AS v % 6 = 1, C AS v % 6 = 2, D AS v % 6 = 3, E AS v % 6 = 4, F AS v % 6 = 5 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (COSTS OFF) +SELECT count(*) OVER w +FROM generate_series(1, 100) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A | B) C (D | E) F) + DEFINE A AS v % 6 = 0, B AS v % 6 = 1, C AS v % 6 = 2, D AS v % 6 = 3, E AS v % 6 = 4, F AS v % 6 = 5 +);'); +DROP VIEW rpr_v; + +-- Quantified alternatives +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 60) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A+ | B+) C) + DEFINE A AS v % 3 = 0, B AS v % 3 = 1, C AS v % 3 = 2 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (COSTS OFF) +SELECT count(*) OVER w +FROM generate_series(1, 60) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A+ | B+) C) + DEFINE A AS v % 3 = 0, B AS v % 3 = 1, C AS v % 3 = 2 +);'); +DROP VIEW rpr_v; + +-- Alternation at end +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 60) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B (C | D)) + DEFINE A AS v % 4 = 0, B AS v % 4 = 1, C AS v % 4 = 2, D AS v % 4 = 3 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (COSTS OFF) +SELECT count(*) OVER w +FROM generate_series(1, 60) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B (C | D)) + DEFINE A AS v % 4 = 0, B AS v % 4 = 1, C AS v % 4 = 2, D AS v % 4 = 3 +);'); +DROP VIEW rpr_v; + +-- Nested ALT at start of branch inside outer ALT +-- Pattern: (A ((B | C) D | E)) - preceding VAR + inner ALT as first branch element +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 20) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A ((B | C) D | E)) + DEFINE A AS v % 5 = 0, B AS v % 5 = 1, C AS v % 5 = 2, D AS v % 5 = 3, E AS v % 5 = 4 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (COSTS OFF) +SELECT count(*) OVER w +FROM generate_series(1, 20) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A ((B | C) D | E)) + DEFINE A AS v % 5 = 0, B AS v % 5 = 1, C AS v % 5 = 2, D AS v % 5 = 3, E AS v % 5 = 4 +);'); +DROP VIEW rpr_v; + +-- Nested ALT at end of branch inside outer ALT +-- Pattern: (C (A | B) | D) - inner ALT is last element in outer branch +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 20) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (C (A | B) | D) + DEFINE A AS v % 4 = 0, B AS v % 4 = 1, C AS v % 4 = 2, D AS v % 4 = 3 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (COSTS OFF) +SELECT count(*) OVER w +FROM generate_series(1, 20) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (C (A | B) | D) + DEFINE A AS v % 4 = 0, B AS v % 4 = 1, C AS v % 4 = 2, D AS v % 4 = 3 +);'); +DROP VIEW rpr_v; --- Test 17.1: Simple group +-- ============================================================ +-- Group Pattern Tests +-- ============================================================ + +-- Simple group +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 40) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A B)+) + DEFINE A AS v % 2 = 1, B AS v % 2 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -851,8 +1843,19 @@ WINDOW w AS ( PATTERN ((A B)+) DEFINE A AS v % 2 = 1, B AS v % 2 = 0 );'); +DROP VIEW rpr_v; --- Test 17.2: Group with bounded quantifier +-- Group with bounded quantifier +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 40) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A B){2,4}) + DEFINE A AS v % 2 = 1, B AS v % 2 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -863,8 +1866,19 @@ WINDOW w AS ( PATTERN ((A B){2,4}) DEFINE A AS v % 2 = 1, B AS v % 2 = 0 );'); +DROP VIEW rpr_v; --- Test 17.3: Nested groups +-- Nested groups +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 60) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (((A B){2})+) + DEFINE A AS v % 2 = 1, B AS v % 2 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -875,12 +1889,107 @@ WINDOW w AS ( PATTERN (((A B){2})+) DEFINE A AS v % 2 = 1, B AS v % 2 = 0 );'); +DROP VIEW rpr_v; --- --- Section 18: Window Function Combinations --- +-- Deep nesting (3+ levels) +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 40) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((((A | B)+)+)+) + DEFINE A AS v % 2 = 0, B AS v % 2 = 1 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (COSTS OFF) +SELECT count(*) OVER w +FROM generate_series(1, 40) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((((A | B)+)+)+) + DEFINE A AS v % 2 = 0, B AS v % 2 = 1 +);'); +DROP VIEW rpr_v; --- Test 18.1: count(*) with pattern +-- Bounded quantifier on alternation +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 60) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A | B){2,3} C) + DEFINE A AS v % 3 = 0, B AS v % 3 = 1, C AS v % 3 = 2 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (COSTS OFF) +SELECT count(*) OVER w +FROM generate_series(1, 60) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A | B){2,3} C) + DEFINE A AS v % 3 = 0, B AS v % 3 = 1, C AS v % 3 = 2 +);'); +DROP VIEW rpr_v; + +-- Nested groups with quantifiers +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 60) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (((A B)+ C)*) + DEFINE A AS v % 3 = 0, B AS v % 3 = 1, C AS v % 3 = 2 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (COSTS OFF) +SELECT count(*) OVER w +FROM generate_series(1, 60) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (((A B)+ C)*) + DEFINE A AS v % 3 = 0, B AS v % 3 = 1, C AS v % 3 = 2 +);'); +DROP VIEW rpr_v; + +-- Partial nested quantification +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 60) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A (B C)+)*) + DEFINE A AS v % 3 = 0, B AS v % 3 = 1, C AS v % 3 = 2 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (COSTS OFF) +SELECT count(*) OVER w +FROM generate_series(1, 60) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A (B C)+)*) + DEFINE A AS v % 3 = 0, B AS v % 3 = 1, C AS v % 3 = 2 +);'); +DROP VIEW rpr_v; + +-- ============================================================ +-- Window Function Combinations +-- ============================================================ + +-- count(*) with pattern +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 30) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -891,8 +2000,19 @@ WINDOW w AS ( PATTERN (A+ B) DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 );'); +DROP VIEW rpr_v; --- Test 18.2: first_value with pattern +-- first_value with pattern +CREATE TEMP VIEW rpr_v AS +SELECT first_value(v) OVER w +FROM generate_series(1, 30) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT first_value(v) OVER w @@ -903,8 +2023,19 @@ WINDOW w AS ( PATTERN (A+ B) DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 );'); +DROP VIEW rpr_v; --- Test 18.3: last_value with pattern +-- last_value with pattern +CREATE TEMP VIEW rpr_v AS +SELECT last_value(v) OVER w +FROM generate_series(1, 30) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT last_value(v) OVER w @@ -915,8 +2046,22 @@ WINDOW w AS ( PATTERN (A+ B) DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 );'); +DROP VIEW rpr_v; --- Test 18.4: Multiple window functions +-- Multiple window functions +CREATE TEMP VIEW rpr_v AS +SELECT + count(*) OVER w, + first_value(v) OVER w, + last_value(v) OVER w +FROM generate_series(1, 30) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT @@ -930,12 +2075,25 @@ WINDOW w AS ( PATTERN (A+ B) DEFINE A AS v % 5 <> 0, B AS v % 5 = 0 );'); +DROP VIEW rpr_v; --- --- Section 19: DEFINE Expression Variations --- +-- ============================================================ +-- DEFINE Expression Variations +-- ============================================================ --- Test 19.1: Complex boolean expressions +-- Complex boolean expressions +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 50) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE + A AS (v % 5 <> 0) AND (v % 3 <> 0), + B AS (v % 5 = 0) OR (v % 3 = 0) +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -948,8 +2106,22 @@ WINDOW w AS ( A AS (v % 5 <> 0) AND (v % 3 <> 0), B AS (v % 5 = 0) OR (v % 3 = 0) );'); +DROP VIEW rpr_v; --- Test 19.2: Using PREV function +-- Using PREV function +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 30) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (S U+ D+) + DEFINE + S AS TRUE, + U AS v > PREV(v), + D AS v < PREV(v) +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -963,8 +2135,22 @@ WINDOW w AS ( U AS v > PREV(v), D AS v < PREV(v) );'); +DROP VIEW rpr_v; --- Test 19.3: Using NULL comparisons +-- Using NULL comparisons +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM ( + SELECT CASE WHEN v % 5 = 0 THEN NULL ELSE v END AS v + FROM generate_series(1, 30) v +) t +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE A AS v IS NOT NULL, B AS v IS NULL +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -978,12 +2164,23 @@ WINDOW w AS ( PATTERN (A+ B) DEFINE A AS v IS NOT NULL, B AS v IS NULL );'); +DROP VIEW rpr_v; --- --- Section 20: Large Scale Statistics Verification --- +-- ============================================================ +-- Large Scale Statistics Verification +-- ============================================================ --- Test 20.1: 500 rows - verify statistics scale correctly +-- 500 rows - verify statistics scale correctly +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 500) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B C) + DEFINE A AS v % 10 < 7, B AS v % 10 = 7, C AS v % 10 = 8 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -994,8 +2191,19 @@ WINDOW w AS ( PATTERN (A+ B C) DEFINE A AS v % 10 < 7, B AS v % 10 = 7, C AS v % 10 = 8 );'); +DROP VIEW rpr_v; --- Test 20.2: High match count scenario +-- High match count scenario +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 500) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B) + DEFINE A AS v % 2 = 1, B AS v % 2 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -1006,8 +2214,24 @@ WINDOW w AS ( PATTERN (A B) DEFINE A AS v % 2 = 1, B AS v % 2 = 0 );'); +DROP VIEW rpr_v; --- Test 20.3: High skip count scenario +-- High skip count scenario +CREATE TEMP VIEW rpr_v AS +SELECT count(*) OVER w +FROM generate_series(1, 500) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C D E) + DEFINE + A AS v % 100 = 1, + B AS v % 100 = 2, + C AS v % 100 = 3, + D AS v % 100 = 4, + E AS v % 100 = 5 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; SELECT rpr_explain_filter(' EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w @@ -1023,6 +2247,7 @@ WINDOW w AS ( D AS v % 100 = 4, E AS v % 100 = 5 );'); +DROP VIEW rpr_v; -- Cleanup DROP TABLE nfa_test; -- 2.50.1 (Apple Git-155)