From 87986f98416ac0d145d654c65c5c8530ada78f3f Mon Sep 17 00:00:00 2001 From: Henson Choi Date: Sat, 4 Apr 2026 21:47:01 +0900 Subject: [PATCH] Add fixed-length group absorption for RPR Extend context absorption to unbounded groups with fixed-length children (min == max, recursively). Patterns like (A B{2})+ or ((A (B C){2}){2})+ are now absorbable, equivalent to unrolling to {1,1} VARs at compile time without actually unrolling. isFixedLengthChildren() recursively verifies min == max for all children including nested subgroups, extending the existing Case 2 in isUnboundedStart(). Absorption comparison in nfa_states_covered requires states to be at an ABSORBABLE judgment point, where count-dominance is guaranteed. The inline advance in nfa_match is generalized to advance bounded VARs within the absorbable region through END chains to reach the judgment point. Fix isAbsorbable propagation in nfa_advance_var and nfa_advance_end exit paths, where reusing a state object skipped recomputation. Mark VAR elements in the DFS visited bitmap at nfa_add_state_unique instead of at nfa_advance_state entry, so that loop-back through ALT to the same VAR is not incorrectly blocked by cycle detection. --- src/backend/executor/execRPR.c | 110 ++++++-- src/backend/optimizer/plan/rpr.c | 136 +++++++-- src/test/regress/expected/rpr_base.out | 83 +++++- src/test/regress/expected/rpr_explain.out | 206 +++++++++++++- src/test/regress/expected/rpr_nfa.out | 321 ++++++++++++++++++++++ src/test/regress/sql/rpr_base.sql | 36 +++ src/test/regress/sql/rpr_explain.sql | 114 ++++++++ src/test/regress/sql/rpr_nfa.sql | 168 +++++++++++ 8 files changed, 1111 insertions(+), 63 deletions(-) diff --git a/src/backend/executor/execRPR.c b/src/backend/executor/execRPR.c index 1cbe8e14780..8f0457e2b3c 100644 --- a/src/backend/executor/execRPR.c +++ b/src/backend/executor/execRPR.c @@ -1760,6 +1760,10 @@ nfa_add_state_unique(WindowAggState *winstate, RPRNFAContext *ctx, RPRNFAState * RPRNFAState *s; RPRNFAState *tail = NULL; + /* Mark VAR in visited before duplicate check to prevent DFS loops */ + winstate->nfaVisitedElems[WORDNUM(state->elemIdx)] |= + ((bitmapword) 1 << BITNUM(state->elemIdx)); + /* Check for duplicate and find tail */ for (s = ctx->states; s != NULL; s = s->next) { @@ -2033,6 +2037,14 @@ nfa_states_covered(RPRPattern *pattern, RPRNFAContext *older, RPRNFAContext *new elem = &pattern->elements[newerState->elemIdx]; depth = elem->depth; + /* + * Only compare at absorption judgment points (RPR_ELEM_ABSORBABLE). + * Judgment points are where count-dominance guarantees the newer + * context's future matches are a subset of the older's. + */ + if (!RPRElemIsAbsorbable(elem)) + return false; + for (olderState = older->states; olderState != NULL; olderState = olderState->next) { CHECK_FOR_INTERRUPTS(); @@ -2175,9 +2187,10 @@ nfa_eval_var_match(WindowAggState *winstate, RPRPatternElement *elem, * - not matched: remove state (exit alternatives already exist from * previous advance when count >= min was satisfied) * - * For simple VARs (min=max=1) followed by END: - * - Advance to END and update group count before absorb phase - * - This ensures absorption can compare states by group completion + * For VARs that reached max count followed by END: + * - Advance through END chain to reach absorption judgment point + * - Only deterministic exits (count >= max, max != INF) are handled + * - Chains through END elements while count >= max (must-exit path) * * Non-VAR elements (ALT, END, FIN) are kept as-is for advance phase. */ @@ -2191,9 +2204,9 @@ nfa_match(WindowAggState *winstate, RPRNFAContext *ctx, bool *varMatched) RPRNFAState *nextState; /* - * Evaluate VAR elements against current row. For simple VARs with END - * next, advance to END and update group count inline so absorb phase can - * compare states properly. + * Evaluate VAR elements against current row. For VARs that reach max + * count with END next, advance through END chain inline so absorb phase + * can compare states at judgment points. */ for (state = ctx->states; state != NULL; state = nextState) { @@ -2223,34 +2236,61 @@ nfa_match(WindowAggState *winstate, RPRNFAContext *ctx, bool *varMatched) state->counts[depth] = count; /* - * For simple VAR (min=max=1) with END next, advance to END - * and update group count inline. This keeps state in place, - * preserving lexical order. + * For VAR at max count with END next, advance through END + * chain to reach the absorption judgment point. Only + * deterministic exits (count >= max, max finite) are handled; + * unbounded VARs stay for advance phase. */ - if (elem->min == 1 && elem->max == 1 && + if (RPRElemIsAbsorbableBranch(elem) && + !RPRElemIsAbsorbable(elem) && + count >= elem->max && RPRElemIsEnd(&elements[elem->next])) { RPRPatternElement *endElem = &elements[elem->next]; int endDepth = endElem->depth; int32 endCount = state->counts[endDepth]; - Assert(count == 1); - - /* Increment group count with overflow protection */ + /* Increment group count */ if (endCount < RPR_COUNT_MAX) endCount++; - - /* - * END's max can never be exceeded here because - * nfa_advance_end only loops when count < max, so - * endCount entering inline advance is at most max-1, and - * incrementing yields at most max. - */ Assert(endElem->max == RPR_QUANTITY_INF || endCount <= endElem->max); state->elemIdx = elem->next; state->counts[endDepth] = endCount; + + /* + * Chain through END elements within the absorbable region + * (ABSORBABLE_BRANCH) until reaching the judgment point + * (ABSORBABLE). Continue only on must-exit path (count + * >= max) with END next. + */ + while (RPRElemIsAbsorbableBranch(endElem) && + !RPRElemIsAbsorbable(endElem) && + endCount >= endElem->max && + RPRElemIsEnd(&elements[endElem->next])) + { + RPRPatternElement *outerEnd = &elements[endElem->next]; + int outerDepth = outerEnd->depth; + int32 outerCount = state->counts[outerDepth]; + + /* Reset exited group's count */ + state->counts[endDepth] = 0; + + /* Increment outer group count */ + if (outerCount < RPR_COUNT_MAX) + outerCount++; + Assert(outerEnd->max == RPR_QUANTITY_INF || + outerCount <= outerEnd->max); + + state->elemIdx = endElem->next; + state->counts[outerDepth] = outerCount; + + /* Advance to next END in chain */ + endElem = outerEnd; + endDepth = outerDepth; + endCount = outerCount; + } } /* else: stay at VAR for advance phase */ } @@ -2468,6 +2508,10 @@ nfa_advance_end(WindowAggState *winstate, RPRNFAContext *ctx, state->elemIdx = elem->next; nextElem = &elements[state->elemIdx]; + /* Update isAbsorbable for target element (monotonic) */ + state->isAbsorbable = state->isAbsorbable && + RPRElemIsAbsorbableBranch(nextElem); + /* END->END: increment outer END's count */ if (RPRElemIsEnd(nextElem) && state->counts[nextElem->depth] < RPR_COUNT_MAX) state->counts[nextElem->depth]++; @@ -2621,6 +2665,13 @@ nfa_advance_var(WindowAggState *winstate, RPRNFAContext *ctx, state->elemIdx = elem->next; nextElem = &elements[state->elemIdx]; + /* + * Update isAbsorbable for target element (monotonic: AND + * preserves false) + */ + state->isAbsorbable = state->isAbsorbable && + RPRElemIsAbsorbableBranch(nextElem); + /* * When exiting directly to an outer END, increment its iteration * count. Simple VARs (min=max=1) handle this via inline advance @@ -2650,6 +2701,13 @@ nfa_advance_var(WindowAggState *winstate, RPRNFAContext *ctx, state->elemIdx = elem->next; nextElem = &elements[state->elemIdx]; + /* + * Update isAbsorbable for target element (monotonic: AND preserves + * false) + */ + state->isAbsorbable = state->isAbsorbable && + RPRElemIsAbsorbableBranch(nextElem); + /* See comment above: increment outer END count for quantified VARs */ if (RPRElemIsEnd(nextElem)) { @@ -2686,11 +2744,19 @@ nfa_advance_state(WindowAggState *winstate, RPRNFAContext *ctx, nfa_state_free(winstate, state); return; } - winstate->nfaVisitedElems[WORDNUM(state->elemIdx)] |= - ((bitmapword) 1 << BITNUM(state->elemIdx)); elem = &pattern->elements[state->elemIdx]; + /* + * Mark epsilon elements (END, ALT, BEGIN, FIN) in visited to prevent + * infinite epsilon cycles. VAR elements are marked later when added to + * the state list (nfa_add_state_unique), allowing legitimate loop-back to + * the same VAR in a new iteration. + */ + if (!RPRElemIsVar(elem)) + winstate->nfaVisitedElems[WORDNUM(state->elemIdx)] |= + ((bitmapword) 1 << BITNUM(state->elemIdx)); + switch (elem->varId) { case RPR_VARID_FIN: diff --git a/src/backend/optimizer/plan/rpr.c b/src/backend/optimizer/plan/rpr.c index 2728a0b9fca..c0e9d134aa9 100644 --- a/src/backend/optimizer/plan/rpr.c +++ b/src/backend/optimizer/plan/rpr.c @@ -92,6 +92,8 @@ static bool fillRPRPattern(RPRPatternNode *node, RPRPattern *pat, static void finalizeRPRPattern(RPRPattern *result); /* Forward declarations - context absorption */ +static bool isFixedLengthChildren(RPRPattern *pattern, RPRElemIdx idx, + RPRDepth scopeDepth); static bool isUnboundedStart(RPRPattern *pattern, RPRElemIdx idx); static void computeAbsorbabilityRecursive(RPRPattern *pattern, RPRElemIdx startIdx, @@ -1524,6 +1526,70 @@ finalizeRPRPattern(RPRPattern *result) *------------------------------------------------------------------------- */ +/* + * isFixedLengthChildren + * Check if all children at scopeDepth have fixed-length quantifiers + * (min == max), recursively for nested subgroups. + * + * A fixed-length group is semantically equivalent to unrolling each child + * to {1,1} copies, which is the existing Case 2 already proven correct + * for absorption. This check recognizes fixed-length groups at compile + * time without actually unrolling them. + * + * Traverses the flat element array starting at idx. For VAR elements, + * checks min == max. For BEGIN elements (nested subgroups), recurses + * into the subgroup and also checks the subgroup's END quantifier. + * ALT elements are rejected (alternation inside absorbable group is + * not supported). + * + * Returns true if all children are fixed-length, stopping at the END + * element at scopeDepth - 1. + */ +static bool +isFixedLengthChildren(RPRPattern *pattern, RPRElemIdx idx, RPRDepth scopeDepth) +{ + RPRPatternElement *e = &pattern->elements[idx]; + + check_stack_depth(); + + while (e->depth == scopeDepth) + { + if (RPRElemIsVar(e)) + { + if (e->min != e->max) + return false; + } + else if (RPRElemIsBegin(e)) + { + RPRElemIdx childIdx = e->next; + + /* Recurse into subgroup children at scopeDepth + 1 */ + if (!isFixedLengthChildren(pattern, childIdx, scopeDepth + 1)) + return false; + + /* Advance past the subgroup to its END element */ + e = &pattern->elements[e->next]; + while (e->depth > scopeDepth) + e = &pattern->elements[e->next]; + + /* e is now the END at scopeDepth; check its quantifier */ + Assert(RPRElemIsEnd(e) && e->depth == scopeDepth); + if (e->min != e->max) + return false; + } + else + { + /* ALT inside group: not supported for absorption */ + return false; + } + + Assert(e->next != RPR_ELEMIDX_INVALID); + e = &pattern->elements[e->next]; + } + + return true; +} + /* * isUnboundedStart * Check if the element at idx starts an unbounded greedy sequence. @@ -1533,29 +1599,31 @@ finalizeRPRPattern(RPRPattern *result) * - Greedy (not reluctant) * - At the start of current scope * - * Algorithm: - * - Traverse elements within current scope (parentDepth to startDepth) - * - For GROUP: must be unbounded greedy AND contain only simple {1,1} VARs - * - Sets ABSORBABLE and ABSORBABLE_BRANCH flags on matching elements - * * Two cases are handled: * 1. Simple VAR: A+ B C - A has max=INF, gets both flags - * 2. Group: (A B)+ C - END has max=INF, all children are {1,1} VARs - * A,B,END get ABSORBABLE_BRANCH, only END gets ABSORBABLE + * 2. Unbounded GROUP with fixed-length children: (A B{2})+ C + * All children must have min == max (recursively for nested subgroups). + * This is equivalent to unrolling to {1,1} VARs, e.g., (A B B)+ C. + * All elements within the group get ABSORBABLE_BRANCH. + * Only the unbounded END gets ABSORBABLE (judgment point). + * Examples: + * (A B{2})+ C - B{2} has min==max, step=3 + * (A (B C){2} D)+ E - nested {2} subgroup, step=6 + * ((A (B C){2}){2})+ - doubly nested {2}, step=10 + * (A ((B C{3}){2} D){2} E)+ F - deep nesting, step=20 * * Returns false for patterns where absorption cannot work: * - A B+ (unbounded not at start) * - A+? B (reluctant quantifier) * - (A | B)+ (ALT inside group) - * - (A B+)+ (unbounded element inside group) - * - ((A B)+ C)+ (nested unbounded groups) + * - (A B+)+ (variable-length element inside group) + * - (A B{2,5})+ (min != max inside group) */ static bool isUnboundedStart(RPRPattern *pattern, RPRElemIdx idx) { RPRPatternElement *elem = &pattern->elements[idx]; RPRDepth startDepth = elem->depth; - RPRPatternElement *nextElem; RPRPatternElement *e; /* Case 1: Simple unbounded VAR at start (greedy only) */ @@ -1568,21 +1636,19 @@ isUnboundedStart(RPRPattern *pattern, RPRElemIdx idx) } /* - * Case 2: Unbounded GROUP - traverse siblings at startDepth and check if - * they're all simple {1,1} VARs, then check if END at startDepth - 1 is - * unbounded greedy. + * Case 2: Unbounded GROUP with fixed-length children. Each child must + * have min == max (recursively for nested subgroups), ensuring a fixed + * step size per iteration so that count-dominance holds. */ - for (e = elem; e->depth == startDepth; e = nextElem) - { - /* Must be simple {1,1} VAR */ - if (!RPRElemIsVar(e) || e->min != 1 || e->max != 1) - return false; + if (!isFixedLengthChildren(pattern, idx, startDepth)) + return false; - Assert(e->next != RPR_ELEMIDX_INVALID); - nextElem = &pattern->elements[e->next]; - } + /* Find the END element at startDepth - 1 */ + e = &pattern->elements[idx]; + while (e->depth >= startDepth) + e = &pattern->elements[e->next]; - /* Now e should be END at startDepth - 1 */ + /* END must be unbounded greedy */ if (e->depth == startDepth - 1 && RPRElemIsEnd(e) && e->max == RPR_QUANTITY_INF && !RPRElemIsReluctant(e)) @@ -1590,7 +1656,8 @@ isUnboundedStart(RPRPattern *pattern, RPRElemIdx idx) Assert(e->jump == idx); /* END points back to first child */ /* Set ABSORBABLE_BRANCH on all children, ABSORBABLE on END only */ - for (e = elem; !RPRElemIsEnd(e); e = &pattern->elements[e->next]) + for (e = elem; !RPRElemIsEnd(e) || e->depth >= startDepth; + e = &pattern->elements[e->next]) e->flags |= RPR_ELEM_ABSORBABLE_BRANCH; e->flags |= RPR_ELEM_ABSORBABLE_BRANCH | RPR_ELEM_ABSORBABLE; return true; @@ -1654,12 +1721,25 @@ computeAbsorbabilityRecursive(RPRPattern *pattern, RPRElemIdx startIdx, } else if (RPRElemIsBegin(elem)) { - /* BEGIN: skip to first child and check that */ - computeAbsorbabilityRecursive(pattern, elem->next, hasAbsorbable); - - /* Mark BEGIN element if contents are absorbable */ - if (*hasAbsorbable) + /* + * BEGIN: first try to treat this BEGIN's children as an unbounded + * group directly (handles nested fixed-length groups like ((A{2} + * B{3}){2})+). If that fails, skip to first child and recurse as + * before. + */ + if (isUnboundedStart(pattern, elem->next)) + { + *hasAbsorbable = true; elem->flags |= RPR_ELEM_ABSORBABLE_BRANCH; + } + else + { + computeAbsorbabilityRecursive(pattern, elem->next, hasAbsorbable); + + /* Mark BEGIN element if contents are absorbable */ + if (*hasAbsorbable) + elem->flags |= RPR_ELEM_ABSORBABLE_BRANCH; + } } else { diff --git a/src/test/regress/expected/rpr_base.out b/src/test/regress/expected/rpr_base.out index 3168468d0ae..7452cf1b3ab 100644 --- a/src/test/regress/expected/rpr_base.out +++ b/src/test/regress/expected/rpr_base.out @@ -3312,7 +3312,7 @@ WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING ------------------------------------------------------------------------------- WindowAgg Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) - Pattern: (a{2}){2,} + Pattern: (a{2}'){2,}" -> Sort Sort Key: id -> Seq Scan on rpr_plan @@ -4095,6 +4095,87 @@ WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING -> Seq Scan on rpr_plan (6 rows) +-- Fixed-length group absorbable: (A{2} B{3})+ -> (a{2}' b{3}'){2,}" +-- All children have min == max, equivalent to unrolling to {1,1} +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW PATTERN ((A{2} B{3})+) + DEFINE A AS val <= 50, B AS val > 50); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a{2}' b{3}')+" + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(6 rows) + +-- Nested fixed-length group: (A (B C){2} D)+ -> absorbable +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW PATTERN ((A (B C){2} D)+) + DEFINE A AS val <= 20, B AS val <= 40, C AS val <= 60, D AS val > 60); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a' (b' c'){2}' d')+" + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(6 rows) + +-- Nested fixed-length with inner quantifier: ((A{2} B{3}){2})+ -> absorbable +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW PATTERN (((A{2} B{3}){2})+) + DEFINE A AS val <= 50, B AS val > 50); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: ((a{2}' b{3}'){2}')+" + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(6 rows) + +-- Non-absorbable fixed-length: (A B{2,5})+ -> no markers (min != max) +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW PATTERN ((A B{2,5})+) + DEFINE A AS val <= 50, B AS val > 50); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a b{2,5})+ + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(6 rows) + +-- Non-absorbable fixed-length: (A B?)+ -> no markers (min != max) +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW PATTERN ((A B?)+) + DEFINE A AS val <= 50, B AS val > 50); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a b?)+ + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(6 rows) + -- Non-absorbable (unbounded not at start): A B+ -> a b+ (no markers) EXPLAIN (COSTS OFF) SELECT COUNT(*) OVER w FROM rpr_plan diff --git a/src/test/regress/expected/rpr_explain.out b/src/test/regress/expected/rpr_explain.out index 79cbc246039..560f21f44c2 100644 --- a/src/test/regress/expected/rpr_explain.out +++ b/src/test/regress/expected/rpr_explain.out @@ -462,10 +462,10 @@ WINDOW w AS ( Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) Pattern: (a' b')+" Storage: Memory Maximum Storage: NkB - NFA States: 3 peak, 91 total, 0 merged - NFA Contexts: 2 peak, 61 total, 0 pruned + NFA States: 4 peak, 91 total, 0 merged + NFA Contexts: 3 peak, 61 total, 0 pruned NFA: 1 matched (len 60/60/60.0), 0 mismatched - NFA: 29 absorbed (len 1/1/1.0), 30 skipped (len 1/1/1.0) + NFA: 29 absorbed (len 2/2/2.0), 30 skipped (len 1/1/1.0) -> Function Scan on generate_series s (actual rows=60.00 loops=1) (9 rows) @@ -904,6 +904,188 @@ WINDOW w AS ( -> Function Scan on generate_series s (actual rows=100.00 loops=1) (9 rows) +-- Fixed-length group absorption: (A B B)+ C +-- B B merged to B{2}; absorbable with fixed-length check +-- step_size=3 (A + B + B); v % 7 cycle gives 2 iterations per match +CREATE VIEW rpr_ev20b AS +SELECT count(*) OVER w +FROM generate_series(1, 70) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A B B)+ C) + DEFINE A AS v % 7 IN (1, 4), B AS v % 7 IN (2, 3, 5, 6), C AS v % 7 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev20b'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------------- + PATTERN ((a b b)+ c) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 70) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A B B)+ C) + DEFINE A AS v % 7 IN (1, 4), B AS v % 7 IN (2, 3, 5, 6), C AS v % 7 = 0 +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=70.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a' b{2}')+" c + Storage: Memory Maximum Storage: NkB + NFA States: 3 peak, 91 total, 0 merged + NFA Contexts: 4 peak, 71 total, 40 pruned + NFA: 10 matched (len 7/7/7.0), 0 mismatched + NFA: 10 absorbed (len 3/3/3.0), 10 skipped (len 1/1/1.0) + -> Function Scan on generate_series s (actual rows=70.00 loops=1) +(9 rows) + +-- Nested fixed-length group absorption: (A (B C){2} D)+ E +-- step_size = 1 + (1+1)*2 + 1 = 6; v % 13 cycle gives 2 iterations + E +CREATE VIEW rpr_ev20c AS +SELECT count(*) OVER w +FROM generate_series(1, 65) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A (B C){2} D)+ E) + DEFINE A AS v % 13 IN (1, 7), B AS v % 13 IN (2, 4, 8, 10), + C AS v % 13 IN (3, 5, 9, 11), D AS v % 13 IN (6, 12), + E AS v % 13 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev20c'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +-------------------------------- + PATTERN ((a (b c){2} d)+ e) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 65) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A (B C){2} D)+ E) + DEFINE A AS v % 13 IN (1, 7), B AS v % 13 IN (2, 4, 8, 10), + C AS v % 13 IN (3, 5, 9, 11), D AS v % 13 IN (6, 12), + E AS v % 13 = 0 +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=65.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a' (b' c'){2}' d')+" e + Storage: Memory Maximum Storage: NkB + NFA States: 3 peak, 76 total, 0 merged + NFA Contexts: 4 peak, 66 total, 50 pruned + NFA: 5 matched (len 13/13/13.0), 0 mismatched + NFA: 5 absorbed (len 6/6/6.0), 5 skipped (len 1/1/1.0) + -> Function Scan on generate_series s (actual rows=65.00 loops=1) +(9 rows) + +-- Doubly nested fixed-length group absorption: (A ((B C{3}){2} D){2} E)+ F +-- step_size = 1 + ((1+3)*2+1)*2 + 1 = 20; v % 41 cycle gives 2 iterations + F +CREATE VIEW rpr_ev20d AS +SELECT count(*) OVER w +FROM generate_series(1, 82) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A ((B C C C){2} D){2} E)+ F) + DEFINE A AS v % 41 IN (1, 21), + B AS v % 41 IN (2, 6, 11, 15, 22, 26, 31, 35), + C AS v % 41 IN (3,4,5, 7,8,9, 12,13,14, 16,17,18, + 23,24,25, 27,28,29, 32,33,34, 36,37,38), + D AS v % 41 IN (10, 19, 30, 39), + E AS v % 41 IN (20, 40), + F AS v % 41 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev20d'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +------------------------------------------- + PATTERN ((a ((b c c c){2} d){2} e)+ f) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 82) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A ((B C C C){2} D){2} E)+ F) + DEFINE A AS v % 41 IN (1, 21), + B AS v % 41 IN (2, 6, 11, 15, 22, 26, 31, 35), + C AS v % 41 IN (3,4,5, 7,8,9, 12,13,14, 16,17,18, + 23,24,25, 27,28,29, 32,33,34, 36,37,38), + D AS v % 41 IN (10, 19, 30, 39), + E AS v % 41 IN (20, 40), + F AS v % 41 = 0 +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=82.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a' ((b' c{3}'){2}' d'){2}' e')+" f + Storage: Memory Maximum Storage: NkB + NFA States: 3 peak, 87 total, 0 merged + NFA Contexts: 4 peak, 83 total, 76 pruned + NFA: 2 matched (len 41/41/41.0), 0 mismatched + NFA: 2 absorbed (len 20/20/20.0), 2 skipped (len 1/1/1.0) + -> Function Scan on generate_series s (actual rows=82.00 loops=1) +(9 rows) + +-- 3-level END chain absorption: ((A (B C){2}){2})+ +-- step_size = (1 + (1+1)*2) * 2 = 10; v % 21 cycle gives 2 iterations +-- END chain: END(BC{2}) -> END(A..{2}) -> END(+, ABSORBABLE) +CREATE VIEW rpr_ev20e AS +SELECT count(*) OVER w +FROM generate_series(1, 42) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (((A (B C){2}){2})+) + DEFINE A AS v % 21 IN (1, 6, 11, 16), + B AS v % 21 IN (2, 4, 7, 9, 12, 14, 17, 19), + C AS v % 21 IN (3, 5, 8, 10, 13, 15, 18, 20) +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev20e'), E'\n')) AS line WHERE line ~ 'PATTERN'; + line +--------------------------------- + PATTERN (((a (b c){2}){2})+) +(1 row) + +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 42) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (((A (B C){2}){2})+) + DEFINE A AS v % 21 IN (1, 6, 11, 16), + B AS v % 21 IN (2, 4, 7, 9, 12, 14, 17, 19), + C AS v % 21 IN (3, 5, 8, 10, 13, 15, 18, 20) +);'); + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=42.00 loops=1) + Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: ((a' (b' c'){2}'){2}')+" + Storage: Memory Maximum Storage: NkB + NFA States: 5 peak, 47 total, 0 merged + NFA Contexts: 5 peak, 43 total, 30 pruned + NFA: 2 matched (len 20/20/20.0), 0 mismatched + NFA: 2 absorbed (len 10/10/10.0), 8 skipped (len 1/5/3.0) + -> Function Scan on generate_series s (actual rows=42.00 loops=1) +(9 rows) + -- ============================================================ -- Match Length Statistics Tests -- ============================================================ @@ -1894,10 +2076,10 @@ WINDOW w AS ( Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) Pattern: (a' b' c')+" Storage: Memory Maximum Storage: NkB - NFA States: 3 peak, 81 total, 0 merged - NFA Contexts: 3 peak, 61 total, 20 pruned + NFA States: 4 peak, 81 total, 0 merged + NFA Contexts: 4 peak, 61 total, 20 pruned NFA: 1 matched (len 60/60/60.0), 0 mismatched - NFA: 19 absorbed (len 1/1/1.0), 20 skipped (len 1/1/1.0) + NFA: 19 absorbed (len 3/3/3.0), 20 skipped (len 1/1/1.0) -> Function Scan on generate_series s (actual rows=60.00 loops=1) (9 rows) @@ -2461,7 +2643,7 @@ WINDOW w AS ( NFA States: 4 peak, 102 total, 0 merged NFA Contexts: 2 peak, 41 total, 10 pruned NFA: 10 matched (len 3/3/3.0), 0 mismatched - NFA: 20 absorbed (len 1/1/1.0), 0 skipped + NFA: 10 absorbed (len 1/1/1.0), 10 skipped (len 1/1/1.0) -> Function Scan on generate_series s (actual rows=40.00 loops=1) (9 rows) @@ -3158,10 +3340,10 @@ WINDOW w AS ( Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) Pattern: (a' b')+" Storage: Memory Maximum Storage: NkB - NFA States: 3 peak, 61 total, 0 merged - NFA Contexts: 2 peak, 41 total, 0 pruned + NFA States: 4 peak, 61 total, 0 merged + NFA Contexts: 3 peak, 41 total, 0 pruned NFA: 1 matched (len 40/40/40.0), 0 mismatched - NFA: 19 absorbed (len 1/1/1.0), 20 skipped (len 1/1/1.0) + NFA: 19 absorbed (len 2/2/2.0), 20 skipped (len 1/1/1.0) -> Function Scan on generate_series s (actual rows=40.00 loops=1) (9 rows) @@ -3234,12 +3416,12 @@ WINDOW w AS ( ---------------------------------------------------------------------- WindowAgg (actual rows=60.00 loops=1) Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) - Pattern: ((a b){2})+ + Pattern: ((a' b'){2}')+" Storage: Memory Maximum Storage: NkB NFA States: 5 peak, 76 total, 0 merged NFA Contexts: 4 peak, 61 total, 15 pruned NFA: 1 matched (len 60/60/60.0), 0 mismatched - NFA: 0 absorbed, 44 skipped (len 1/4/2.3) + NFA: 14 absorbed (len 4/4/4.0), 30 skipped (len 1/2/1.5) -> Function Scan on generate_series s (actual rows=60.00 loops=1) (9 rows) diff --git a/src/test/regress/expected/rpr_nfa.out b/src/test/regress/expected/rpr_nfa.out index 7b5a17fb671..250f7f131b1 100644 --- a/src/test/regress/expected/rpr_nfa.out +++ b/src/test/regress/expected/rpr_nfa.out @@ -447,6 +447,327 @@ WINDOW w AS ( 7 | {X} | | (7 rows) +-- Fixed-length group absorption: (A B{2})+ C +-- B{2} has min == max, equivalent to unrolling to (A B B)+ C +WITH test_absorb_fixedlen AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['B']), + (4, ARRAY['A']), + (5, ARRAY['B']), + (6, ARRAY['B']), + (7, ARRAY['A']), + (8, ARRAY['B']), + (9, ARRAY['B']), + (10, ARRAY['C']), + (11, ARRAY['X']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_absorb_fixedlen +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A B{2})+ C) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 10 + 2 | {B} | | + 3 | {B} | | + 4 | {A} | | + 5 | {B} | | + 6 | {B} | | + 7 | {A} | | + 8 | {B} | | + 9 | {B} | | + 10 | {C} | | + 11 | {X} | | +(11 rows) + +-- Consecutive vars merged to fixed-length: (A B B)+ -> (A B{2})+ +-- mergeConsecutiveVars produces B{2}; now absorbable with fixed-length check +WITH test_absorb_consecutive AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['B']), + (4, ARRAY['A']), + (5, ARRAY['B']), + (6, ARRAY['B']), + (7, ARRAY['A']), + (8, ARRAY['B']), + (9, ARRAY['B']), + (10, ARRAY['X']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_absorb_consecutive +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A B B)+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 9 + 2 | {B} | | + 3 | {B} | | + 4 | {A} | | + 5 | {B} | | + 6 | {B} | | + 7 | {A} | | + 8 | {B} | | + 9 | {B} | | + 10 | {X} | | +(10 rows) + +-- Nested fixed-length group absorption: (A (B C){2} D)+ E +-- Inner group {2} has min == max; absorbable via recursive check +-- step_size = 1 + (1+1)*2 + 1 = 6 +WITH test_absorb_nested_fixedlen AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['C']), + (4, ARRAY['B']), + (5, ARRAY['C']), + (6, ARRAY['D']), + (7, ARRAY['A']), + (8, ARRAY['B']), + (9, ARRAY['C']), + (10, ARRAY['B']), + (11, ARRAY['C']), + (12, ARRAY['D']), + (13, ARRAY['E']), + (14, ARRAY['X']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_absorb_nested_fixedlen +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A (B C){2} D)+ E) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags), + E AS 'E' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 13 + 2 | {B} | | + 3 | {C} | | + 4 | {B} | | + 5 | {C} | | + 6 | {D} | | + 7 | {A} | | + 8 | {B} | | + 9 | {C} | | + 10 | {B} | | + 11 | {C} | | + 12 | {D} | | + 13 | {E} | | + 14 | {X} | | +(14 rows) + +-- Doubly nested fixed-length group absorption: (A ((B C{3}){2} D){2} E)+ F +-- step_size = 1 + ((1+3)*2+1)*2 + 1 = 20; 2 iterations + F = 41 rows +WITH test_absorb_doubly_nested AS ( + SELECT v AS id, ARRAY[ + CASE + WHEN v % 41 IN (1, 21) THEN 'A' + WHEN v % 41 IN (2, 6, 11, 15, 22, 26, 31, 35) THEN 'B' + WHEN v % 41 IN (3,4,5, 7,8,9, 12,13,14, 16,17,18, + 23,24,25, 27,28,29, 32,33,34, 36,37,38) THEN 'C' + WHEN v % 41 IN (10, 19, 30, 39) THEN 'D' + WHEN v % 41 IN (20, 40) THEN 'E' + WHEN v % 41 = 0 THEN 'F' + ELSE 'X' + END + ] AS flags + FROM generate_series(1, 82) AS s(v) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_absorb_doubly_nested +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A ((B C C C){2} D){2} E)+ F) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags), + E AS 'E' = ANY(flags), + F AS 'F' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 41 + 2 | {B} | | + 3 | {C} | | + 4 | {C} | | + 5 | {C} | | + 6 | {B} | | + 7 | {C} | | + 8 | {C} | | + 9 | {C} | | + 10 | {D} | | + 11 | {B} | | + 12 | {C} | | + 13 | {C} | | + 14 | {C} | | + 15 | {B} | | + 16 | {C} | | + 17 | {C} | | + 18 | {C} | | + 19 | {D} | | + 20 | {E} | | + 21 | {A} | | + 22 | {B} | | + 23 | {C} | | + 24 | {C} | | + 25 | {C} | | + 26 | {B} | | + 27 | {C} | | + 28 | {C} | | + 29 | {C} | | + 30 | {D} | | + 31 | {B} | | + 32 | {C} | | + 33 | {C} | | + 34 | {C} | | + 35 | {B} | | + 36 | {C} | | + 37 | {C} | | + 38 | {C} | | + 39 | {D} | | + 40 | {E} | | + 41 | {F} | | + 42 | {A} | 42 | 82 + 43 | {B} | | + 44 | {C} | | + 45 | {C} | | + 46 | {C} | | + 47 | {B} | | + 48 | {C} | | + 49 | {C} | | + 50 | {C} | | + 51 | {D} | | + 52 | {B} | | + 53 | {C} | | + 54 | {C} | | + 55 | {C} | | + 56 | {B} | | + 57 | {C} | | + 58 | {C} | | + 59 | {C} | | + 60 | {D} | | + 61 | {E} | | + 62 | {A} | | + 63 | {B} | | + 64 | {C} | | + 65 | {C} | | + 66 | {C} | | + 67 | {B} | | + 68 | {C} | | + 69 | {C} | | + 70 | {C} | | + 71 | {D} | | + 72 | {B} | | + 73 | {C} | | + 74 | {C} | | + 75 | {C} | | + 76 | {B} | | + 77 | {C} | | + 78 | {C} | | + 79 | {C} | | + 80 | {D} | | + 81 | {E} | | + 82 | {F} | | +(82 rows) + +-- 3-level END chain: ((A (B C){2}){2})+ +-- Tests END(BC{2}) -> END(A..{2}) -> END(+) chaining +-- 2 iterations of +, each 10 rows: (A B C B C)(A B C B C) +WITH test_absorb_3level_end AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), -- 1st + iter, 1st {2}, A + (2, ARRAY['B']), + (3, ARRAY['C']), + (4, ARRAY['B']), + (5, ARRAY['C']), -- 1st (BC){2} done + (6, ARRAY['A']), -- 1st + iter, 2nd {2}, A + (7, ARRAY['B']), + (8, ARRAY['C']), + (9, ARRAY['B']), + (10, ARRAY['C']), -- 2nd (BC){2} done, 1st {2} done, 1st + iter done + (11, ARRAY['A']), -- 2nd + iter, 1st {2}, A + (12, ARRAY['B']), + (13, ARRAY['C']), + (14, ARRAY['B']), + (15, ARRAY['C']), + (16, ARRAY['A']), -- 2nd + iter, 2nd {2}, A + (17, ARRAY['B']), + (18, ARRAY['C']), + (19, ARRAY['B']), + (20, ARRAY['C']), -- 2nd + iter done + (21, ARRAY['X']) -- no match, + ends + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_absorb_3level_end +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (((A (B C){2}){2})+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 20 + 2 | {B} | | + 3 | {C} | | + 4 | {B} | | + 5 | {C} | | + 6 | {A} | | + 7 | {B} | | + 8 | {C} | | + 9 | {B} | | + 10 | {C} | | + 11 | {A} | | + 12 | {B} | | + 13 | {C} | | + 14 | {B} | | + 15 | {C} | | + 16 | {A} | | + 17 | {B} | | + 18 | {C} | | + 19 | {B} | | + 20 | {C} | | + 21 | {X} | | +(21 rows) + -- Multiple unbounded: A+ B+ (first element unbounded enables absorption) WITH test_multi_unbounded AS ( SELECT * FROM (VALUES diff --git a/src/test/regress/sql/rpr_base.sql b/src/test/regress/sql/rpr_base.sql index cf6c062ae85..8c23c7598a3 100644 --- a/src/test/regress/sql/rpr_base.sql +++ b/src/test/regress/sql/rpr_base.sql @@ -2600,6 +2600,42 @@ WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING AFTER MATCH SKIP PAST LAST ROW PATTERN ((A+ B | A B)*) DEFINE A AS val <= 50, B AS val > 50); +-- Fixed-length group absorbable: (A{2} B{3})+ -> (a{2}' b{3}'){2,}" +-- All children have min == max, equivalent to unrolling to {1,1} +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW PATTERN ((A{2} B{3})+) + DEFINE A AS val <= 50, B AS val > 50); + +-- Nested fixed-length group: (A (B C){2} D)+ -> absorbable +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW PATTERN ((A (B C){2} D)+) + DEFINE A AS val <= 20, B AS val <= 40, C AS val <= 60, D AS val > 60); + +-- Nested fixed-length with inner quantifier: ((A{2} B{3}){2})+ -> absorbable +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW PATTERN (((A{2} B{3}){2})+) + DEFINE A AS val <= 50, B AS val > 50); + +-- Non-absorbable fixed-length: (A B{2,5})+ -> no markers (min != max) +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW PATTERN ((A B{2,5})+) + DEFINE A AS val <= 50, B AS val > 50); + +-- Non-absorbable fixed-length: (A B?)+ -> no markers (min != max) +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW PATTERN ((A B?)+) + DEFINE A AS val <= 50, B AS val > 50); + -- Non-absorbable (unbounded not at start): A B+ -> a b+ (no markers) EXPLAIN (COSTS OFF) SELECT COUNT(*) OVER w FROM rpr_plan diff --git a/src/test/regress/sql/rpr_explain.sql b/src/test/regress/sql/rpr_explain.sql index 93e06b0cbdf..237f0366631 100644 --- a/src/test/regress/sql/rpr_explain.sql +++ b/src/test/regress/sql/rpr_explain.sql @@ -578,6 +578,120 @@ WINDOW w AS ( DEFINE A AS v % 3 = 1, B AS v % 3 = 2, C AS v % 3 = 0 );'); +-- Fixed-length group absorption: (A B B)+ C +-- B B merged to B{2}; absorbable with fixed-length check +-- step_size=3 (A + B + B); v % 7 cycle gives 2 iterations per match +CREATE VIEW rpr_ev20b AS +SELECT count(*) OVER w +FROM generate_series(1, 70) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A B B)+ C) + DEFINE A AS v % 7 IN (1, 4), B AS v % 7 IN (2, 3, 5, 6), C AS v % 7 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev20b'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 70) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A B B)+ C) + DEFINE A AS v % 7 IN (1, 4), B AS v % 7 IN (2, 3, 5, 6), C AS v % 7 = 0 +);'); + +-- Nested fixed-length group absorption: (A (B C){2} D)+ E +-- step_size = 1 + (1+1)*2 + 1 = 6; v % 13 cycle gives 2 iterations + E +CREATE VIEW rpr_ev20c AS +SELECT count(*) OVER w +FROM generate_series(1, 65) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A (B C){2} D)+ E) + DEFINE A AS v % 13 IN (1, 7), B AS v % 13 IN (2, 4, 8, 10), + C AS v % 13 IN (3, 5, 9, 11), D AS v % 13 IN (6, 12), + E AS v % 13 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev20c'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 65) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A (B C){2} D)+ E) + DEFINE A AS v % 13 IN (1, 7), B AS v % 13 IN (2, 4, 8, 10), + C AS v % 13 IN (3, 5, 9, 11), D AS v % 13 IN (6, 12), + E AS v % 13 = 0 +);'); + +-- Doubly nested fixed-length group absorption: (A ((B C{3}){2} D){2} E)+ F +-- step_size = 1 + ((1+3)*2+1)*2 + 1 = 20; v % 41 cycle gives 2 iterations + F +CREATE VIEW rpr_ev20d AS +SELECT count(*) OVER w +FROM generate_series(1, 82) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A ((B C C C){2} D){2} E)+ F) + DEFINE A AS v % 41 IN (1, 21), + B AS v % 41 IN (2, 6, 11, 15, 22, 26, 31, 35), + C AS v % 41 IN (3,4,5, 7,8,9, 12,13,14, 16,17,18, + 23,24,25, 27,28,29, 32,33,34, 36,37,38), + D AS v % 41 IN (10, 19, 30, 39), + E AS v % 41 IN (20, 40), + F AS v % 41 = 0 +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev20d'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 82) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A ((B C C C){2} D){2} E)+ F) + DEFINE A AS v % 41 IN (1, 21), + B AS v % 41 IN (2, 6, 11, 15, 22, 26, 31, 35), + C AS v % 41 IN (3,4,5, 7,8,9, 12,13,14, 16,17,18, + 23,24,25, 27,28,29, 32,33,34, 36,37,38), + D AS v % 41 IN (10, 19, 30, 39), + E AS v % 41 IN (20, 40), + F AS v % 41 = 0 +);'); + +-- 3-level END chain absorption: ((A (B C){2}){2})+ +-- step_size = (1 + (1+1)*2) * 2 = 10; v % 21 cycle gives 2 iterations +-- END chain: END(BC{2}) -> END(A..{2}) -> END(+, ABSORBABLE) +CREATE VIEW rpr_ev20e AS +SELECT count(*) OVER w +FROM generate_series(1, 42) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (((A (B C){2}){2})+) + DEFINE A AS v % 21 IN (1, 6, 11, 16), + B AS v % 21 IN (2, 4, 7, 9, 12, 14, 17, 19), + C AS v % 21 IN (3, 5, 8, 10, 13, 15, 18, 20) +); +SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_ev20e'), E'\n')) AS line WHERE line ~ 'PATTERN'; +SELECT rpr_explain_filter(' +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) +SELECT count(*) OVER w +FROM generate_series(1, 42) AS s(v) +WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (((A (B C){2}){2})+) + DEFINE A AS v % 21 IN (1, 6, 11, 16), + B AS v % 21 IN (2, 4, 7, 9, 12, 14, 17, 19), + C AS v % 21 IN (3, 5, 8, 10, 13, 15, 18, 20) +);'); + -- ============================================================ -- Match Length Statistics Tests -- ============================================================ diff --git a/src/test/regress/sql/rpr_nfa.sql b/src/test/regress/sql/rpr_nfa.sql index 5edcb3357e6..aaa7b44f789 100644 --- a/src/test/regress/sql/rpr_nfa.sql +++ b/src/test/regress/sql/rpr_nfa.sql @@ -346,6 +346,174 @@ WINDOW w AS ( B AS 'B' = ANY(flags) ); +-- Fixed-length group absorption: (A B{2})+ C +-- B{2} has min == max, equivalent to unrolling to (A B B)+ C +WITH test_absorb_fixedlen AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['B']), + (4, ARRAY['A']), + (5, ARRAY['B']), + (6, ARRAY['B']), + (7, ARRAY['A']), + (8, ARRAY['B']), + (9, ARRAY['B']), + (10, ARRAY['C']), + (11, ARRAY['X']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_absorb_fixedlen +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A B{2})+ C) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags) +); + +-- Consecutive vars merged to fixed-length: (A B B)+ -> (A B{2})+ +-- mergeConsecutiveVars produces B{2}; now absorbable with fixed-length check +WITH test_absorb_consecutive AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['B']), + (4, ARRAY['A']), + (5, ARRAY['B']), + (6, ARRAY['B']), + (7, ARRAY['A']), + (8, ARRAY['B']), + (9, ARRAY['B']), + (10, ARRAY['X']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_absorb_consecutive +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A B B)+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + +-- Nested fixed-length group absorption: (A (B C){2} D)+ E +-- Inner group {2} has min == max; absorbable via recursive check +-- step_size = 1 + (1+1)*2 + 1 = 6 +WITH test_absorb_nested_fixedlen AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['C']), + (4, ARRAY['B']), + (5, ARRAY['C']), + (6, ARRAY['D']), + (7, ARRAY['A']), + (8, ARRAY['B']), + (9, ARRAY['C']), + (10, ARRAY['B']), + (11, ARRAY['C']), + (12, ARRAY['D']), + (13, ARRAY['E']), + (14, ARRAY['X']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_absorb_nested_fixedlen +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A (B C){2} D)+ E) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags), + E AS 'E' = ANY(flags) +); + +-- Doubly nested fixed-length group absorption: (A ((B C{3}){2} D){2} E)+ F +-- step_size = 1 + ((1+3)*2+1)*2 + 1 = 20; 2 iterations + F = 41 rows +WITH test_absorb_doubly_nested AS ( + SELECT v AS id, ARRAY[ + CASE + WHEN v % 41 IN (1, 21) THEN 'A' + WHEN v % 41 IN (2, 6, 11, 15, 22, 26, 31, 35) THEN 'B' + WHEN v % 41 IN (3,4,5, 7,8,9, 12,13,14, 16,17,18, + 23,24,25, 27,28,29, 32,33,34, 36,37,38) THEN 'C' + WHEN v % 41 IN (10, 19, 30, 39) THEN 'D' + WHEN v % 41 IN (20, 40) THEN 'E' + WHEN v % 41 = 0 THEN 'F' + ELSE 'X' + END + ] AS flags + FROM generate_series(1, 82) AS s(v) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_absorb_doubly_nested +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A ((B C C C){2} D){2} E)+ F) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags), + E AS 'E' = ANY(flags), + F AS 'F' = ANY(flags) +); + +-- 3-level END chain: ((A (B C){2}){2})+ +-- Tests END(BC{2}) -> END(A..{2}) -> END(+) chaining +-- 2 iterations of +, each 10 rows: (A B C B C)(A B C B C) +WITH test_absorb_3level_end AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), -- 1st + iter, 1st {2}, A + (2, ARRAY['B']), + (3, ARRAY['C']), + (4, ARRAY['B']), + (5, ARRAY['C']), -- 1st (BC){2} done + (6, ARRAY['A']), -- 1st + iter, 2nd {2}, A + (7, ARRAY['B']), + (8, ARRAY['C']), + (9, ARRAY['B']), + (10, ARRAY['C']), -- 2nd (BC){2} done, 1st {2} done, 1st + iter done + (11, ARRAY['A']), -- 2nd + iter, 1st {2}, A + (12, ARRAY['B']), + (13, ARRAY['C']), + (14, ARRAY['B']), + (15, ARRAY['C']), + (16, ARRAY['A']), -- 2nd + iter, 2nd {2}, A + (17, ARRAY['B']), + (18, ARRAY['C']), + (19, ARRAY['B']), + (20, ARRAY['C']), -- 2nd + iter done + (21, ARRAY['X']) -- no match, + ends + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_absorb_3level_end +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (((A (B C){2}){2})+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags) +); + -- Multiple unbounded: A+ B+ (first element unbounded enables absorption) WITH test_multi_unbounded AS ( SELECT * FROM (VALUES -- 2.50.1 (Apple Git-155)