From 9d2796ca6cc434ef323e31f60d4a1c9976bfeb1c Mon Sep 17 00:00:00 2001 From: Henson Choi Date: Mon, 9 Feb 2026 12:55:01 +0900 Subject: [PATCH 1/1] Review NFA executor and add comprehensive runtime tests Review the RPR NFA executor code, fix issues found, and add a comprehensive runtime test suite (rpr_nfa.sql) to validate NFA behavior at pattern matching boundaries. Code review fixes in nodeWindowAgg.c: - Extract nfa_process_row() with three clear phases: match (convergence), absorb, and advance (divergence). Previously these phases were interleaved in update_reduced_frame(). - Simplify update_reduced_frame() result registration by separating match and mismatch paths into early-return branches, removing the combined if-else block. - Remove nfa_remove_contexts_up_to(). Context cleanup is now handled uniformly by nfa_context_free() for both SKIP PAST LAST ROW and SKIP TO NEXT ROW modes. - Rename nfa_state_clone -> nfa_state_create, nfa_find_context_for_pos -> nfa_get_head_context for clarity. - Add nfa_record_context_success/skipped/absorbed statistics helpers. - Remove unused RPRPattern parameter from nfa_update_absorption_flags(). - Document two FIXME issues found during review: (1) altPriority tracks only the last ALT choice, so repeated or nested ALTs like (A|B)+ cannot correctly implement SQL standard lexical ordering. A full-path classifier structure is needed. (2) Cycle prevention condition (count == 0 && min == 0) is insufficient for patterns like (A*)* where cycles occur at count > 0. Currently relies on implicit duplicate detection in nfa_add_state_unique. Code review fixes in rpr.c: - Remove parentDepth parameter from isUnboundedStart() and computeAbsorbabilityRecursive(). Scope boundaries are now determined by element depth comparison instead of an explicit parent depth parameter. - Simplify isUnboundedStart(): check simple VAR case first, then GROUP case with a depth-bounded loop instead of a FIN-terminated traversal with multiple break conditions. Code review fixes in parse_rpr.c: - Remove dead frame-type determination code. RANGE and GROUPS frames are already rejected before the start-position check, so frameType is always "ROWS". Test changes: - Add rpr_nfa.sql: comprehensive NFA runtime tests covering quantifier boundaries, alternation priority, nested patterns, frame boundary variations, INITIAL mode, pathological patterns, context absorption, and FIXME reproduction cases. - Add nth_value out-of-frame tests, ReScan/LATERAL test, and nth_value IGNORE NULLS test to rpr.sql. - Fix stale comments across rpr.sql, rpr_base.sql, and rpr_nfa.sql: remove resolved BUG annotations, update error messages to match actual output, correct optimization result descriptions, and standardize Expected comment placement to after SQL statements. --- src/backend/commands/explain.c | 12 +- src/backend/executor/nodeWindowAgg.c | 609 ++--- src/backend/optimizer/plan/rpr.c | 193 +- src/backend/parser/parse_rpr.c | 10 +- src/include/optimizer/rpr.h | 2 +- src/test/regress/expected/rpr.out | 142 +- src/test/regress/expected/rpr_base.out | 96 +- src/test/regress/expected/rpr_explain.out | 549 +++-- src/test/regress/expected/rpr_nfa.out | 2524 +++++++++++++++++++++ src/test/regress/parallel_schedule | 2 +- src/test/regress/sql/rpr.sql | 88 +- src/test/regress/sql/rpr_base.sql | 92 +- src/test/regress/sql/rpr_explain.sql | 32 +- src/test/regress/sql/rpr_nfa.sql | 1865 +++++++++++++++ 14 files changed, 5445 insertions(+), 771 deletions(-) create mode 100644 src/test/regress/expected/rpr_nfa.out create mode 100644 src/test/regress/sql/rpr_nfa.sql diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index 96542e9538d..c6baa6ca990 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -2986,7 +2986,7 @@ deparse_rpr_elements(RPRPattern *pattern, int *idx, StringInfoData *buf, RPRDepth groupDepth, RPRDepth *prevDepth, bool *needSpace) { - List *altSeps = NIL; /* pending alternation separator indices */ + List *altSeps = NIL; /* pending alternation separator indices */ while (*idx < pattern->numElements) { @@ -3047,9 +3047,9 @@ deparse_rpr_group(RPRPattern *pattern, int *idx, StringInfoData *buf, RPRPatternElement *end; /* - * Check if this group wraps a single ALT with no siblings. - * Scan from after ALT to END: if no element at childDepth exists, - * the ALT is the sole child. + * Check if this group wraps a single ALT with no siblings. Scan from + * after ALT to END: if no element at childDepth exists, the ALT is the + * sole child. */ if (*idx + 1 < pattern->numElements && RPRElemIsAlt(&pattern->elements[*idx + 1])) @@ -3080,7 +3080,7 @@ deparse_rpr_group(RPRPattern *pattern, int *idx, StringInfoData *buf, *needSpace = false; } *prevDepth = childDepth; - (*idx)++; /* consume BEGIN */ + (*idx)++; /* consume BEGIN */ /* Process group children; stops at matching END */ deparse_rpr_elements(pattern, idx, buf, begin->depth, @@ -3101,7 +3101,7 @@ deparse_rpr_group(RPRPattern *pattern, int *idx, StringInfoData *buf, append_rpr_quantifier(buf, end); *prevDepth = end->depth; *needSpace = true; - (*idx)++; /* consume END */ + (*idx)++; /* consume END */ } /* diff --git a/src/backend/executor/nodeWindowAgg.c b/src/backend/executor/nodeWindowAgg.c index b2874bf6e9d..b7b87f65b47 100644 --- a/src/backend/executor/nodeWindowAgg.c +++ b/src/backend/executor/nodeWindowAgg.c @@ -256,13 +256,17 @@ static void update_reduced_frame(WindowObject winobj, int64 pos); static void check_rpr_navigation(Node *node, bool is_prev); static bool rpr_navigation_walker(Node *node, void *context); +/* Forward declarations - NFA row processing */ +static void nfa_process_row(WindowAggState *winstate, int64 currentPos, + bool hasLimitedFrame, int64 frameOffset); + /* Forward declarations - NFA state management */ static RPRNFAState *nfa_state_alloc(WindowAggState *winstate); static void nfa_state_free(WindowAggState *winstate, RPRNFAState *state); static void nfa_state_free_list(WindowAggState *winstate, RPRNFAState *list); -static RPRNFAState *nfa_state_clone(WindowAggState *winstate, int16 elemIdx, - int16 altPriority, int32 *counts, - bool sourceAbsorbable); +static RPRNFAState *nfa_state_create(WindowAggState *winstate, int16 elemIdx, + int16 altPriority, int32 *counts, + bool sourceAbsorbable); static bool nfa_states_equal(WindowAggState *winstate, RPRNFAState *s1, RPRNFAState *s2); static bool nfa_add_state_unique(WindowAggState *winstate, RPRNFAContext *ctx, @@ -275,28 +279,30 @@ static RPRNFAContext *nfa_context_alloc(WindowAggState *winstate); static void nfa_unlink_context(WindowAggState *winstate, RPRNFAContext *ctx); static void nfa_context_free(WindowAggState *winstate, RPRNFAContext *ctx); static RPRNFAContext *nfa_start_context(WindowAggState *winstate, int64 startPos); -static RPRNFAContext *nfa_find_context_for_pos(WindowAggState *winstate, int64 pos); +static RPRNFAContext *nfa_get_head_context(WindowAggState *winstate, int64 pos); /* Forward declarations - NFA statistics */ static void nfa_update_length_stats(int64 count, NFALengthStats *stats, int64 newLen); +static void nfa_record_context_success(WindowAggState *winstate, int64 matchLen); static void nfa_record_context_failure(WindowAggState *winstate, int64 failedLen); +static void nfa_record_context_skipped(WindowAggState *winstate, int64 skippedLen); +static void nfa_record_context_absorbed(WindowAggState *winstate, int64 absorbedLen); /* Forward declarations - NFA row evaluation */ static bool nfa_evaluate_row(WindowObject winobj, int64 pos, bool *varMatched); /* Forward declarations - NFA context lifecycle */ -static void nfa_remove_contexts_up_to(WindowAggState *winstate, int64 endPos, - RPRNFAContext *excludeCtx); static void nfa_cleanup_dead_contexts(WindowAggState *winstate, RPRNFAContext *excludeCtx); +static void nfa_finalize_all_contexts(WindowAggState *winstate, int64 lastPos); /* Forward declarations - NFA absorption */ -static void nfa_update_absorption_flags(RPRNFAContext *ctx, RPRPattern *pattern); +static void nfa_update_absorption_flags(RPRNFAContext *ctx); static bool nfa_states_covered(RPRPattern *pattern, RPRNFAContext *older, RPRNFAContext *newer); static bool nfa_try_absorb_context(WindowAggState *winstate, RPRNFAContext *ctx); static void nfa_absorb_contexts(WindowAggState *winstate); -/* Forward declarations - NFA execution */ +/* Forward declarations - NFA match and advance */ static inline bool nfa_eval_var_match(WindowAggState *winstate, RPRPatternElement *elem, bool *varMatched); static void nfa_match(WindowAggState *winstate, RPRNFAContext *ctx, @@ -309,6 +315,9 @@ static void nfa_route_to_elem(WindowAggState *winstate, RPRNFAContext *ctx, static void nfa_advance_alt(WindowAggState *winstate, RPRNFAContext *ctx, RPRNFAState *state, RPRPatternElement *elem, int64 currentPos, bool initialAdvance); +static void nfa_advance_begin(WindowAggState *winstate, RPRNFAContext *ctx, + RPRNFAState *state, RPRPatternElement *elem, + int64 currentPos, bool initialAdvance); static void nfa_advance_end(WindowAggState *winstate, RPRNFAContext *ctx, RPRNFAState *state, RPRPatternElement *elem, int64 currentPos, bool initialAdvance); @@ -317,9 +326,6 @@ static void nfa_advance_var(WindowAggState *winstate, RPRNFAContext *ctx, int64 currentPos, bool initialAdvance); static void nfa_advance(WindowAggState *winstate, RPRNFAContext *ctx, int64 currentPos, bool initialAdvance); -static void nfa_process_row(WindowAggState *winstate, int64 currentPos, - bool hasLimitedFrame, int64 frameOffset); -static void nfa_finalize_all_contexts(WindowAggState *winstate, int64 lastPos); /* * Not null info bit array consists of 2-bit items @@ -4813,6 +4819,7 @@ update_reduced_frame(WindowObject winobj, int64 pos) int frameOptions = winstate->frameOptions; bool hasLimitedFrame; int64 frameOffset = 0; + int64 matchLen; /* * Check if we have a limited frame (ROWS ... N FOLLOWING). Each context @@ -4839,7 +4846,7 @@ update_reduced_frame(WindowObject winobj, int64 pos) /* * Case 2: Find existing context for this pos, or create new one. */ - targetCtx = nfa_find_context_for_pos(winstate, pos); + targetCtx = nfa_get_head_context(winstate, pos); if (targetCtx == NULL) { /* @@ -4920,10 +4927,6 @@ update_reduced_frame(WindowObject winobj, int64 pos) * appropriately as pruned or mismatched. */ nfa_cleanup_dead_contexts(winstate, targetCtx); - - /* Check if target context is now complete */ - if (targetCtx->states == NULL) - break; } register_result: @@ -4934,43 +4937,26 @@ register_result: */ if (targetCtx->matchEndRow < targetCtx->matchStartRow) { - int64 mismatchLen = targetCtx->lastProcessedRow - targetCtx->matchStartRow + 1; + matchLen = targetCtx->lastProcessedRow - targetCtx->matchStartRow + 1; register_reduced_frame_map(winstate, targetCtx->matchStartRow, RF_UNMATCHED); - nfa_record_context_failure(winstate, mismatchLen); + nfa_record_context_failure(winstate, matchLen); + nfa_context_free(winstate, targetCtx); + return; } - else - { - int64 matchLen = targetCtx->matchEndRow - targetCtx->matchStartRow + 1; - register_reduced_frame_map(winstate, targetCtx->matchStartRow, RF_FRAME_HEAD); - for (int64 i = targetCtx->matchStartRow + 1; i <= targetCtx->matchEndRow; i++) - { - register_reduced_frame_map(winstate, i, RF_SKIPPED); - } - winstate->nfaMatchesSucceeded++; - nfa_update_length_stats(winstate->nfaMatchesSucceeded, - &winstate->nfaMatchLen, - matchLen); - } + /* Match succeeded - register frame map and record statistics */ + matchLen = targetCtx->matchEndRow - targetCtx->matchStartRow + 1; - /* - * Cleanup contexts based on SKIP mode. - */ - if (targetCtx->matchEndRow >= targetCtx->matchStartRow && - winstate->rpSkipTo == ST_PAST_LAST_ROW) - { - /* - * Remove all contexts with start <= matchEnd, excluding targetCtx - * from skip count - */ - nfa_remove_contexts_up_to(winstate, targetCtx->matchEndRow, targetCtx); - } - else + register_reduced_frame_map(winstate, targetCtx->matchStartRow, RF_FRAME_HEAD); + for (int64 i = targetCtx->matchStartRow + 1; i <= targetCtx->matchEndRow; i++) { - /* SKIP TO NEXT ROW or no match: just remove the target context */ - nfa_context_free(winstate, targetCtx); + register_reduced_frame_map(winstate, i, RF_SKIPPED); } + nfa_record_context_success(winstate, matchLen); + + /* Remove the matched context */ + nfa_context_free(winstate, targetCtx); } /* @@ -5057,6 +5043,82 @@ register_result: * allows absorption even when Ctx1 has extra non-absorbable states. */ +/* + * nfa_process_row + * + * Process all contexts for one row: + * 1. Match all contexts (convergence) - evaluate VARs, prune dead states + * 2. Absorb redundant contexts - ideal timing after convergence + * 3. Advance all contexts (divergence) - create new states for next row + */ +static void +nfa_process_row(WindowAggState *winstate, int64 currentPos, + bool hasLimitedFrame, int64 frameOffset) +{ + RPRNFAContext *ctx; + bool *varMatched = winstate->nfaVarMatched; + + /* + * Phase 1: Match all contexts (convergence) Evaluate VAR elements, update + * counts, remove dead states. + */ + for (ctx = winstate->nfaContext; ctx != NULL; ctx = ctx->next) + { + if (ctx->states == NULL) + continue; + + /* Check frame boundary - finalize if exceeded */ + if (hasLimitedFrame) + { + int64 ctxFrameEnd = ctx->matchStartRow + frameOffset + 1; + + if (currentPos >= ctxFrameEnd) + { + /* Frame boundary exceeded: force mismatch */ + nfa_match(winstate, ctx, NULL); + continue; + } + } + + nfa_match(winstate, ctx, varMatched); + ctx->lastProcessedRow = currentPos; + } + + /* + * Phase 2: Absorb redundant contexts After match phase, states have + * converged - ideal for absorption. First update absorption flags that + * may have changed due to state removal. + */ + if (winstate->rpPattern->isAbsorbable) + { + for (ctx = winstate->nfaContext; ctx != NULL; ctx = ctx->next) + nfa_update_absorption_flags(ctx); + + nfa_absorb_contexts(winstate); + } + + /* + * Phase 3: Advance all contexts (divergence) Create new states + * (loop/exit) from surviving matched states. + */ + for (ctx = winstate->nfaContext; ctx != NULL; ctx = ctx->next) + { + if (ctx->states == NULL) + continue; + + /* + * Phase 1 already handled frame boundary exceeded contexts by forcing + * mismatch (nfa_match with NULL), which removes all states (all states + * are at VAR positions after advance). So any surviving context here + * must be within its frame boundary. + */ + Assert(!hasLimitedFrame || + currentPos < ctx->matchStartRow + frameOffset + 1); + + nfa_advance(winstate, ctx, currentPos, false); + } +} + /* * nfa_state_alloc * @@ -5109,33 +5171,32 @@ nfa_state_free(WindowAggState *winstate, RPRNFAState *state) /* * nfa_state_free_list * - * Free all states in a list using pfree. + * Return all states in a list to the free list. */ static void nfa_state_free_list(WindowAggState *winstate, RPRNFAState *list) { - RPRNFAState *state; + RPRNFAState *next; - while (list != NULL) + for (; list != NULL; list = next) { - state = list; - list = list->next; - nfa_state_free(winstate, state); + next = list->next; + nfa_state_free(winstate, list); } } /* - * nfa_state_clone + * nfa_state_create * - * Clone a state with given elemIdx, altPriority and counts. + * Create a new state with given elemIdx, altPriority and counts. * isAbsorbable is computed immediately: inherited AND new element's flag. * Monotonic property: once false, stays false through all transitions. * * Caller is responsible for linking the returned state. */ static RPRNFAState * -nfa_state_clone(WindowAggState *winstate, int16 elemIdx, int16 altPriority, - int32 *counts, bool sourceAbsorbable) +nfa_state_create(WindowAggState *winstate, int16 elemIdx, int16 altPriority, + int32 *counts, bool sourceAbsorbable) { RPRPattern *pattern = winstate->rpPattern; int maxDepth = pattern->maxDepth; @@ -5165,13 +5226,19 @@ nfa_state_clone(WindowAggState *winstate, int16 elemIdx, int16 altPriority, static bool nfa_states_equal(WindowAggState *winstate, RPRNFAState *s1, RPRNFAState *s2) { - int maxDepth = winstate->rpPattern->maxDepth; + RPRPattern *pattern = winstate->rpPattern; + RPRPatternElement *elem; + int compareDepth; if (s1->elemIdx != s2->elemIdx) return false; - if (maxDepth > 0 && - memcmp(s1->counts, s2->counts, sizeof(int32) * maxDepth) != 0) + /* Compare counts up to current element's depth */ + elem = &pattern->elements[s1->elemIdx]; + compareDepth = elem->depth + 1; /* depth 0 needs 1 count, etc. */ + + if (compareDepth > 0 && + memcmp(s1->counts, s2->counts, sizeof(int32) * compareDepth) != 0) return false; return true; @@ -5222,6 +5289,19 @@ nfa_add_state_unique(WindowAggState *winstate, RPRNFAContext *ctx, RPRNFAState * * Record a matched state following SQL standard semantics. * Lexical order (lower altPriority) wins first. Among same lexical order, * longer match wins (greedy). + * + * FIXME: altPriority is a single value that only tracks the last ALT choice. + * For patterns with repeated or nested ALTs like (A|B)+, this cannot correctly + * implement SQL standard lexical order, which requires comparing the full path + * from left to right. For example: + * Pattern: (A | B)+ + * Path "A B A" vs "B A B" + * Current: compares last choice (A vs B) → altPriority 10 vs 20 + * Correct: should compare first choice (A < B) → "A B A" wins + * + * A classifier structure tracking the entire ALT path is required for correct + * implementation. Without it, patterns with repeated or nested ALTs will + * produce incorrect match selection. */ static void nfa_add_matched_state(WindowAggState *winstate, RPRNFAContext *ctx, @@ -5247,6 +5327,49 @@ nfa_add_matched_state(WindowAggState *winstate, RPRNFAContext *ctx, ctx->matchedState = state; state->next = NULL; ctx->matchEndRow = matchEndRow; + + /* + * SKIP PAST LAST ROW: eagerly prune contexts within match range. + * + * This function is called whenever a FIN state is reached, including + * during greedy matching when intermediate (shorter) matches are found. + * Each time we update matchEndRow (whether extending a greedy match or + * finding a new match), we can prune pending contexts that started + * within the current match range. + * + * SKIP PAST LAST ROW uses lexical order (matchStartRow). Therefore, + * any pending context that started at or before matchEndRow can never + * produce a valid output row - it would be skipped anyway per SQL + * standard. + * + * Example (greedy matching in progress): + * Pattern: START UP+ + * Rows: 1 2 3 4 5 + * Context A starts at row 1: + * - Matches START UP (rows 1-2) → matchEndRow=2 → prune Context B(row 2) + * - Matches START UP UP (rows 1-3) → matchEndRow=3 → prune Context C(row 3) + * - Continues greedy extension while pruning incrementally + */ + if (winstate->rpSkipTo == ST_PAST_LAST_ROW) + { + RPRNFAContext *nextCtx; + int64 skippedLen; + + while (ctx->next != NULL && + ctx->next->matchStartRow <= matchEndRow) + { + nextCtx = ctx->next; + ctx->next = ctx->next->next; + + Assert(nextCtx->lastProcessedRow >= nextCtx->matchStartRow); + skippedLen = nextCtx->lastProcessedRow - nextCtx->matchStartRow + 1; + nfa_record_context_skipped(winstate, skippedLen); + + nfa_context_free(winstate, nextCtx); + } + if (ctx->next == NULL) + winstate->nfaContextTail = ctx; + } } else { @@ -5258,7 +5381,7 @@ nfa_add_matched_state(WindowAggState *winstate, RPRNFAContext *ctx, /* * nfa_context_alloc * - * Allocate an NFA context from free list or palloc. + * Allocate an NFA context, reusing from free list if available. */ static RPRNFAContext * nfa_context_alloc(WindowAggState *winstate) @@ -5351,8 +5474,9 @@ nfa_context_free(WindowAggState *winstate, RPRNFAContext *ctx) * nfa_start_context * * Start a new match context at given position. - * Initializes context and state absorption flags. - * Adds context to winstate->nfaContext list and returns the new context. + * Initializes context, state absorption flags, and performs initial advance + * to expand epsilon transitions (ALT branches, optional elements). + * Adds context to the tail of winstate->nfaContext list. */ static RPRNFAContext * nfa_start_context(WindowAggState *winstate, int64 startPos) @@ -5418,28 +5542,24 @@ nfa_start_context(WindowAggState *winstate, int64 startPos) } /* - * nfa_find_context_for_pos + * nfa_get_head_context * - * Find a context with the given start position. - * Returns NULL if not found. + * Return the head context if its start position matches pos. + * Returns NULL if no context exists or head doesn't match pos. */ static RPRNFAContext * -nfa_find_context_for_pos(WindowAggState *winstate, int64 pos) +nfa_get_head_context(WindowAggState *winstate, int64 pos) { - RPRNFAContext *ctx; + RPRNFAContext *ctx = winstate->nfaContext; /* - * List is sorted by matchStartRow ascending (oldest/smallest at head). - * Stop early if we pass the target position. + * Contexts are sorted by matchStartRow ascending. If the head + * context doesn't match pos, no context exists for this position. */ - for (ctx = winstate->nfaContext; ctx != NULL; ctx = ctx->next) - { - if (ctx->matchStartRow == pos) - return ctx; - if (ctx->matchStartRow > pos) - break; /* won't find it, list is sorted */ - } - return NULL; + if (ctx == NULL || ctx->matchStartRow != pos) + return NULL; + + return ctx; } /* @@ -5466,6 +5586,20 @@ nfa_update_length_stats(int64 count, NFALengthStats *stats, int64 newLen) stats->total += newLen; } +/* + * nfa_record_context_success + * + * Record a successful context in statistics. + */ +static void +nfa_record_context_success(WindowAggState *winstate, int64 matchLen) +{ + winstate->nfaMatchesSucceeded++; + nfa_update_length_stats(winstate->nfaMatchesSucceeded, + &winstate->nfaMatchLen, + matchLen); +} + /* * nfa_record_context_failure * @@ -5489,6 +5623,34 @@ nfa_record_context_failure(WindowAggState *winstate, int64 failedLen) } } +/* + * nfa_record_context_skipped + * + * Record a skipped context in statistics. + */ +static void +nfa_record_context_skipped(WindowAggState *winstate, int64 skippedLen) +{ + winstate->nfaContextsSkipped++; + nfa_update_length_stats(winstate->nfaContextsSkipped, + &winstate->nfaSkippedLen, + skippedLen); +} + +/* + * nfa_record_context_absorbed + * + * Record an absorbed context in statistics. + */ +static void +nfa_record_context_absorbed(WindowAggState *winstate, int64 absorbedLen) +{ + winstate->nfaContextsAbsorbed++; + nfa_update_length_stats(winstate->nfaContextsAbsorbed, + &winstate->nfaAbsorbedLen, + absorbedLen); +} + /* * nfa_evaluate_row * @@ -5557,47 +5719,6 @@ nfa_evaluate_row(WindowObject winobj, int64 pos, bool *varMatched) return true; /* Row exists */ } -/* - * nfa_remove_contexts_up_to - * - * Remove all contexts with matchStartRow <= endPos. - * Used by SKIP PAST LAST ROW to discard contexts within matched frame. - * - * excludeCtx: if not NULL, this context should not be counted in statistics - * (typically the matched context that triggered this removal). - */ -static void -nfa_remove_contexts_up_to(WindowAggState *winstate, int64 endPos, - RPRNFAContext *excludeCtx) -{ - RPRNFAContext *ctx; - RPRNFAContext *next; - - /* Contexts are sorted by matchStartRow ascending, so we can stop early */ - for (ctx = winstate->nfaContext; ctx != NULL; ctx = next) - { - next = ctx->next; - if (ctx->matchStartRow > endPos) - break; - - /* - * Track skipped context length statistics, excluding the matched - * context - */ - if (ctx != excludeCtx && ctx->lastProcessedRow >= ctx->matchStartRow) - { - int64 skippedLen = ctx->lastProcessedRow - ctx->matchStartRow + 1; - - winstate->nfaContextsSkipped++; - nfa_update_length_stats(winstate->nfaContextsSkipped, - &winstate->nfaSkippedLen, - skippedLen); - } - - nfa_context_free(winstate, ctx); - } -} - /* * nfa_cleanup_dead_contexts * @@ -5640,6 +5761,27 @@ nfa_cleanup_dead_contexts(WindowAggState *winstate, RPRNFAContext *excludeCtx) } } +/* + * nfa_finalize_all_contexts + * + * Finalize all active contexts when partition ends. + * Match with NULL to force mismatch, then advance to process epsilon transitions. + */ +static void +nfa_finalize_all_contexts(WindowAggState *winstate, int64 lastPos) +{ + RPRNFAContext *ctx; + + for (ctx = winstate->nfaContext; ctx != NULL; ctx = ctx->next) + { + if (ctx->states != NULL) + { + nfa_match(winstate, ctx, NULL); + nfa_advance(winstate, ctx, lastPos, false); + } + } +} + /* * nfa_update_absorption_flags * @@ -5657,7 +5799,7 @@ nfa_cleanup_dead_contexts(WindowAggState *winstate, RPRNFAContext *excludeCtx) * permanently, so we skip recalculation. */ static void -nfa_update_absorption_flags(RPRNFAContext *ctx, RPRPattern *pattern) +nfa_update_absorption_flags(RPRNFAContext *ctx) { RPRNFAState *state; bool hasAbsorbable = false; @@ -5681,9 +5823,6 @@ nfa_update_absorption_flags(RPRNFAContext *ctx, RPRPattern *pattern) return; } - if (pattern == NULL) - return; - /* * Iterate through all states to check absorption status. Uses * state->isAbsorbable which tracks if state is in absorbable region. This @@ -5800,10 +5939,7 @@ nfa_try_absorb_context(WindowAggState *winstate, RPRNFAContext *ctx) int64 absorbedLen = ctx->lastProcessedRow - ctx->matchStartRow + 1; nfa_context_free(winstate, ctx); - winstate->nfaContextsAbsorbed++; - nfa_update_length_stats(winstate->nfaContextsAbsorbed, - &winstate->nfaAbsorbedLen, - absorbedLen); + nfa_record_context_absorbed(winstate, absorbedLen); return true; } } @@ -5853,6 +5989,9 @@ static inline bool nfa_eval_var_match(WindowAggState *winstate, RPRPatternElement *elem, bool *varMatched) { + /* This function should only be called for VAR elements */ + Assert(RPRElemIsVar(elem)); + if (varMatched == NULL) return false; if (elem->varId >= list_length(winstate->defineVariableList)) @@ -5884,17 +6023,19 @@ nfa_match(WindowAggState *winstate, RPRNFAContext *ctx, bool *varMatched) RPRPatternElement *elements = pattern->elements; RPRNFAState **prevPtr = &ctx->states; RPRNFAState *state; + RPRNFAState *nextState; /* * Evaluate VAR elements against current row. For simple VARs with END * next, advance to END and update group count inline so absorb phase can * compare states properly. */ - for (state = ctx->states; state != NULL;) + for (state = ctx->states; state != NULL; state = nextState) { - RPRNFAState *nextState = state->next; RPRPatternElement *elem = &elements[state->elemIdx]; + nextState = state->next; + if (RPRElemIsVar(elem)) { bool matched; @@ -5909,15 +6050,8 @@ nfa_match(WindowAggState *winstate, RPRNFAContext *ctx, bool *varMatched) if (count < RPR_COUNT_MAX) count++; - /* Check max constraint */ - if (elem->max != RPR_QUANTITY_INF && count > elem->max) - { - /* Exceeded max - remove state */ - *prevPtr = nextState; - nfa_state_free(winstate, state); - state = nextState; - continue; - } + /* Max constraint should not be exceeded */ + Assert(elem->max == RPR_QUANTITY_INF || count <= elem->max); state->counts[depth] = count; @@ -5926,33 +6060,30 @@ nfa_match(WindowAggState *winstate, RPRNFAContext *ctx, bool *varMatched) * and update group count inline. This keeps state in place, * preserving lexical order. */ - if (elem->min == 1 && elem->max == 1 && count == 1 && + if (elem->min == 1 && elem->max == 1 && RPRElemIsEnd(&elements[elem->next])) { RPRPatternElement *endElem = &elements[elem->next]; int endDepth = endElem->depth; int32 endCount = state->counts[endDepth]; + Assert(count == 1); + /* Increment group count with overflow protection */ if (endCount < RPR_COUNT_MAX) endCount++; - /* Check END's max constraint */ - if (endElem->max != RPR_QUANTITY_INF && endCount > endElem->max) - { - /* Exceeded END's max - remove state */ - *prevPtr = nextState; - nfa_state_free(winstate, state); - state = nextState; - continue; - } + /* + * END's max can never be exceeded here because + * nfa_advance_end only loops when count < max, + * so endCount entering inline advance is at most + * max-1, and incrementing yields at most max. + */ + Assert(endElem->max == RPR_QUANTITY_INF || + endCount <= endElem->max); state->elemIdx = elem->next; state->counts[endDepth] = endCount; - - /* Clear deeper counts */ - for (int d = endDepth + 1; d < pattern->maxDepth; d++) - state->counts[d] = 0; } /* else: stay at VAR for advance phase */ } @@ -5964,14 +6095,12 @@ nfa_match(WindowAggState *winstate, RPRNFAContext *ctx, bool *varMatched) */ *prevPtr = nextState; nfa_state_free(winstate, state); - state = nextState; continue; } } /* Non-VAR elements: keep as-is for advance phase */ prevPtr = &state->next; - state = nextState; } } @@ -5993,9 +6122,9 @@ nfa_route_to_elem(WindowAggState *winstate, RPRNFAContext *ctx, { RPRNFAState *skipState; - skipState = nfa_state_clone(winstate, nextElem->next, - state->altPriority, state->counts, - state->isAbsorbable); + skipState = nfa_state_create(winstate, nextElem->next, + state->altPriority, state->counts, + state->isAbsorbable); nfa_advance_state(winstate, ctx, skipState, currentPos, initialAdvance); } } @@ -6026,6 +6155,10 @@ nfa_advance_alt(WindowAggState *winstate, RPRNFAContext *ctx, RPRPatternElement *altElem = &elements[altIdx]; RPRNFAState *newState; + /* Stop if element is outside ALT scope (not a branch) */ + if (altElem->depth <= elem->depth) + break; + if (first) { state->elemIdx = altIdx; @@ -6035,8 +6168,8 @@ nfa_advance_alt(WindowAggState *winstate, RPRNFAContext *ctx, } else { - newState = nfa_state_clone(winstate, altIdx, altIdx, - state->counts, state->isAbsorbable); + newState = nfa_state_create(winstate, altIdx, altIdx, + state->counts, state->isAbsorbable); } /* Recursively process this branch before next */ @@ -6044,8 +6177,8 @@ nfa_advance_alt(WindowAggState *winstate, RPRNFAContext *ctx, altIdx = altElem->jump; } - if (first) - nfa_state_free(winstate, state); + /* ALT must have at least one branch */ + Assert(!first); } /* @@ -6063,24 +6196,28 @@ nfa_advance_begin(WindowAggState *winstate, RPRNFAContext *ctx, { RPRPattern *pattern = winstate->rpPattern; RPRPatternElement *elements = pattern->elements; + RPRNFAState *skipState = NULL; state->counts[elem->depth] = 0; - /* Optional group: create skip path */ + /* Optional group: create skip path (but don't route yet) */ if (elem->min == 0) { - RPRNFAState *skipState; - - skipState = nfa_state_clone(winstate, elem->jump, state->altPriority, + skipState = nfa_state_create(winstate, elem->jump, state->altPriority, state->counts, state->isAbsorbable); - nfa_route_to_elem(winstate, ctx, skipState, - &elements[elem->jump], currentPos, initialAdvance); } - /* Enter group: route to first child */ + /* Enter group: route to first child (lexically first) */ state->elemIdx = elem->next; nfa_route_to_elem(winstate, ctx, state, &elements[state->elemIdx], currentPos, initialAdvance); + + /* Now route skip path (lexically second) */ + if (skipState != NULL) + { + nfa_route_to_elem(winstate, ctx, skipState, + &elements[elem->jump], currentPos, initialAdvance); + } } /* @@ -6117,11 +6254,17 @@ nfa_advance_end(WindowAggState *winstate, RPRNFAContext *ctx, /* * Must exit: either reached max iterations, or group matched empty. * - * The second condition (count == 0 && min == 0) prevents infinite - * recursion with patterns like (A*)* where the inner group can match - * empty. If the group completed without consuming any input - * (count=0) and is optional (min=0), looping back would just repeat - * the empty match forever. So we force exit instead. + * FIXME: The (count == 0 && min == 0) condition is insufficient for + * cycle prevention. Cycles can occur at any count value when loop back + * happens without consuming rows. For example: + * Pattern: (A*)* + * After matching 3 A's (count=3), loop back at a B row + * Inner A* matches 0 times (skip path) → same (elemIdx, count=3) + * Infinite cycle at count=3, not count=0 + * + * Currently, cycles are silently prevented by nfa_add_state_unique + * detecting duplicate states, but this is implicit and not guaranteed + * for all code paths. Explicit cycle detection is needed. */ RPRPatternElement *nextElem; @@ -6155,8 +6298,8 @@ nfa_advance_end(WindowAggState *winstate, RPRNFAContext *ctx, * Create exit state first (need original counts before modifying * state) */ - exitState = nfa_state_clone(winstate, elem->next, exitAltPriority, - state->counts, state->isAbsorbable); + exitState = nfa_state_create(winstate, elem->next, exitAltPriority, + state->counts, state->isAbsorbable); exitState->counts[depth] = 0; nextElem = &elements[exitState->elemIdx]; @@ -6165,7 +6308,6 @@ nfa_advance_end(WindowAggState *winstate, RPRNFAContext *ctx, exitState->counts[nextElem->depth]++; /* Route loop state first (earlier in pattern = lexical order) */ - state->counts[depth] = count; for (int d = depth + 1; d < pattern->maxDepth; d++) state->counts[d] = 0; state->elemIdx = elem->jump; @@ -6196,14 +6338,17 @@ nfa_advance_var(WindowAggState *winstate, RPRNFAContext *ctx, bool canLoop = (elem->max == RPR_QUANTITY_INF || count < elem->max); bool canExit = (count >= elem->min); + /* After a successful match, count >= 1, so at least one must be true */ + Assert(canLoop || canExit); + if (canLoop && canExit) { /* Both: clone for loop, modify original for exit */ RPRNFAState *loopState; RPRPatternElement *nextElem; - loopState = nfa_state_clone(winstate, state->elemIdx, state->altPriority, - state->counts, state->isAbsorbable); + loopState = nfa_state_create(winstate, state->elemIdx, state->altPriority, + state->counts, state->isAbsorbable); nfa_add_state_unique(winstate, ctx, loopState); /* Exit: advance to next element */ @@ -6220,7 +6365,7 @@ nfa_advance_var(WindowAggState *winstate, RPRNFAContext *ctx, } else if (canExit) { - /* Exit only: modify state */ + /* Exit only: advance to next element */ RPRPatternElement *nextElem; state->counts[depth] = 0; @@ -6229,11 +6374,6 @@ nfa_advance_var(WindowAggState *winstate, RPRNFAContext *ctx, nfa_route_to_elem(winstate, ctx, state, nextElem, currentPos, initialAdvance); } - else - { - /* Neither - shouldn't happen, free state */ - nfa_state_free(winstate, state); - } } /* @@ -6311,106 +6451,3 @@ nfa_advance(WindowAggState *winstate, RPRNFAContext *ctx, int64 currentPos, nfa_advance_state(winstate, ctx, state, currentPos, initialAdvance); } } - -/* - * nfa_process_row - * - * Process all contexts for one row using the new flow: - * 1. Match all contexts (convergence) - evaluate VARs, prune dead states - * 2. Absorb redundant contexts - ideal timing after convergence - * 3. Advance all contexts (divergence) - create new states for next row - */ -static void -nfa_process_row(WindowAggState *winstate, int64 currentPos, - bool hasLimitedFrame, int64 frameOffset) -{ - RPRNFAContext *ctx; - bool *varMatched = winstate->nfaVarMatched; - - /* - * Phase 1: Match all contexts (convergence) Evaluate VAR elements, update - * counts, remove dead states. - */ - for (ctx = winstate->nfaContext; ctx != NULL; ctx = ctx->next) - { - if (ctx->states == NULL) - continue; - - /* Check frame boundary - finalize if exceeded */ - if (hasLimitedFrame) - { - int64 ctxFrameEnd = ctx->matchStartRow + frameOffset + 1; - - if (currentPos >= ctxFrameEnd) - { - /* - * Frame boundary: match with NULL (force mismatch), then - * advance - */ - nfa_match(winstate, ctx, NULL); - nfa_advance(winstate, ctx, ctxFrameEnd - 1, false); - continue; - } - } - - nfa_match(winstate, ctx, varMatched); - ctx->lastProcessedRow = currentPos; - } - - /* - * Phase 2: Absorb redundant contexts After match phase, states have - * converged - ideal for absorption. First update absorption flags that - * may have changed due to state removal. - */ - if (winstate->rpPattern->isAbsorbable) - { - RPRPattern *pattern = winstate->rpPattern; - - for (ctx = winstate->nfaContext; ctx != NULL; ctx = ctx->next) - nfa_update_absorption_flags(ctx, pattern); - - nfa_absorb_contexts(winstate); - } - - /* - * Phase 3: Advance all contexts (divergence) Create new states - * (loop/exit) from surviving matched states. - */ - for (ctx = winstate->nfaContext; ctx != NULL; ctx = ctx->next) - { - if (ctx->states == NULL) - continue; - - /* Skip contexts already finalized in phase 1 */ - if (hasLimitedFrame) - { - int64 ctxFrameEnd = ctx->matchStartRow + frameOffset + 1; - - if (currentPos >= ctxFrameEnd) - continue; - } - - nfa_advance(winstate, ctx, currentPos, false); - } -} - -/* - * nfa_finalize_all_contexts - * - * Finalize all active contexts when partition ends. - * Match with NULL to force mismatch, then advance to process epsilon transitions. - */ -static void -nfa_finalize_all_contexts(WindowAggState *winstate, int64 lastPos) -{ - RPRNFAContext *ctx; - - for (ctx = winstate->nfaContext; ctx != NULL; ctx = ctx->next) - { - if (ctx->states != NULL) - { - nfa_match(winstate, ctx, NULL); - nfa_advance(winstate, ctx, lastPos, false); - } - } -} diff --git a/src/backend/optimizer/plan/rpr.c b/src/backend/optimizer/plan/rpr.c index 67710a94a0d..112ed034fe2 100644 --- a/src/backend/optimizer/plan/rpr.c +++ b/src/backend/optimizer/plan/rpr.c @@ -89,12 +89,10 @@ static void fillRPRPattern(RPRPatternNode *node, RPRPattern *pat, static void finalizeRPRPattern(RPRPattern *result); /* Forward declarations - context absorption */ -static bool isUnboundedStart(RPRPattern *pattern, RPRElemIdx idx, - RPRDepth parentDepth); +static bool isUnboundedStart(RPRPattern *pattern, RPRElemIdx idx); static void computeAbsorbabilityRecursive(RPRPattern *pattern, RPRElemIdx startIdx, - bool *hasAbsorbable, - RPRDepth parentDepth); + bool *hasAbsorbable); static void computeAbsorbability(RPRPattern *pattern); /* @@ -1184,13 +1182,13 @@ fillRPRPatternGroup(RPRPatternNode *node, RPRPattern *pat, int *idx, RPRDepth de endElem->min = node->min; endElem->max = (node->max == INT_MAX) ? RPR_QUANTITY_INF : node->max; endElem->next = RPR_ELEMIDX_INVALID; - endElem->jump = groupStartIdx; /* loop to first child */ + endElem->jump = groupStartIdx; /* loop to first child */ if (node->reluctant >= 0) endElem->flags |= RPR_ELEM_RELUCTANT; (*idx)++; /* Set BEGIN skip pointer (next is set by finalize) */ - beginElem->jump = *idx; /* skip: go to after END */ + beginElem->jump = *idx; /* skip: go to after END */ } } @@ -1439,121 +1437,79 @@ finalizeRPRPattern(RPRPattern *result) /* * isUnboundedStart - * Check if the element at idx starts an unbounded sequence. + * Check if the element at idx starts an unbounded greedy sequence. * - * For context absorption to work, the sequence starting at idx must be - * unbounded (max = infinity) so that we can "shift" by decrementing count. + * For context absorption to work, the sequence starting at idx must be: + * - Unbounded (max = infinity) + * - Greedy (not reluctant) + * - At the start of current scope * * Algorithm: - * - Traverse elements within current scope (bounded by parentDepth) - * - For GROUP: must be unbounded AND contain only simple {1,1} VARs - * - Check if sequence starts with unbounded element (VAR or GROUP END) + * - Traverse elements within current scope (parentDepth to startDepth) + * - For GROUP: must be unbounded greedy AND contain only simple {1,1} VARs + * - Sets ABSORBABLE and ABSORBABLE_BRANCH flags on matching elements * * Two cases are handled: - * 1. Simple var: A+ B C - first element A has max=INF - * 2. Group: (A B){2,} C - group END has max=INF, and all elements - * inside the group must be simple {1,1} vars (no nested complexity) + * 1. Simple VAR: A+ B C - A has max=INF, gets both flags + * 2. Group: (A B)+ C - END has max=INF, all children are {1,1} VARs + * A,B,END get ABSORBABLE_BRANCH, only END gets ABSORBABLE * * Returns false for patterns where absorption cannot work: * - A B+ (unbounded not at start) - * - (A | B){2,} (ALT inside group) - * - (A B+){2,} (unbounded element inside group) - * - ((A B){2,} C){3,} (nested groups) + * - A+? B (reluctant quantifier) + * - (A | B)+ (ALT inside group) + * - (A B+)+ (unbounded element inside group) + * - ((A B)+ C)+ (nested unbounded groups) */ static bool -isUnboundedStart(RPRPattern *pattern, RPRElemIdx idx, RPRDepth parentDepth) +isUnboundedStart(RPRPattern *pattern, RPRElemIdx idx) { RPRPatternElement *elem = &pattern->elements[idx]; RPRDepth startDepth = elem->depth; RPRPatternElement *nextElem; RPRPatternElement *e; + /* Case 1: Simple unbounded VAR at start (greedy only) */ + if (RPRElemIsVar(elem) && elem->max == RPR_QUANTITY_INF && + !RPRElemIsReluctant(elem)) + { + /* Set both flags on first element */ + elem->flags |= RPR_ELEM_ABSORBABLE_BRANCH | RPR_ELEM_ABSORBABLE; + return true; + } + /* - * Traverse elements until we exit the current scope. For groups, check if - * all elements inside are simple {1,1} vars. Groups with complex - * quantifiers (e.g., (A+ B)+) are not absorbable. Uses parentDepth as - * upper boundary to detect scope exit. + * Case 2: Unbounded GROUP - traverse siblings at startDepth and check if + * they're all simple {1,1} VARs, then check if END at startDepth - 1 is + * unbounded greedy. */ - for (e = elem; !RPRElemIsFin(e); e = nextElem) + for (e = elem; e->depth == startDepth; e = nextElem) { - /* Check for unbounded END at enclosing depth */ - if (e->depth < startDepth) - { - /* Case 2: Unbounded group - END element points back to idx */ - if (e->depth == startDepth - 1 && - RPRElemIsEnd(e) && e->max == RPR_QUANTITY_INF && - !RPRElemIsReluctant(e)) - { - Assert(e->jump == idx); /* END-based break ensures this */ - - /* - * Set ABSORBABLE_BRANCH on all elements, ABSORBABLE on END - * only - */ - for (e = elem; !RPRElemIsEnd(e); e = &pattern->elements[e->next]) - e->flags |= RPR_ELEM_ABSORBABLE_BRANCH; - e->flags |= RPR_ELEM_ABSORBABLE_BRANCH | RPR_ELEM_ABSORBABLE; - return true; - } - break; - } - - /* Element must be simple {1,1} VAR */ - if (e->depth == startDepth && - (!RPRElemIsVar(e) || e->min != 1 || e->max != 1)) - break; + /* Must be simple {1,1} VAR */ + if (!RPRElemIsVar(e) || e->min != 1 || e->max != 1) + return false; - /* Next must be valid (will break before reaching FIN) */ Assert(e->next != RPR_ELEMIDX_INVALID); nextElem = &pattern->elements[e->next]; - - /* Break if next element exits current scope */ - if (nextElem->depth < parentDepth || nextElem->depth > startDepth) - break; } - /* Case 1: Simple unbounded var at start (greedy only) */ - if (RPRElemIsVar(elem) && elem->max == RPR_QUANTITY_INF && - !RPRElemIsReluctant(elem)) + /* Now e should be END at startDepth - 1 */ + if (e->depth == startDepth - 1 && + RPRElemIsEnd(e) && e->max == RPR_QUANTITY_INF && + !RPRElemIsReluctant(e)) { - /* Set both flags on first element */ - elem->flags |= RPR_ELEM_ABSORBABLE_BRANCH | RPR_ELEM_ABSORBABLE; + Assert(e->jump == idx); /* END points back to first child */ + + /* Set ABSORBABLE_BRANCH on all children, ABSORBABLE on END only */ + for (e = elem; !RPRElemIsEnd(e); e = &pattern->elements[e->next]) + e->flags |= RPR_ELEM_ABSORBABLE_BRANCH; + e->flags |= RPR_ELEM_ABSORBABLE_BRANCH | RPR_ELEM_ABSORBABLE; return true; } return false; } -/* - * computeAbsorbability - * Determine if pattern supports context absorption optimization. - * - * Context absorption eliminates redundant match searches by absorbing - * newer contexts that cannot produce longer matches than older contexts. - * This achieves O(n^2) -> O(n) performance improvement. - * - * Only greedy unbounded quantifiers at pattern start can be absorbable. - * Reluctant quantifiers are excluded because they don't maintain monotonic - * decrease property required for safe absorption. - * - * This function sets two flags: - * RPR_ELEM_ABSORBABLE: Absorption judgment point - * - Simple unbounded VAR: the VAR itself (e.g., A in A+) - * - Unbounded GROUP: the END element (e.g., END in (A B)+) - * RPR_ELEM_ABSORBABLE_BRANCH: All elements in absorbable region - * - All elements within the same scope as unbounded start - * - * Examples: - * A+ B C - absorbable (A gets both flags) - * (A B)+ C - absorbable (A,B,END get BRANCH, END gets ABSORBABLE) - * A B+ - NOT absorbable (unbounded not at start) - * A+? B C - NOT absorbable (reluctant quantifier) - * (A+ B+)+ - only first A+ on first iteration (nested unbounded not supported) - * A+ | B+ - both branches absorbable independently - * A+ | C D - only A+ branch absorbable (C D branch not absorbable) - * ((A+ B) | C) D - nested ALT: A+ branch is absorbable - */ - /* * computeAbsorbabilityRecursive * Recursively check absorbability starting from given index. @@ -1562,14 +1518,14 @@ isUnboundedStart(RPRPattern *pattern, RPRElemIdx idx, RPRDepth parentDepth) * Each branch gets its own absorbability status, and if any branch is absorbable, * the ALT element itself is marked with RPR_ELEM_ABSORBABLE_BRANCH. * - * Otherwise, checks if the element starts an unbounded sequence via isUnboundedStart. + * If BEGIN, skips to first child. * - * parentDepth acts as a scope boundary, preventing checks from crossing into - * enclosing contexts. + * Otherwise (VAR), checks if the element starts an unbounded sequence via + * isUnboundedStart. */ static void computeAbsorbabilityRecursive(RPRPattern *pattern, RPRElemIdx startIdx, - bool *hasAbsorbable, RPRDepth parentDepth) + bool *hasAbsorbable) { RPRPatternElement *elem = &pattern->elements[startIdx]; @@ -1587,9 +1543,12 @@ computeAbsorbabilityRecursive(RPRPattern *pattern, RPRElemIdx startIdx, Assert(branchIdx < pattern->numElements); branchFirst = &pattern->elements[branchIdx]; - /* Recursively check this branch with ALT depth + 1 as boundary */ - computeAbsorbabilityRecursive(pattern, branchIdx, &branchAbsorbable, - elem->depth + 1); + /* Stop if element is outside ALT scope (not a branch) */ + if (branchFirst->depth <= elem->depth) + break; + + /* Recursively check this branch */ + computeAbsorbabilityRecursive(pattern, branchIdx, &branchAbsorbable); if (branchAbsorbable) { *hasAbsorbable = true; @@ -1605,7 +1564,7 @@ computeAbsorbabilityRecursive(RPRPattern *pattern, RPRElemIdx startIdx, else if (RPRElemIsBegin(elem)) { /* BEGIN: skip to first child and check that */ - computeAbsorbabilityRecursive(pattern, elem->next, hasAbsorbable, parentDepth); + computeAbsorbabilityRecursive(pattern, elem->next, hasAbsorbable); /* Mark BEGIN element if contents are absorbable */ if (*hasAbsorbable) @@ -1613,14 +1572,46 @@ computeAbsorbabilityRecursive(RPRPattern *pattern, RPRElemIdx startIdx, } else { + /* Should never reach END - structural invariant of pattern AST */ + Assert(!RPRElemIsEnd(elem)); + /* Non-ALT, non-BEGIN: check if unbounded start */ - if (isUnboundedStart(pattern, startIdx, parentDepth)) + if (isUnboundedStart(pattern, startIdx)) { *hasAbsorbable = true; } } } +/* + * computeAbsorbability + * Determine if pattern supports context absorption optimization. + * + * Context absorption eliminates redundant match searches by absorbing + * newer contexts that cannot produce longer matches than older contexts. + * This achieves O(n^2) -> O(n) performance improvement. + * + * Only greedy unbounded quantifiers at pattern start can be absorbable. + * Reluctant quantifiers are excluded because they don't maintain monotonic + * decrease property required for safe absorption. + * + * This function sets two flags: + * RPR_ELEM_ABSORBABLE: Absorption judgment point + * - Simple unbounded VAR: the VAR itself (e.g., A in A+) + * - Unbounded GROUP: the END element (e.g., END in (A B)+) + * RPR_ELEM_ABSORBABLE_BRANCH: All elements in absorbable region + * - All elements within the same scope as unbounded start + * + * Examples: + * A+ B C - absorbable (A gets both flags) + * (A B)+ C - absorbable (A,B,END get BRANCH, END gets ABSORBABLE) + * A B+ - NOT absorbable (unbounded not at start) + * A+? B C - NOT absorbable (reluctant quantifier) + * (A+ B+)+ - only first A+ on first iteration (nested unbounded not supported) + * A+ | B+ - both branches absorbable independently + * A+ | C D - only A+ branch absorbable (C D branch not absorbable) + * ((A+ B) | C) D - nested ALT: A+ branch is absorbable + */ static void computeAbsorbability(RPRPattern *pattern) { @@ -1629,8 +1620,8 @@ computeAbsorbability(RPRPattern *pattern) /* Parser always produces at least one element + FIN */ Assert(pattern->numElements >= 2); - /* Start recursion with parentDepth=0 (top level) */ - computeAbsorbabilityRecursive(pattern, 0, &hasAbsorbable, 0); + /* Start recursion from first element */ + computeAbsorbabilityRecursive(pattern, 0, &hasAbsorbable); pattern->isAbsorbable = hasAbsorbable; } diff --git a/src/backend/parser/parse_rpr.c b/src/backend/parser/parse_rpr.c index 80ebf3c33f8..9e1fa228759 100644 --- a/src/backend/parser/parse_rpr.c +++ b/src/backend/parser/parse_rpr.c @@ -90,17 +90,9 @@ transformRPR(ParseState *pstate, WindowClause *wc, WindowDef *windef, /* Frame must start at current row */ if ((wc->frameOptions & FRAMEOPTION_START_CURRENT_ROW) == 0) { - const char *frameType = "GROUPS"; + const char *frameType = "ROWS"; const char *startBound = "unknown"; - /* Determine frame type */ - if (wc->frameOptions & FRAMEOPTION_ROWS) - frameType = "ROWS"; - else if (wc->frameOptions & FRAMEOPTION_RANGE) - frameType = "RANGE"; - else - frameType = "GROUPS"; - /* Determine current start bound */ if (wc->frameOptions & FRAMEOPTION_START_UNBOUNDED_PRECEDING) startBound = "UNBOUNDED PRECEDING"; diff --git a/src/include/optimizer/rpr.h b/src/include/optimizer/rpr.h index 8e1bc47643c..7a8938dbebd 100644 --- a/src/include/optimizer/rpr.h +++ b/src/include/optimizer/rpr.h @@ -22,7 +22,7 @@ #define RPR_COUNT_MAX INT32_MAX /* max runtime count (NFA state) */ #define RPR_ELEMIDX_MAX INT16_MAX /* max pattern elements */ #define RPR_ELEMIDX_INVALID ((RPRElemIdx) -1) /* invalid index */ -#define RPR_DEPTH_MAX (UINT8_MAX - 1) /* max pattern nesting depth: 254 */ +#define RPR_DEPTH_MAX (UINT8_MAX - 1) /* max pattern nesting depth: 254 */ #define RPR_DEPTH_NONE UINT8_MAX /* no enclosing group (top-level) */ /* Special varId values for control elements (252-255) */ diff --git a/src/test/regress/expected/rpr.out b/src/test/regress/expected/rpr.out index 8c8ff90e1cd..c921badb006 100644 --- a/src/test/regress/expected/rpr.out +++ b/src/test/regress/expected/rpr.out @@ -826,6 +826,46 @@ SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER company2 | 07-10-2023 | 1300 | | (20 rows) +-- nth_value beyond reduced frame (no IGNORE NULLS) +-- Tests WinGetSlotInFrame/WinGetFuncArgInFrame out-of-frame with RPR +SELECT company, tdate, price, + nth_value(price, 5) OVER w AS nth_5 +FROM stock +WINDOW w AS ( + PARTITION BY company + ORDER BY tdate + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (START UP+ DOWN+) + DEFINE + START AS TRUE, + UP AS price > PREV(price), + DOWN AS price < PREV(price) +); + company | tdate | price | nth_5 +----------+------------+-------+------- + company1 | 07-01-2023 | 100 | + company1 | 07-02-2023 | 200 | + company1 | 07-03-2023 | 150 | + company1 | 07-04-2023 | 140 | + company1 | 07-05-2023 | 150 | + company1 | 07-06-2023 | 90 | + company1 | 07-07-2023 | 110 | + company1 | 07-08-2023 | 130 | + company1 | 07-09-2023 | 120 | + company1 | 07-10-2023 | 130 | + company2 | 07-01-2023 | 50 | + company2 | 07-02-2023 | 2000 | + company2 | 07-03-2023 | 1500 | + company2 | 07-04-2023 | 1400 | + company2 | 07-05-2023 | 1500 | + company2 | 07-06-2023 | 60 | + company2 | 07-07-2023 | 1100 | + company2 | 07-08-2023 | 1300 | + company2 | 07-09-2023 | 1200 | + company2 | 07-10-2023 | 1300 | +(20 rows) + -- backtracking with reclassification of rows -- using AFTER MATCH SKIP PAST LAST ROW SELECT company, tdate, price, first_value(tdate) OVER w, last_value(tdate) OVER w @@ -1165,6 +1205,33 @@ count(*) OVER w 07-07-2023 | 1100 | | 0 (14 rows) +-- ReScan test: LATERAL join forces WindowAgg rescan with RPR +-- Tests ExecReScanWindowAgg clearing prev_slot/next_slot +SELECT g.x, sub.* +FROM generate_series(1, 2) g(x), +LATERAL ( + SELECT id, price, count(*) OVER w AS c + FROM (VALUES (1, 100), (2, 200), (3, 150)) AS t(id, price) + WHERE id <= g.x + 1 + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (START UP+) + DEFINE + START AS TRUE, + UP AS price > PREV(price) + ) +) sub +ORDER BY g.x, sub.id; + x | id | price | c +---+----+-------+--- + 1 | 1 | 100 | 2 + 1 | 2 | 200 | 0 + 2 | 1 | 100 | 2 + 2 | 2 | 200 | 0 + 2 | 3 | 150 | 0 +(5 rows) + -- PREV has multiple column reference CREATE TEMP TABLE rpr1 (id INTEGER, i SERIAL, j INTEGER); INSERT INTO rpr1(id, j) SELECT 1, g*2 FROM generate_series(1, 10) AS g; @@ -1352,6 +1419,46 @@ WITH data AS ( 13 | 4 | | (4 rows) +-- nth_value beyond reduced frame with IGNORE NULLS +-- Tests ignorenulls_getfuncarginframe early out-of-frame check +SELECT company, tdate, price, + nth_value(price, 5) IGNORE NULLS OVER w AS nth_5_in +FROM stock +WINDOW w AS ( + PARTITION BY company + ORDER BY tdate + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (START UP+ DOWN+) + DEFINE + START AS TRUE, + UP AS price > PREV(price), + DOWN AS price < PREV(price) +); + company | tdate | price | nth_5_in +----------+------------+-------+---------- + company1 | 07-01-2023 | 100 | + company1 | 07-02-2023 | 200 | + company1 | 07-03-2023 | 150 | + company1 | 07-04-2023 | 140 | + company1 | 07-05-2023 | 150 | + company1 | 07-06-2023 | 90 | + company1 | 07-07-2023 | 110 | + company1 | 07-08-2023 | 130 | + company1 | 07-09-2023 | 120 | + company1 | 07-10-2023 | 130 | + company2 | 07-01-2023 | 50 | + company2 | 07-02-2023 | 2000 | + company2 | 07-03-2023 | 1500 | + company2 | 07-04-2023 | 1400 | + company2 | 07-05-2023 | 1500 | + company2 | 07-06-2023 | 60 | + company2 | 07-07-2023 | 1100 | + company2 | 07-08-2023 | 1300 | + company2 | 07-09-2023 | 1200 | + company2 | 07-10-2023 | 1300 | +(20 rows) + -- View and pg_get_viewdef tests. CREATE TEMP VIEW v_window AS SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w, @@ -2882,11 +2989,10 @@ WINDOW w AS ( -- Row 2: B C D (2-4) <- ends first! -- Row 3: C D E F (3-6) <- ends last! -- Test 1b: Longer pattern FAILS, shorter pattern should survive --- Pattern: A+ B C D | A+ B (greedy at front for absorption) --- Data: A B C X (D expected but X found) --- A+ B C D fails at row 4 (X instead of D) --- A+ B succeeds at row 2 --- Result should be match 1-2 (A+ B) +-- Pattern: A+ B C D E | B+ C +-- A+ B C D E fails (no E found in sequence) +-- B+ C matches at rows 2-3 +-- Result: match 2-3 (B+ C) WITH test_overlap1b AS ( SELECT * FROM (VALUES (1, ARRAY['A']), @@ -3179,8 +3285,7 @@ WINDOW w AS ( (5 rows) -- Pattern A+ is absorbable (unbounded first element, only one unbounded) --- Without absorption: 4 matches (1-4, 2-4, 3-4, 4-4) --- With absorption: Row 1 match (1-4), rows 2-4 absorbed +-- 4 matches: (1-4, 2-4, 3-4, 4-4) -- Test absorption 2: A+ B pattern - absorption with fixed suffix WITH test_absorb_suffix AS ( SELECT * FROM (VALUES @@ -3213,7 +3318,7 @@ WINDOW w AS ( -- Pattern A+ B is absorbable (A+ unbounded first, B bounded suffix) -- All potential matches end at same row (row 4 with B) --- With absorption: Row 1 match (1-4), rows 2-3 absorbed +-- 3 matches: (1-4, 2-4, 3-4) -- Test absorption 3: Per-branch absorption with ALT (B+ C | B+ D) WITH test_absorb_alt AS ( SELECT * FROM (VALUES @@ -3246,8 +3351,7 @@ WINDOW w AS ( (5 rows) -- Both branches B+ C and B+ D are absorbable (B+ unbounded first) --- B+ D branch matches: potential 1-4, 2-4, 3-4 --- Row 1 (1-4) absorbs Row 2 (2-4) and Row 3 (3-4) - same endpoint +-- B+ D branch matches: 3 matches (1-4, 2-4, 3-4) -- Test absorption 4: Non-absorbable pattern (A B+ - unbounded not first) WITH test_no_absorb AS ( SELECT * FROM (VALUES @@ -3315,8 +3419,7 @@ WINDOW w AS ( (7 rows) -- Pattern optimized: (A B) (A B)+ -> (A B){2,} --- Potential matches: 1-6 (3 reps), 3-6 (2 reps), 5-6 needs 2 reps (fail) --- Row 1 (1-6) absorbs Row 3 (3-6) - same endpoint, top-level unbounded GROUP +-- 2 matches: 1-6 (3 reps), 3-6 (2 reps) -- Test absorption 6: Multiple unbounded - first element unbounded enables absorption WITH test_multi_unbounded AS ( SELECT * FROM (VALUES @@ -3347,7 +3450,7 @@ WINDOW w AS ( 5 | {X} | | (5 rows) --- Row 1: 1-4, Row 2: absorbed (same endpoint 4) +-- 2 matches: 1-4, 2-4 (same endpoint 4) -- ============================================ -- Jacob's RPR Patterns (from jacob branch) -- ============================================ @@ -3671,7 +3774,6 @@ WINDOW w AS ( -- Alternation with quantifiers (BUG cases from Jacob's tests) -- ============================================ -- Test: (A | B)+ C - alternation inside quantified group followed by C --- BUG: Currently returns NULL instead of matching 1-4 WITH jacob_alt_quant AS ( SELECT * FROM (VALUES (1, ARRAY['A']), @@ -3702,7 +3804,6 @@ WINDOW w AS ( -- Expected: 1-4 (A B A C) -- Test: ((A | B) C)+ - alternation inside group with outer quantifier --- Currently returns two separate matches (1-2, 3-4) instead of single 1-4 WITH jacob_alt_group AS ( SELECT * FROM (VALUES (1, ARRAY['A']), @@ -3737,8 +3838,7 @@ WINDOW w AS ( -- ============================================ -- RELUCTANT quantifiers (not yet supported) -- ============================================ --- Test: A+? B (reluctant) - CURRENTLY ACTS LIKE A+ (GREEDY) --- Parser accepts the syntax but executor doesn't differentiate +-- Test: A+? B (reluctant) - parser rejects with ERROR WITH jacob_reluctant AS ( SELECT * FROM (VALUES (1, ARRAY['A']), @@ -3761,9 +3861,8 @@ WINDOW w AS ( ERROR: reluctant quantifiers are not yet supported LINE 15: PATTERN (A+? B) ^ --- Current: 1-4 (A A A B) - greedy behavior --- Expected when RELUCTANT implemented: 3-4 (A B) - shortest A before B --- Test: A{1,3}? B (reluctant bounded) +-- Expected: ERROR (reluctant quantifiers not yet supported) +-- Test: A{1,3}? B (reluctant bounded) - parser rejects with ERROR WITH jacob_reluctant_bounded AS ( SELECT * FROM (VALUES (1, ARRAY['A']), @@ -3786,8 +3885,7 @@ WINDOW w AS ( ERROR: reluctant quantifiers are not yet supported LINE 15: PATTERN (A{1,3}? B) ^ --- Current: 1-4 (greedy, takes max A before B) --- Expected when RELUCTANT implemented: 3-4 (takes min A before B) +-- Expected: ERROR (reluctant quantifiers not yet supported) -- ============================================ -- Nested quantifiers (pathological patterns) -- ============================================ diff --git a/src/test/regress/expected/rpr_base.out b/src/test/regress/expected/rpr_base.out index a1f11bd61ce..ec67a099ee6 100644 --- a/src/test/regress/expected/rpr_base.out +++ b/src/test/regress/expected/rpr_base.out @@ -233,7 +233,7 @@ WINDOW w AS ( ERROR: row pattern definition variable name "a" appears more than once in DEFINE clause LINE 7: DEFINE A AS id > 0, A AS id < 10 ^ --- Expected: ERROR: row pattern definition variable name "A" appears more than once in DEFINE clause +-- Expected: ERROR: row pattern definition variable name "a" appears more than once in DEFINE clause DROP TABLE rpr_dup; -- Boolean coercion CREATE TABLE rpr_bool (id INT, flag BOOLEAN); @@ -438,7 +438,7 @@ ERROR: FRAME option RANGE is not permitted when row pattern recognition is used LINE 5: RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWIN... ^ HINT: Use: ROWS instead --- Expected: ERROR: FRAME must start at current row when row pattern recognition is used +-- Expected: ERROR: FRAME option RANGE is not permitted when row pattern recognition is used -- GROUPS frame not starting at CURRENT ROW SELECT COUNT(*) OVER w FROM rpr_frame @@ -452,7 +452,7 @@ ERROR: FRAME option GROUP is not permitted when row pattern recognition is used LINE 5: GROUPS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWI... ^ HINT: Use: ROWS instead --- Expected: ERROR: FRAME must start at current row when row pattern recognition is used +-- Expected: ERROR: FRAME option GROUP is not permitted when row pattern recognition is used -- Starting with N PRECEDING SELECT COUNT(*) OVER w FROM rpr_frame @@ -496,7 +496,7 @@ WINDOW w AS ( ERROR: frame starting from current row cannot have preceding rows LINE 5: ROWS BETWEEN CURRENT ROW AND 1 PRECEDING ^ --- Expected: ERROR: frame end cannot be before frame start +-- Expected: ERROR: frame starting from current row cannot have preceding rows -- End before start: CURRENT ROW AND UNBOUNDED PRECEDING SELECT COUNT(*) OVER w FROM rpr_frame @@ -509,7 +509,7 @@ WINDOW w AS ( ERROR: frame end cannot be UNBOUNDED PRECEDING LINE 5: ROWS BETWEEN CURRENT ROW AND UNBOUNDED PRECEDING ^ --- Expected: ERROR: frame end cannot be before frame start +-- Expected: ERROR: frame end cannot be UNBOUNDED PRECEDING -- Single row frame: CURRENT ROW AND CURRENT ROW SELECT id, val, COUNT(*) OVER w as cnt FROM rpr_frame @@ -594,14 +594,7 @@ ORDER BY id; 6 | 30 | 1 (6 rows) --- RANGE: includes all rows with same ORDER BY value --- Expected result: Includes peer rows (same val) in range calculation --- id=1 (val=10): range [10,20] includes all val=10 and val=20 peers -> cnt=2 (only first in peer group gets match) --- id=2,3 (val=10): already matched by id=1 -> cnt=0 --- id=4 (val=20): range [20,30] includes all val=20 and val=30 peers -> cnt=2 --- id=5 (val=20): already matched by id=4 -> cnt=0 --- id=6 (val=30): range [30,40] includes only val=30 -> cnt=1 --- Result: [2,0,0,2,0,1] +-- RANGE frame with RPR (not permitted) SELECT id, val, COUNT(*) OVER w as cnt FROM rpr_frame WINDOW w AS ( @@ -616,14 +609,8 @@ ERROR: FRAME option RANGE is not permitted when row pattern recognition is used LINE 5: RANGE BETWEEN CURRENT ROW AND 10 FOLLOWING ^ HINT: Use: ROWS instead --- GROUPS: treats rows with same value as one group (1 FOLLOWING = next group) --- Expected result: 1 FOLLOWING means current group + 1 next group --- id=1 (val=10): groups [val=10, val=20] -> cnt=2 (only first in group gets match) --- id=2,3 (val=10): already matched by id=1 -> cnt=0 --- id=4 (val=20): groups [val=20, val=30] -> cnt=2 --- id=5 (val=20): already matched by id=4 -> cnt=0 --- id=6 (val=30): groups [val=30] (no next group) -> cnt=1 --- Result: [2,0,0,2,0,1] +-- Expected: ERROR: FRAME option RANGE is not permitted when row pattern recognition is used +-- GROUPS frame with RPR (not permitted) SELECT id, val, COUNT(*) OVER w as cnt FROM rpr_frame WINDOW w AS ( @@ -638,6 +625,7 @@ ERROR: FRAME option GROUP is not permitted when row pattern recognition is used LINE 5: GROUPS BETWEEN CURRENT ROW AND 1 FOLLOWING ^ HINT: Use: ROWS instead +-- Expected: ERROR: FRAME option GROUP is not permitted when row pattern recognition is used DROP TABLE rpr_frame; -- ============================================================ -- PARTITION BY + FRAME Tests @@ -648,7 +636,6 @@ INSERT INTO rpr_partition VALUES (1, 1, 10), (2, 1, 20), (3, 1, 30), (4, 2, 15), (5, 2, 25), (6, 2, 35); -- PARTITION BY with ROWS frame --- Expected: Pattern matching should reset for each partition SELECT id, grp, val, COUNT(*) OVER w as cnt FROM rpr_partition WINDOW w AS ( @@ -670,8 +657,8 @@ ORDER BY id; 6 | 2 | 35 | 0 (6 rows) +-- Expected: Pattern matching should reset for each partition -- PARTITION BY with RANGE frame --- Expected: Each partition processed independently SELECT id, grp, val, COUNT(*) OVER w as cnt FROM rpr_partition WINDOW w AS ( @@ -687,6 +674,7 @@ ERROR: FRAME option RANGE is not permitted when row pattern recognition is used LINE 6: RANGE BETWEEN CURRENT ROW AND 10 FOLLOWING ^ HINT: Use: ROWS instead +-- Expected: ERROR: FRAME option RANGE is not permitted when row pattern recognition is used DROP TABLE rpr_partition; -- ============================================================ -- PATTERN Syntax Tests @@ -2659,7 +2647,7 @@ WINDOW w AS ( ERROR: invalid input syntax for type integer: "string" LINE 7: DEFINE A AS val > 'string' ^ --- Expected: ERROR: operator does not exist or type mismatch +-- Expected: ERROR: invalid input syntax for type integer: "string" -- Aggregate function in DEFINE (if not allowed) SELECT COUNT(*) OVER w FROM rpr_err @@ -2922,7 +2910,7 @@ WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING -> Seq Scan on rpr_plan (6 rows) --- ALT flatten: (A | (B | C)) -> (a | b | c) +-- ALT flatten: (A | (B | C))+ -> (a | b | c)+ EXPLAIN (COSTS OFF) SELECT COUNT(*) OVER w FROM rpr_plan WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING @@ -3154,7 +3142,7 @@ WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING -> Seq Scan on rpr_plan (6 rows) --- Combined optimization: A A (B B)+ B B C C C -> a{2} (b{2}){3,} c{3} +-- Combined optimization: A A (B B)+ B B C C C -> a{2} (b{2}){2,} c{3} EXPLAIN (COSTS OFF) SELECT COUNT(*) OVER w FROM rpr_plan WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING @@ -3170,7 +3158,7 @@ WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING -> Seq Scan on rpr_plan (6 rows) --- Consecutive GROUP merge with unbounded: (A+){2} -> (a+){2} +-- Consecutive GROUP merge with unbounded: (A+) (A+) -> a{2,} -- Tests mergeConsecutiveGroups with child->max == INF EXPLAIN (COSTS OFF) SELECT COUNT(*) OVER w FROM rpr_plan @@ -3355,8 +3343,8 @@ WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING -> Seq Scan on rpr_plan (6 rows) --- Unwrap GROUP{1,1}: (A | B | C) -> A | B | C --- Tests tryUnwrapGroup removing redundant GROUP +-- Unwrap GROUP{1,1}: ((A | B | C)) -> (a | b | c) +-- Tests tryUnwrapGroup removing redundant outer GROUP EXPLAIN (COSTS OFF) SELECT COUNT(*) OVER w FROM rpr_plan WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING @@ -4040,7 +4028,6 @@ ORDER BY id; CREATE TABLE rpr_fallback (id INT, val INT); INSERT INTO rpr_fallback VALUES (1, 10), (2, 20); -- Test: min quantifier overflow causes optimization fallback (min == max case) --- Expected: Fallback - pattern not merged due to min overflow (4000000000 > INT32_MAX) EXPLAIN (COSTS OFF) SELECT COUNT(*) OVER w FROM rpr_fallback WINDOW w AS ( @@ -4059,8 +4046,8 @@ WINDOW w AS ( -> Seq Scan on rpr_fallback (6 rows) +-- Expected: Fallback - pattern not merged due to min overflow (4000000000 > INT32_MAX) -- Test: max-only quantifier overflow causes optimization fallback --- Expected: Fallback - min OK (2*1=2), but max overflow (2*2000000000 > INT32_MAX) EXPLAIN (COSTS OFF) SELECT COUNT(*) OVER w FROM rpr_fallback WINDOW w AS ( @@ -4079,8 +4066,8 @@ WINDOW w AS ( -> Seq Scan on rpr_fallback (6 rows) --- Test: min quantifier overflow causes optimization fallback (min != max case) --- Expected: Fallback - pattern not merged due to min overflow +-- Expected: Fallback - min OK (2*1=2), but max overflow (2*2000000000 > INT32_MAX) +-- Test: max quantifier exceeds valid range (2147483647 = INT_MAX, limit is 2147483646) EXPLAIN (COSTS OFF) SELECT COUNT(*) OVER w FROM rpr_fallback WINDOW w AS ( @@ -4092,8 +4079,8 @@ WINDOW w AS ( ERROR: quantifier bounds must be between 0 and 2147483646 with max >= 1 LINE 6: PATTERN ((A{2000000000,2147483647}){2}) ^ +-- Expected: ERROR at parse time before optimization -- Test: nested unbounded with large min causes overflow fallback --- Expected: Fallback - min overflow (2000000000 * 2000000000 > INT32_MAX) EXPLAIN (COSTS OFF) SELECT COUNT(*) OVER w FROM rpr_fallback WINDOW w AS ( @@ -4112,8 +4099,8 @@ WINDOW w AS ( -> Seq Scan on rpr_fallback (6 rows) +-- Expected: Fallback - min overflow (2000000000 * 2000000000 > INT32_MAX) -- Test: prefix mismatch causes optimization fallback --- Expected: Fallback - prefix elements don't match GROUP content EXPLAIN (COSTS OFF) SELECT COUNT(*) OVER w FROM rpr_fallback WINDOW w AS ( @@ -4132,6 +4119,7 @@ WINDOW w AS ( -> Seq Scan on rpr_fallback (6 rows) +-- Expected: Fallback - prefix elements don't match GROUP content DROP TABLE rpr_fallback; -- ============================================================ -- Planner Integration Tests @@ -4288,6 +4276,7 @@ ORDER BY category; ERROR: syntax error at or near "GROUP" LINE 12: GROUP BY category ^ +-- Expected: ERROR (GROUP BY with window RPR not supported) -- ============================================================ -- Subquery and CTE Tests -- Files: planner.c, prepjointree.c @@ -5015,7 +5004,6 @@ DROP TABLE rpr_stress; CREATE TABLE rpr_errors (id INT, val INT); INSERT INTO rpr_errors VALUES (1, 10), (2, 20); -- Test: PATTERN variable without DEFINE (A), DEFINE variable not in PATTERN (B) --- Expected: Success - A is implicitly TRUE, B is filtered out SELECT id, val, COUNT(*) OVER w FROM rpr_errors WINDOW w AS ( ORDER BY id @@ -5030,8 +5018,8 @@ WINDOW w AS ( 2 | 20 | 0 (2 rows) +-- Expected: Success - A is implicitly TRUE, B is filtered out -- Test: 3 variables in PATTERN, 253 in DEFINE (DEFINE filtering test) --- Expected: Success - unused DEFINE variables are filtered out SELECT COUNT(*) OVER w FROM rpr_errors WINDOW w AS ( ORDER BY id @@ -5071,8 +5059,8 @@ WINDOW w AS ( 0 (2 rows) --- Test: 251 variables in PATTERN, 252 in DEFINE (boundary - should succeed) -- Expected: Success - unused DEFINE variables are filtered out +-- Test: 251 variables in PATTERN, 252 in DEFINE (boundary - should succeed) SELECT COUNT(*) OVER w FROM rpr_errors WINDOW w AS ( ORDER BY id @@ -5112,8 +5100,8 @@ WINDOW w AS ( 0 (2 rows) +-- Expected: Success - unused DEFINE variables are filtered out -- Test: 252 variables in PATTERN, 251 in DEFINE (exceeds limit with implicit TRUE) --- Expected: ERROR - too many pattern variables (Maximum is 251) SELECT COUNT(*) OVER w FROM rpr_errors WINDOW w AS ( ORDER BY id @@ -5149,8 +5137,8 @@ WINDOW w AS ( ); ERROR: too many pattern variables DETAIL: Maximum is 251. +-- Expected: ERROR - too many pattern variables (Maximum is 251) -- Test: Pattern nesting at maximum depth (depth 253) --- Expected: Should succeed -- Note: 253 nested GROUP{3,7} quantifiers produce depth 253 after optimization SELECT id, val, COUNT(*) OVER w FROM rpr_errors WINDOW w AS ( @@ -5165,8 +5153,8 @@ WINDOW w AS ( 2 | 20 | 0 (2 rows) +-- Expected: Should succeed -- Test: Pattern nesting depth exceeds maximum (depth 254) --- Expected: ERROR - pattern nesting too deep -- Note: 254 nested GROUP{3,7} quantifiers produce depth 254 after optimization SELECT id, val, COUNT(*) OVER w FROM rpr_errors WINDOW w AS ( @@ -5177,6 +5165,7 @@ WINDOW w AS ( ); ERROR: pattern nesting too deep DETAIL: Pattern nesting depth 254 exceeds maximum 253. +-- Expected: ERROR - pattern nesting too deep DROP TABLE rpr_errors; -- ============================================================ -- Jacob's Patterns @@ -5398,6 +5387,31 @@ WINDOW w AS ( 10 | 100 | 0 (10 rows) +-- Test: (A+ | (A | B)+)* - nested alternation inside quantified group +-- Previously caused infinite recursion in nfa_advance_alt when the inner +-- BEGIN(+)'s skip jump was followed as an ALT branch pointer. +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM (VALUES + (1, ARRAY['A', 'B']), + (2, ARRAY['B']), + (3, ARRAY['C']) +) AS t(id, flags) +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A+ | (A | B)+)*) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A,B} | 1 | 2 + 2 | {B} | | + 3 | {C} | | +(3 rows) + -- ============================================================ -- Pathological Patterns -- ============================================================ diff --git a/src/test/regress/expected/rpr_explain.out b/src/test/regress/expected/rpr_explain.out index ea75d62718e..f23d06f6d59 100644 --- a/src/test/regress/expected/rpr_explain.out +++ b/src/test/regress/expected/rpr_explain.out @@ -134,10 +134,11 @@ WINDOW w AS ( Pattern: a b Storage: Memory Maximum Storage: NkB NFA States: 2 peak, 101 total, 0 merged - NFA Contexts: 3 peak, 101 total, 80 pruned + NFA Contexts: 2 peak, 101 total, 60 pruned NFA: 20 matched (len 2/2/2.0), 0 mismatched + NFA: 0 absorbed, 20 skipped (len 1/1/1.0) -> Seq Scan on nfa_test (actual rows=100.00 loops=1) -(8 rows) +(9 rows) DROP VIEW rpr_v; -- Pattern with no matches - 0 matched @@ -235,7 +236,7 @@ SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line (1 row) SELECT rpr_explain_filter(' -EXPLAIN (COSTS OFF) +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w FROM generate_series(1, 20) AS s(v) WINDOW w AS ( @@ -243,13 +244,18 @@ WINDOW w AS ( PATTERN (A (B | C)) DEFINE A AS v % 3 = 1, B AS v % 3 = 2, C AS v % 3 = 0 );'); - rpr_explain_filter -------------------------------------------------------------------- - WindowAgg + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=20.00 loops=1) Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) Pattern: a (b | c) - -> Function Scan on generate_series s -(4 rows) + Storage: Memory Maximum Storage: NkB + NFA States: 3 peak, 28 total, 0 merged + NFA Contexts: 2 peak, 21 total, 6 pruned + NFA: 7 matched (len 2/2/2.0), 0 mismatched + NFA: 0 absorbed, 7 skipped (len 1/1/1.0) + -> Function Scan on generate_series s (actual rows=20.00 loops=1) +(9 rows) DROP VIEW rpr_v; -- Regression test: Sequential alternations at same depth @@ -270,7 +276,7 @@ SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line (1 row) SELECT rpr_explain_filter(' -EXPLAIN (COSTS OFF) +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w FROM generate_series(1, 30) AS s(v) WINDOW w AS ( @@ -278,13 +284,17 @@ WINDOW w AS ( PATTERN (A ((B | C) (D | E))*) DEFINE A AS v % 5 = 1, B AS v % 5 = 2, C AS v % 5 = 3, D AS v % 5 = 4, E AS v % 5 = 0 );'); - rpr_explain_filter -------------------------------------------------------------------- - WindowAgg + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=30.00 loops=1) Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) Pattern: a ((b | c) (d | e))* - -> Function Scan on generate_series s -(4 rows) + Storage: Memory Maximum Storage: NkB + NFA States: 4 peak, 49 total, 0 merged + NFA Contexts: 3 peak, 31 total, 24 pruned + NFA: 6 matched (len 1/1/1.0), 0 mismatched + -> Function Scan on generate_series s (actual rows=30.00 loops=1) +(8 rows) DROP VIEW rpr_v; -- ============================================================ @@ -366,10 +376,11 @@ WINDOW w AS ( Pattern: (a | b | c) (d | e) Storage: Memory Maximum Storage: NkB NFA States: 5 peak, 363 total, 0 merged - NFA Contexts: 3 peak, 101 total, 40 pruned + NFA Contexts: 3 peak, 101 total, 20 pruned NFA: 20 matched (len 2/2/2.0), 40 mismatched (len 2/2/2.0) + NFA: 0 absorbed, 20 skipped (len 1/1/1.0) -> Seq Scan on nfa_test (actual rows=100.00 loops=1) -(8 rows) +(9 rows) DROP VIEW rpr_v; -- Complex pattern with high state count @@ -411,10 +422,11 @@ WINDOW w AS ( Pattern: a+" b* c+ Storage: Memory Maximum Storage: NkB NFA States: 5 peak, 235 total, 0 merged - NFA Contexts: 3 peak, 101 total, 67 pruned + NFA Contexts: 3 peak, 101 total, 34 pruned NFA: 33 matched (len 3/3/3.0), 0 mismatched + NFA: 0 absorbed, 33 skipped (len 1/1/1.0) -> Function Scan on generate_series s (actual rows=100.00 loops=1) -(8 rows) +(9 rows) DROP VIEW rpr_v; -- Grouped pattern with quantifier - state merging @@ -450,9 +462,9 @@ WINDOW w AS ( Pattern: (a' b')+" Storage: Memory Maximum Storage: NkB NFA States: 3 peak, 91 total, 0 merged - NFA Contexts: 3 peak, 61 total, 30 pruned + NFA Contexts: 2 peak, 61 total, 0 pruned NFA: 1 matched (len 60/60/60.0), 0 mismatched - NFA: 29 absorbed (len 1/1/1.0), 0 skipped + NFA: 29 absorbed (len 1/1/1.0), 30 skipped (len 1/1/1.0) -> Function Scan on generate_series s (actual rows=60.00 loops=1) (9 rows) @@ -490,8 +502,8 @@ WINDOW w AS ( Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) Pattern: (a | b){8} Storage: Memory Maximum Storage: NkB - NFA States: 17 peak, 632 total, 0 merged - NFA Contexts: 9 peak, 101 total, 1 pruned + NFA States: 16 peak, 548 total, 0 merged + NFA Contexts: 8 peak, 101 total, 1 pruned NFA: 12 matched (len 8/8/8.0), 3 mismatched (len 2/4/3.0) NFA: 0 absorbed, 84 skipped (len 1/7/4.0) -> Function Scan on generate_series s (actual rows=100.00 loops=1) @@ -516,7 +528,7 @@ SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line (1 row) SELECT rpr_explain_filter(' -EXPLAIN (COSTS OFF) +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w FROM generate_series(1, 40) AS s(v) WINDOW w AS ( @@ -525,13 +537,18 @@ WINDOW w AS ( PATTERN ((A | B) (A | B) (C | D)) DEFINE A AS v % 4 = 0, B AS v % 4 = 1, C AS v % 4 = 2, D AS v % 4 = 3 );'); - rpr_explain_filter -------------------------------------------------------------------- - WindowAgg + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=40.00 loops=1) Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) Pattern: (a | b){2} (c | d) - -> Function Scan on generate_series s -(4 rows) + Storage: Memory Maximum Storage: NkB + NFA States: 6 peak, 111 total, 0 merged + NFA Contexts: 3 peak, 41 total, 12 pruned + NFA: 9 matched (len 3/3/3.0), 1 mismatched (len 2/2/2.0) + NFA: 0 absorbed, 18 skipped (len 1/2/1.5) + -> Function Scan on generate_series s (actual rows=40.00 loops=1) +(9 rows) DROP VIEW rpr_v; -- Consecutive ALT merge followed by non-ALT element @@ -552,7 +569,7 @@ SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line (1 row) SELECT rpr_explain_filter(' -EXPLAIN (COSTS OFF) +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w FROM generate_series(1, 40) AS s(v) WINDOW w AS ( @@ -561,13 +578,18 @@ WINDOW w AS ( PATTERN ((A | B) (A | B) C) DEFINE A AS v % 3 = 0, B AS v % 3 = 1, C AS v % 3 = 2 );'); - rpr_explain_filter -------------------------------------------------------------------- - WindowAgg + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=40.00 loops=1) Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) Pattern: (a | b){2} c - -> Function Scan on generate_series s -(4 rows) + Storage: Memory Maximum Storage: NkB + NFA States: 5 peak, 109 total, 0 merged + NFA Contexts: 3 peak, 41 total, 2 pruned + NFA: 12 matched (len 3/3/3.0), 2 mismatched (len 2/2/2.0) + NFA: 0 absorbed, 24 skipped (len 1/2/1.5) + -> Function Scan on generate_series s (actual rows=40.00 loops=1) +(9 rows) DROP VIEW rpr_v; -- ALT prefix/suffix absorbed into GROUP: (A|B) (A|B)+ (A|B) -> (A|B){3,} @@ -587,7 +609,7 @@ SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line (1 row) SELECT rpr_explain_filter(' -EXPLAIN (COSTS OFF) +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w FROM generate_series(1, 40) AS s(v) WINDOW w AS ( @@ -596,13 +618,18 @@ WINDOW w AS ( PATTERN ((A | B) (A | B)+ (A | B)) DEFINE A AS v % 2 = 0, B AS v % 2 = 1 );'); - rpr_explain_filter -------------------------------------------------------------------- - WindowAgg + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=40.00 loops=1) Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) Pattern: (a | b){3,} - -> Function Scan on generate_series s -(4 rows) + Storage: Memory Maximum Storage: NkB + NFA States: 6 peak, 161 total, 0 merged + NFA Contexts: 3 peak, 41 total, 0 pruned + NFA: 1 matched (len 40/40/40.0), 0 mismatched + NFA: 0 absorbed, 39 skipped (len 1/2/1.0) + -> Function Scan on generate_series s (actual rows=40.00 loops=1) +(9 rows) DROP VIEW rpr_v; -- High state merging - alternation with plus quantifier @@ -638,9 +665,9 @@ WINDOW w AS ( Pattern: (a | b | c)+ d Storage: Memory Maximum Storage: NkB NFA States: 15 peak, 753 total, 0 merged - NFA Contexts: 5 peak, 101 total, 25 pruned + NFA Contexts: 4 peak, 101 total, 0 pruned NFA: 25 matched (len 4/4/4.0), 0 mismatched - NFA: 0 absorbed, 50 skipped (len 2/3/2.5) + NFA: 0 absorbed, 75 skipped (len 1/3/2.0) -> Function Scan on generate_series s (actual rows=100.00 loops=1) (9 rows) @@ -677,10 +704,10 @@ WINDOW w AS ( Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) Pattern: (a | b)+ Storage: Memory Maximum Storage: NkB - NFA States: 8 peak, 4002 total, 0 merged - NFA Contexts: 4 peak, 1001 total, 333 pruned + NFA States: 5 peak, 3336 total, 0 merged + NFA Contexts: 3 peak, 1001 total, 333 pruned NFA: 334 matched (len 1/2/2.0), 0 mismatched - NFA: 0 absorbed, 333 skipped (len 2/2/2.0) + NFA: 0 absorbed, 333 skipped (len 1/1/1.0) -> Function Scan on generate_series s (actual rows=1000.00 loops=1) (9 rows) @@ -721,9 +748,9 @@ WINDOW w AS ( Pattern: a+" b Storage: Memory Maximum Storage: NkB NFA States: 3 peak, 91 total, 0 merged - NFA Contexts: 3 peak, 51 total, 10 pruned + NFA Contexts: 2 peak, 51 total, 0 pruned NFA: 10 matched (len 5/5/5.0), 0 mismatched - NFA: 30 absorbed (len 1/1/1.0), 0 skipped + NFA: 30 absorbed (len 1/1/1.0), 10 skipped (len 1/1/1.0) -> Function Scan on generate_series s (actual rows=50.00 loops=1) (9 rows) @@ -761,9 +788,9 @@ WINDOW w AS ( Pattern: a{2,4} b Storage: Memory Maximum Storage: NkB NFA States: 7 peak, 101 total, 0 merged - NFA Contexts: 6 peak, 51 total, 10 pruned - NFA: 10 matched (len 5/5/5.0), 10 mismatched (len 2/2/2.0) - NFA: 0 absorbed, 20 skipped (len 3/4/3.5) + NFA Contexts: 5 peak, 51 total, 0 pruned + NFA: 10 matched (len 5/5/5.0), 0 mismatched + NFA: 0 absorbed, 40 skipped (len 1/4/2.5) -> Function Scan on generate_series s (actual rows=50.00 loops=1) (9 rows) @@ -801,10 +828,11 @@ WINDOW w AS ( Pattern: a b c Storage: Memory Maximum Storage: NkB NFA States: 2 peak, 101 total, 0 merged - NFA Contexts: 3 peak, 101 total, 90 pruned + NFA Contexts: 3 peak, 101 total, 80 pruned NFA: 10 matched (len 3/3/3.0), 0 mismatched + NFA: 0 absorbed, 10 skipped (len 1/1/1.0) -> Function Scan on generate_series s (actual rows=100.00 loops=1) -(8 rows) +(9 rows) DROP VIEW rpr_v; -- High context absorption - unbounded group @@ -840,10 +868,11 @@ WINDOW w AS ( Pattern: (a' b')+" c Storage: Memory Maximum Storage: NkB NFA States: 3 peak, 134 total, 0 merged - NFA Contexts: 3 peak, 101 total, 67 pruned + NFA Contexts: 3 peak, 101 total, 34 pruned NFA: 33 matched (len 3/3/3.0), 0 mismatched + NFA: 0 absorbed, 33 skipped (len 1/1/1.0) -> Function Scan on generate_series s (actual rows=100.00 loops=1) -(8 rows) +(9 rows) DROP VIEW rpr_v; -- ============================================================ @@ -886,10 +915,11 @@ WINDOW w AS ( Pattern: a b c d e Storage: Memory Maximum Storage: NkB NFA States: 2 peak, 101 total, 0 merged - NFA Contexts: 3 peak, 101 total, 80 pruned + NFA Contexts: 3 peak, 101 total, 60 pruned NFA: 20 matched (len 5/5/5.0), 0 mismatched + NFA: 0 absorbed, 20 skipped (len 1/1/1.0) -> Seq Scan on nfa_test (actual rows=100.00 loops=1) -(8 rows) +(9 rows) DROP VIEW rpr_v; -- Variable length matches - min/max/avg differ @@ -925,9 +955,9 @@ WINDOW w AS ( Pattern: a+" b Storage: Memory Maximum Storage: NkB NFA States: 3 peak, 191 total, 0 merged - NFA Contexts: 3 peak, 101 total, 10 pruned + NFA Contexts: 2 peak, 101 total, 0 pruned NFA: 10 matched (len 10/10/10.0), 0 mismatched - NFA: 80 absorbed (len 1/1/1.0), 0 skipped + NFA: 80 absorbed (len 1/1/1.0), 10 skipped (len 1/1/1.0) -> Function Scan on generate_series s (actual rows=100.00 loops=1) (9 rows) @@ -965,9 +995,9 @@ WINDOW w AS ( Pattern: a+" b Storage: Memory Maximum Storage: NkB NFA States: 3 peak, 396 total, 0 merged - NFA Contexts: 3 peak, 201 total, 5 pruned + NFA Contexts: 2 peak, 201 total, 4 pruned NFA: 1 matched (len 196/196/196.0), 0 mismatched - NFA: 194 absorbed (len 1/1/1.0), 0 skipped + NFA: 194 absorbed (len 1/1/1.0), 1 skipped (len 1/1/1.0) -> Function Scan on generate_series s (actual rows=200.00 loops=1) (9 rows) @@ -1009,9 +1039,9 @@ WINDOW w AS ( Pattern: a+" b Storage: Memory Maximum Storage: NkB NFA States: 3 peak, 171 total, 0 merged - NFA Contexts: 3 peak, 101 total, 30 pruned + NFA Contexts: 3 peak, 101 total, 25 pruned NFA: 5 matched (len 5/5/5.0), 5 mismatched (len 11/11/11.0) - NFA: 60 absorbed (len 1/1/1.0), 0 skipped + NFA: 60 absorbed (len 1/1/1.0), 5 skipped (len 1/1/1.0) -> Function Scan on generate_series s (actual rows=100.00 loops=1) (9 rows) @@ -1067,9 +1097,9 @@ WINDOW w AS ( Pattern: a+" b+ c Storage: Memory Maximum Storage: NkB NFA States: 3 peak, 151 total, 0 merged - NFA Contexts: 3 peak, 101 total, 70 pruned + NFA Contexts: 3 peak, 101 total, 60 pruned NFA: 10 matched (len 6/6/6.0), 0 mismatched - NFA: 20 absorbed (len 1/1/1.0), 0 skipped + NFA: 20 absorbed (len 1/1/1.0), 10 skipped (len 1/1/1.0) -> Function Scan on generate_series s (actual rows=100.00 loops=1) (9 rows) @@ -1131,9 +1161,9 @@ WINDOW w AS ( Pattern: a+" b+ c Storage: Memory Maximum Storage: NkB NFA States: 3 peak, 115 total, 0 merged - NFA Contexts: 3 peak, 61 total, 16 pruned + NFA Contexts: 3 peak, 61 total, 15 pruned NFA: 1 matched (len 30/30/30.0), 1 mismatched (len 26/26/26.0) - NFA: 42 absorbed (len 1/1/1.0), 0 skipped + NFA: 42 absorbed (len 1/1/1.0), 1 skipped (len 1/1/1.0) -> Function Scan on generate_series i (actual rows=60.00 loops=1) (9 rows) @@ -1188,13 +1218,16 @@ WINDOW w AS ( "NFA Contexts Peak": 3, + "NFA Contexts Total": 51, + "NFA Contexts Absorbed": 0, + - "NFA Contexts Skipped": 0, + - "NFA Contexts Pruned": 33, + + "NFA Contexts Skipped": 17, + + "NFA Contexts Pruned": 16, + "NFA Matched": 17, + "NFA Mismatched": 0, + "NFA Match Length Min": 2, + "NFA Match Length Max": 2, + "NFA Match Length Avg": 2.0, + + "NFA Skipped Length Min": 1, + + "NFA Skipped Length Max": 1, + + "NFA Skipped Length Avg": 1.0, + "Plans": [ + { + "Node Type": "Function Scan", + @@ -1260,11 +1293,11 @@ WINDOW w AS ( "NFA States Peak": 3, + "NFA States Total": 191, + "NFA States Merged": 0, + - "NFA Contexts Peak": 3, + + "NFA Contexts Peak": 2, + "NFA Contexts Total": 101, + "NFA Contexts Absorbed": 80, + - "NFA Contexts Skipped": 0, + - "NFA Contexts Pruned": 10, + + "NFA Contexts Skipped": 10, + + "NFA Contexts Pruned": 0, + "NFA Matched": 10, + "NFA Mismatched": 0, + "NFA Match Length Min": 10, + @@ -1273,6 +1306,9 @@ WINDOW w AS ( "NFA Absorbed Length Min": 1, + "NFA Absorbed Length Max": 1, + "NFA Absorbed Length Avg": 1.0, + + "NFA Skipped Length Min": 1, + + "NFA Skipped Length Max": 1, + + "NFA Skipped Length Avg": 1.0, + "Plans": [ + { + "Node Type": "Function Scan", + @@ -1341,16 +1377,19 @@ WINDOW w AS ( 2 + 31 + 0 + - 3 + + 2 + 31 + 0 + - 0 + - 15 + + 15 + + 0 + 15 + 0 + 2 + 2 + 2.0 + + 1 + + 1 + + 1.0 + + + Function Scan + @@ -1420,8 +1459,8 @@ WINDOW w AS ( "NFA Contexts Peak": 3, + "NFA Contexts Total": 10, + "NFA Contexts Absorbed": 0, + - "NFA Contexts Skipped": 0, + - "NFA Contexts Pruned": 6, + + "NFA Contexts Skipped": 1, + + "NFA Contexts Pruned": 5, + "NFA Matched": 1, + "NFA Mismatched": 2, + "NFA Match Length Min": 3, + @@ -1430,6 +1469,9 @@ WINDOW w AS ( "NFA Mismatch Length Min": 3, + "NFA Mismatch Length Max": 3, + "NFA Mismatch Length Avg": 3.0, + + "NFA Skipped Length Min": 1, + + "NFA Skipped Length Max": 1, + + "NFA Skipped Length Avg": 1.0, + "Plans": [ + { + "Node Type": "Values Scan", + @@ -1492,10 +1534,10 @@ WINDOW w AS ( "Pattern": "(a | b){8}", + "Storage": "Memory", + "Maximum Storage": 0, + - "NFA States Peak": 17, + - "NFA States Total": 632, + + "NFA States Peak": 16, + + "NFA States Total": 548, + "NFA States Merged": 0, + - "NFA Contexts Peak": 9, + + "NFA Contexts Peak": 8, + "NFA Contexts Total": 101, + "NFA Contexts Absorbed": 0, + "NFA Contexts Skipped": 84, + @@ -1578,9 +1620,9 @@ WINDOW w AS ( Pattern: a+" b Storage: Memory Maximum Storage: NkB NFA States: 3 peak, 165 total, 0 merged - NFA Contexts: 3 peak, 93 total, 18 pruned + NFA Contexts: 2 peak, 93 total, 0 pruned NFA: 18 matched (len 5/5/5.0), 0 mismatched - NFA: 54 absorbed (len 1/1/1.0), 0 skipped + NFA: 54 absorbed (len 1/1/1.0), 18 skipped (len 1/1/1.0) -> Sort (actual rows=90.00 loops=1) Sort Key: p.p Sort Method: quicksort Memory: 27kB @@ -1635,9 +1677,9 @@ WINDOW w AS ( Pattern: a+" b Storage: Memory Maximum Storage: NkB NFA States: 3 peak, 77 total, 0 merged - NFA Contexts: 3 peak, 52 total, 26 pruned + NFA Contexts: 2 peak, 52 total, 21 pruned NFA: 5 matched (len 5/6/5.8), 0 mismatched - NFA: 19 absorbed (len 1/1/1.0), 0 skipped + NFA: 19 absorbed (len 1/1/1.0), 5 skipped (len 1/1/1.0) -> Sort (actual rows=50.00 loops=1) Sort Key: (CASE WHEN (v.v <= 25) THEN 1 ELSE 2 END) Sort Method: quicksort Memory: 26kB @@ -1841,9 +1883,9 @@ WINDOW w AS ( Pattern: (a' b' c')+" Storage: Memory Maximum Storage: NkB NFA States: 3 peak, 81 total, 0 merged - NFA Contexts: 3 peak, 61 total, 40 pruned + NFA Contexts: 3 peak, 61 total, 20 pruned NFA: 1 matched (len 60/60/60.0), 0 mismatched - NFA: 19 absorbed (len 1/1/1.0), 0 skipped + NFA: 19 absorbed (len 1/1/1.0), 20 skipped (len 1/1/1.0) -> Function Scan on generate_series s (actual rows=60.00 loops=1) (9 rows) @@ -1885,10 +1927,11 @@ WINDOW w AS ( Pattern: (a | b) (c | d | e) Storage: Memory Maximum Storage: NkB NFA States: 5 peak, 282 total, 0 merged - NFA Contexts: 3 peak, 101 total, 60 pruned + NFA Contexts: 3 peak, 101 total, 40 pruned NFA: 20 matched (len 2/2/2.0), 20 mismatched (len 2/2/2.0) + NFA: 0 absorbed, 20 skipped (len 1/1/1.0) -> Seq Scan on nfa_test (actual rows=100.00 loops=1) -(8 rows) +(9 rows) DROP VIEW rpr_v; -- Optional elements @@ -1924,10 +1967,11 @@ WINDOW w AS ( Pattern: a b? c Storage: Memory Maximum Storage: NkB NFA States: 3 peak, 64 total, 0 merged - NFA Contexts: 3 peak, 51 total, 37 pruned + NFA Contexts: 3 peak, 51 total, 25 pruned NFA: 12 matched (len 3/3/3.0), 1 mismatched (len 2/2/2.0) + NFA: 0 absorbed, 12 skipped (len 1/1/1.0) -> Function Scan on generate_series s (actual rows=50.00 loops=1) -(8 rows) +(9 rows) DROP VIEW rpr_v; -- Bounded quantifiers @@ -1963,9 +2007,9 @@ WINDOW w AS ( Pattern: a{2,5} b Storage: Memory Maximum Storage: NkB NFA States: 9 peak, 311 total, 0 merged - NFA Contexts: 7 peak, 101 total, 10 pruned - NFA: 10 matched (len 6/6/6.0), 50 mismatched (len 2/6/5.2) - NFA: 0 absorbed, 30 skipped (len 3/5/4.0) + NFA Contexts: 7 peak, 101 total, 0 pruned + NFA: 10 matched (len 6/6/6.0), 40 mismatched (len 6/6/6.0) + NFA: 0 absorbed, 50 skipped (len 1/5/3.0) -> Function Scan on generate_series s (actual rows=100.00 loops=1) (9 rows) @@ -2003,10 +2047,11 @@ WINDOW w AS ( Pattern: a b* c Storage: Memory Maximum Storage: NkB NFA States: 3 peak, 91 total, 0 merged - NFA Contexts: 3 peak, 51 total, 45 pruned + NFA Contexts: 3 peak, 51 total, 40 pruned NFA: 5 matched (len 9/9/9.0), 0 mismatched + NFA: 0 absorbed, 5 skipped (len 1/1/1.0) -> Function Scan on generate_series s (actual rows=50.00 loops=1) -(8 rows) +(9 rows) DROP VIEW rpr_v; -- ============================================================ @@ -2045,9 +2090,9 @@ WINDOW w AS ( Pattern: d+" u+ Storage: Memory Maximum Storage: NkB NFA States: 4 peak, 58 total, 0 merged - NFA Contexts: 3 peak, 31 total, 17 pruned + NFA Contexts: 3 peak, 31 total, 3 pruned NFA: 3 matched (len 3/14/8.0), 1 mismatched (len 3/3/3.0) - NFA: 9 absorbed (len 1/1/1.0), 0 skipped + NFA: 9 absorbed (len 1/1/1.0), 14 skipped (len 1/1/1.0) -> Seq Scan on nfa_complex (actual rows=30.00 loops=1) (9 rows) @@ -2085,9 +2130,9 @@ WINDOW w AS ( Pattern: u+" s* d+ Storage: Memory Maximum Storage: NkB NFA States: 5 peak, 76 total, 0 merged - NFA Contexts: 3 peak, 31 total, 14 pruned + NFA Contexts: 3 peak, 31 total, 1 pruned NFA: 4 matched (len 3/11/7.2), 0 mismatched - NFA: 12 absorbed (len 1/1/1.0), 0 skipped + NFA: 12 absorbed (len 1/1/1.0), 13 skipped (len 1/1/1.0) -> Seq Scan on nfa_complex (actual rows=30.00 loops=1) (9 rows) @@ -2168,10 +2213,11 @@ WINDOW w AS ( Pattern: a b Storage: Memory Maximum Storage: NkB NFA States: 2 peak, 1001 total, 0 merged - NFA Contexts: 3 peak, 1001 total, 500 pruned + NFA Contexts: 2 peak, 1001 total, 0 pruned NFA: 500 matched (len 2/2/2.0), 0 mismatched + NFA: 0 absorbed, 500 skipped (len 1/1/1.0) -> Function Scan on generate_series s (actual rows=1000.00 loops=1) -(8 rows) +(9 rows) DROP VIEW rpr_v; -- Large dataset with absorption @@ -2207,9 +2253,9 @@ WINDOW w AS ( Pattern: a+" b Storage: Memory Maximum Storage: NkB NFA States: 3 peak, 1991 total, 0 merged - NFA Contexts: 3 peak, 1001 total, 10 pruned + NFA Contexts: 2 peak, 1001 total, 0 pruned NFA: 10 matched (len 100/100/100.0), 0 mismatched - NFA: 980 absorbed (len 1/1/1.0), 0 skipped + NFA: 980 absorbed (len 1/1/1.0), 10 skipped (len 1/1/1.0) -> Function Scan on generate_series s (actual rows=1000.00 loops=1) (9 rows) @@ -2247,9 +2293,9 @@ WINDOW w AS ( Pattern: (a | b)+ c Storage: Memory Maximum Storage: NkB NFA States: 8 peak, 2004 total, 0 merged - NFA Contexts: 4 peak, 501 total, 167 pruned + NFA Contexts: 3 peak, 501 total, 1 pruned NFA: 166 matched (len 3/3/3.0), 1 mismatched (len 2/2/2.0) - NFA: 0 absorbed, 166 skipped (len 2/2/2.0) + NFA: 0 absorbed, 332 skipped (len 1/2/1.5) -> Function Scan on generate_series s (actual rows=500.00 loops=1) (9 rows) @@ -2292,9 +2338,9 @@ WINDOW w AS ( Pattern: a+" b Storage: Memory Maximum Storage: NkB NFA States: 3 peak, 91 total, 0 merged - NFA Contexts: 3 peak, 51 total, 10 pruned + NFA Contexts: 2 peak, 51 total, 0 pruned NFA: 10 matched (len 5/5/5.0), 0 mismatched - NFA: 30 absorbed (len 1/1/1.0), 0 skipped + NFA: 30 absorbed (len 1/1/1.0), 10 skipped (len 1/1/1.0) -> Function Scan on generate_series s (actual rows=50.00 loops=1) (9 rows) @@ -2332,9 +2378,9 @@ WINDOW w AS ( Pattern: a+" b Storage: Memory Maximum Storage: NkB NFA States: 3 peak, 91 total, 0 merged - NFA Contexts: 3 peak, 51 total, 10 pruned + NFA Contexts: 2 peak, 51 total, 0 pruned NFA: 10 matched (len 5/5/5.0), 0 mismatched - NFA: 30 absorbed (len 1/1/1.0), 0 skipped + NFA: 30 absorbed (len 1/1/1.0), 10 skipped (len 1/1/1.0) -> Function Scan on generate_series s (actual rows=50.00 loops=1) (9 rows) @@ -2455,9 +2501,9 @@ WINDOW w AS ( Pattern: a? b c Storage: Memory Maximum Storage: NkB NFA States: 4 peak, 82 total, 0 merged - NFA Contexts: 4 peak, 41 total, 20 pruned + NFA Contexts: 3 peak, 41 total, 10 pruned NFA: 10 matched (len 3/3/3.0), 0 mismatched - NFA: 0 absorbed, 10 skipped (len 2/2/2.0) + NFA: 0 absorbed, 20 skipped (len 1/2/1.5) -> Function Scan on generate_series s (actual rows=40.00 loops=1) (9 rows) @@ -2495,10 +2541,11 @@ WINDOW w AS ( Pattern: a{3} b Storage: Memory Maximum Storage: NkB NFA States: 4 peak, 51 total, 0 merged - NFA Contexts: 5 peak, 51 total, 10 pruned - NFA: 10 matched (len 4/4/4.0), 30 mismatched (len 2/4/3.0) + NFA Contexts: 5 peak, 51 total, 0 pruned + NFA: 10 matched (len 4/4/4.0), 10 mismatched (len 4/4/4.0) + NFA: 0 absorbed, 30 skipped (len 1/3/2.0) -> Function Scan on generate_series s (actual rows=50.00 loops=1) -(8 rows) +(9 rows) DROP VIEW rpr_v; -- Range {n,m} @@ -2534,9 +2581,9 @@ WINDOW w AS ( Pattern: a{2,4} b Storage: Memory Maximum Storage: NkB NFA States: 7 peak, 101 total, 0 merged - NFA Contexts: 6 peak, 51 total, 10 pruned - NFA: 10 matched (len 5/5/5.0), 10 mismatched (len 2/2/2.0) - NFA: 0 absorbed, 20 skipped (len 3/4/3.5) + NFA Contexts: 5 peak, 51 total, 0 pruned + NFA: 10 matched (len 5/5/5.0), 0 mismatched + NFA: 0 absorbed, 40 skipped (len 1/4/2.5) -> Function Scan on generate_series s (actual rows=50.00 loops=1) (9 rows) @@ -2574,9 +2621,9 @@ WINDOW w AS ( Pattern: a{3,}" b Storage: Memory Maximum Storage: NkB NFA States: 3 peak, 86 total, 0 merged - NFA Contexts: 3 peak, 51 total, 5 pruned + NFA Contexts: 2 peak, 51 total, 0 pruned NFA: 5 matched (len 10/10/10.0), 0 mismatched - NFA: 40 absorbed (len 1/1/1.0), 0 skipped + NFA: 40 absorbed (len 1/1/1.0), 5 skipped (len 1/1/1.0) -> Function Scan on generate_series s (actual rows=50.00 loops=1) (9 rows) @@ -2618,9 +2665,9 @@ WINDOW w AS ( Pattern: a+" b Storage: Memory Maximum Storage: NkB NFA States: 3 peak, 37 total, 0 merged - NFA Contexts: 3 peak, 21 total, 4 pruned + NFA Contexts: 2 peak, 21 total, 0 pruned NFA: 4 matched (len 5/5/5.0), 0 mismatched - NFA: 12 absorbed (len 1/1/1.0), 0 skipped + NFA: 12 absorbed (len 1/1/1.0), 4 skipped (len 1/1/1.0) -> Function Scan on generate_series s (actual rows=20.00 loops=1) (9 rows) @@ -2658,9 +2705,9 @@ WINDOW w AS ( Pattern: a+" b c Storage: Memory Maximum Storage: NkB NFA States: 3 peak, 52 total, 0 merged - NFA Contexts: 3 peak, 31 total, 9 pruned + NFA Contexts: 3 peak, 31 total, 6 pruned NFA: 3 matched (len 9/9/9.0), 0 mismatched - NFA: 18 absorbed (len 1/1/1.0), 0 skipped + NFA: 18 absorbed (len 1/1/1.0), 3 skipped (len 1/1/1.0) -> Function Scan on generate_series s (actual rows=30.00 loops=1) (9 rows) @@ -2698,10 +2745,11 @@ WINDOW w AS ( Pattern: a b c Storage: Memory Maximum Storage: NkB NFA States: 2 peak, 31 total, 0 merged - NFA Contexts: 3 peak, 31 total, 20 pruned + NFA Contexts: 3 peak, 31 total, 10 pruned NFA: 10 matched (len 3/3/3.0), 0 mismatched + NFA: 0 absorbed, 10 skipped (len 1/1/1.0) -> Function Scan on generate_series s (actual rows=30.00 loops=1) -(8 rows) +(9 rows) DROP VIEW rpr_v; -- ============================================================ @@ -2740,10 +2788,11 @@ WINDOW w AS ( Pattern: (a | b) c Storage: Memory Maximum Storage: NkB NFA States: 3 peak, 202 total, 0 merged - NFA Contexts: 3 peak, 101 total, 60 pruned + NFA Contexts: 3 peak, 101 total, 40 pruned NFA: 20 matched (len 2/2/2.0), 20 mismatched (len 2/2/2.0) + NFA: 0 absorbed, 20 skipped (len 1/1/1.0) -> Seq Scan on nfa_test (actual rows=100.00 loops=1) -(8 rows) +(9 rows) DROP VIEW rpr_v; -- Multiple items in alternation @@ -2783,10 +2832,11 @@ WINDOW w AS ( Pattern: (a | b | c | d) e Storage: Memory Maximum Storage: NkB NFA States: 5 peak, 404 total, 0 merged - NFA Contexts: 3 peak, 101 total, 20 pruned + NFA Contexts: 3 peak, 101 total, 0 pruned NFA: 20 matched (len 2/2/2.0), 60 mismatched (len 2/2/2.0) + NFA: 0 absorbed, 20 skipped (len 1/1/1.0) -> Seq Scan on nfa_test (actual rows=100.00 loops=1) -(8 rows) +(9 rows) DROP VIEW rpr_v; -- Alternation with quantifiers @@ -2822,9 +2872,9 @@ WINDOW w AS ( Pattern: (a | b)+ c Storage: Memory Maximum Storage: NkB NFA States: 8 peak, 204 total, 0 merged - NFA Contexts: 4 peak, 51 total, 17 pruned + NFA Contexts: 3 peak, 51 total, 1 pruned NFA: 16 matched (len 3/3/3.0), 1 mismatched (len 2/2/2.0) - NFA: 0 absorbed, 16 skipped (len 2/2/2.0) + NFA: 0 absorbed, 32 skipped (len 1/2/1.5) -> Function Scan on generate_series s (actual rows=50.00 loops=1) (9 rows) @@ -2845,7 +2895,7 @@ SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line (1 row) SELECT rpr_explain_filter(' -EXPLAIN (COSTS OFF) +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w FROM generate_series(1, 100) AS s(v) WINDOW w AS ( @@ -2853,13 +2903,17 @@ WINDOW w AS ( PATTERN (A | B | C | D | E) DEFINE A AS v % 5 = 0, B AS v % 5 = 1, C AS v % 5 = 2, D AS v % 5 = 3, E AS v % 5 = 4 );'); - rpr_explain_filter -------------------------------------------------------------------- - WindowAgg + rpr_explain_filter +----------------------------------------------------------------------- + WindowAgg (actual rows=100.00 loops=1) Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) Pattern: (a | b | c | d | e) - -> Function Scan on generate_series s -(4 rows) + Storage: Memory Maximum Storage: NkB + NFA States: 6 peak, 505 total, 0 merged + NFA Contexts: 2 peak, 101 total, 0 pruned + NFA: 100 matched (len 1/1/1.0), 0 mismatched + -> Function Scan on generate_series s (actual rows=100.00 loops=1) +(8 rows) DROP VIEW rpr_v; -- Alternation at start @@ -2878,7 +2932,7 @@ SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line (1 row) SELECT rpr_explain_filter(' -EXPLAIN (COSTS OFF) +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w FROM generate_series(1, 60) AS s(v) WINDOW w AS ( @@ -2886,13 +2940,18 @@ WINDOW w AS ( PATTERN ((A | B) C D) DEFINE A AS v % 4 = 0, B AS v % 4 = 1, C AS v % 4 = 2, D AS v % 4 = 3 );'); - rpr_explain_filter -------------------------------------------------------------------- - WindowAgg + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=60.00 loops=1) Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) Pattern: (a | b) c d - -> Function Scan on generate_series s -(4 rows) + Storage: Memory Maximum Storage: NkB + NFA States: 3 peak, 122 total, 0 merged + NFA Contexts: 3 peak, 61 total, 16 pruned + NFA: 15 matched (len 3/3/3.0), 14 mismatched (len 2/2/2.0) + NFA: 0 absorbed, 15 skipped (len 1/1/1.0) + -> Function Scan on generate_series s (actual rows=60.00 loops=1) +(9 rows) DROP VIEW rpr_v; -- Multiple sequential alternations @@ -2911,7 +2970,7 @@ SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line (1 row) SELECT rpr_explain_filter(' -EXPLAIN (COSTS OFF) +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w FROM generate_series(1, 100) AS s(v) WINDOW w AS ( @@ -2919,13 +2978,17 @@ WINDOW w AS ( PATTERN ((A | B) C (D | E) F) DEFINE A AS v % 6 = 0, B AS v % 6 = 1, C AS v % 6 = 2, D AS v % 6 = 3, E AS v % 6 = 4, F AS v % 6 = 5 );'); - rpr_explain_filter -------------------------------------------------------------------- - WindowAgg + rpr_explain_filter +----------------------------------------------------------------------- + WindowAgg (actual rows=100.00 loops=1) Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) Pattern: (a | b) c (d | e) f - -> Function Scan on generate_series s -(4 rows) + Storage: Memory Maximum Storage: NkB + NFA States: 4 peak, 219 total, 0 merged + NFA Contexts: 3 peak, 101 total, 67 pruned + NFA: 0 matched, 33 mismatched (len 2/4/3.0) + -> Function Scan on generate_series s (actual rows=100.00 loops=1) +(8 rows) DROP VIEW rpr_v; -- Quantified alternatives @@ -2944,7 +3007,7 @@ SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line (1 row) SELECT rpr_explain_filter(' -EXPLAIN (COSTS OFF) +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w FROM generate_series(1, 60) AS s(v) WINDOW w AS ( @@ -2952,13 +3015,18 @@ WINDOW w AS ( PATTERN ((A+ | B+) C) DEFINE A AS v % 3 = 0, B AS v % 3 = 1, C AS v % 3 = 2 );'); - rpr_explain_filter -------------------------------------------------------------------- - WindowAgg + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=60.00 loops=1) Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) Pattern: (a+" | b+") c - -> Function Scan on generate_series s -(4 rows) + Storage: Memory Maximum Storage: NkB + NFA States: 4 peak, 162 total, 0 merged + NFA Contexts: 3 peak, 61 total, 1 pruned + NFA: 20 matched (len 2/2/2.0), 19 mismatched (len 2/2/2.0) + NFA: 0 absorbed, 20 skipped (len 1/1/1.0) + -> Function Scan on generate_series s (actual rows=60.00 loops=1) +(9 rows) DROP VIEW rpr_v; -- Alternation at end @@ -2977,7 +3045,7 @@ SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line (1 row) SELECT rpr_explain_filter(' -EXPLAIN (COSTS OFF) +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w FROM generate_series(1, 60) AS s(v) WINDOW w AS ( @@ -2985,13 +3053,18 @@ WINDOW w AS ( PATTERN (A B (C | D)) DEFINE A AS v % 4 = 0, B AS v % 4 = 1, C AS v % 4 = 2, D AS v % 4 = 3 );'); - rpr_explain_filter -------------------------------------------------------------------- - WindowAgg + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=60.00 loops=1) Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) Pattern: a b (c | d) - -> Function Scan on generate_series s -(4 rows) + Storage: Memory Maximum Storage: NkB + NFA States: 3 peak, 75 total, 0 merged + NFA Contexts: 3 peak, 61 total, 32 pruned + NFA: 14 matched (len 3/3/3.0), 0 mismatched + NFA: 0 absorbed, 14 skipped (len 1/1/1.0) + -> Function Scan on generate_series s (actual rows=60.00 loops=1) +(9 rows) DROP VIEW rpr_v; -- Nested ALT at start of branch inside outer ALT @@ -3011,7 +3084,7 @@ SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line (1 row) SELECT rpr_explain_filter(' -EXPLAIN (COSTS OFF) +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w FROM generate_series(1, 20) AS s(v) WINDOW w AS ( @@ -3019,13 +3092,17 @@ WINDOW w AS ( PATTERN (A ((B | C) D | E)) DEFINE A AS v % 5 = 0, B AS v % 5 = 1, C AS v % 5 = 2, D AS v % 5 = 3, E AS v % 5 = 4 );'); - rpr_explain_filter -------------------------------------------------------------------- - WindowAgg + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=20.00 loops=1) Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) Pattern: a ((b | c) d | e) - -> Function Scan on generate_series s -(4 rows) + Storage: Memory Maximum Storage: NkB + NFA States: 4 peak, 29 total, 0 merged + NFA Contexts: 3 peak, 21 total, 17 pruned + NFA: 0 matched, 3 mismatched (len 3/3/3.0) + -> Function Scan on generate_series s (actual rows=20.00 loops=1) +(8 rows) DROP VIEW rpr_v; -- Nested ALT at end of branch inside outer ALT @@ -3045,7 +3122,7 @@ SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line (1 row) SELECT rpr_explain_filter(' -EXPLAIN (COSTS OFF) +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w FROM generate_series(1, 20) AS s(v) WINDOW w AS ( @@ -3053,13 +3130,17 @@ WINDOW w AS ( PATTERN (C (A | B) | D) DEFINE A AS v % 4 = 0, B AS v % 4 = 1, C AS v % 4 = 2, D AS v % 4 = 3 );'); - rpr_explain_filter -------------------------------------------------------------------- - WindowAgg + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=20.00 loops=1) Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) Pattern: (c (a | b) | d) - -> Function Scan on generate_series s -(4 rows) + Storage: Memory Maximum Storage: NkB + NFA States: 4 peak, 47 total, 0 merged + NFA Contexts: 3 peak, 21 total, 10 pruned + NFA: 5 matched (len 1/1/1.0), 5 mismatched (len 2/2/2.0) + -> Function Scan on generate_series s (actual rows=20.00 loops=1) +(8 rows) DROP VIEW rpr_v; -- ============================================================ @@ -3098,9 +3179,9 @@ WINDOW w AS ( Pattern: (a' b')+" Storage: Memory Maximum Storage: NkB NFA States: 3 peak, 61 total, 0 merged - NFA Contexts: 3 peak, 41 total, 20 pruned + NFA Contexts: 2 peak, 41 total, 0 pruned NFA: 1 matched (len 40/40/40.0), 0 mismatched - NFA: 19 absorbed (len 1/1/1.0), 0 skipped + NFA: 19 absorbed (len 1/1/1.0), 20 skipped (len 1/1/1.0) -> Function Scan on generate_series s (actual rows=40.00 loops=1) (9 rows) @@ -3137,10 +3218,10 @@ WINDOW w AS ( Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) Pattern: (a b){2,4} Storage: Memory Maximum Storage: NkB - NFA States: 7 peak, 66 total, 0 merged - NFA Contexts: 6 peak, 41 total, 20 pruned + NFA States: 4 peak, 51 total, 0 merged + NFA Contexts: 3 peak, 41 total, 5 pruned NFA: 5 matched (len 8/8/8.0), 0 mismatched - NFA: 0 absorbed, 15 skipped (len 2/6/4.0) + NFA: 0 absorbed, 30 skipped (len 1/2/1.5) -> Function Scan on generate_series s (actual rows=40.00 loops=1) (9 rows) @@ -3177,10 +3258,10 @@ WINDOW w AS ( Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) Pattern: ((a b){2})+ Storage: Memory Maximum Storage: NkB - NFA States: 60 peak, 286 total, 0 merged - NFA Contexts: 32 peak, 61 total, 30 pruned - NFA: 1 matched (len 60/60/60.0), 1 mismatched (len 2/2/2.0) - NFA: 0 absorbed, 28 skipped (len 4/58/31.0) + NFA States: 5 peak, 76 total, 0 merged + NFA Contexts: 4 peak, 61 total, 15 pruned + NFA: 1 matched (len 60/60/60.0), 0 mismatched + NFA: 0 absorbed, 44 skipped (len 1/4/2.3) -> Function Scan on generate_series s (actual rows=60.00 loops=1) (9 rows) @@ -3201,7 +3282,7 @@ SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line (1 row) SELECT rpr_explain_filter(' -EXPLAIN (COSTS OFF) +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w FROM generate_series(1, 40) AS s(v) WINDOW w AS ( @@ -3209,13 +3290,18 @@ WINDOW w AS ( PATTERN ((((A | B)+)+)+) DEFINE A AS v % 2 = 0, B AS v % 2 = 1 );'); - rpr_explain_filter -------------------------------------------------------------------- - WindowAgg + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=40.00 loops=1) Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) Pattern: (a | b)+ - -> Function Scan on generate_series s -(4 rows) + Storage: Memory Maximum Storage: NkB + NFA States: 5 peak, 162 total, 0 merged + NFA Contexts: 2 peak, 41 total, 0 pruned + NFA: 1 matched (len 40/40/40.0), 0 mismatched + NFA: 0 absorbed, 39 skipped (len 1/1/1.0) + -> Function Scan on generate_series s (actual rows=40.00 loops=1) +(9 rows) DROP VIEW rpr_v; -- Bounded quantifier on alternation @@ -3234,7 +3320,7 @@ SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line (1 row) SELECT rpr_explain_filter(' -EXPLAIN (COSTS OFF) +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w FROM generate_series(1, 60) AS s(v) WINDOW w AS ( @@ -3242,13 +3328,18 @@ WINDOW w AS ( PATTERN ((A | B){2,3} C) DEFINE A AS v % 3 = 0, B AS v % 3 = 1, C AS v % 3 = 2 );'); - rpr_explain_filter -------------------------------------------------------------------- - WindowAgg + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=60.00 loops=1) Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) Pattern: (a | b){2,3} c - -> Function Scan on generate_series s -(4 rows) + Storage: Memory Maximum Storage: NkB + NFA States: 7 peak, 200 total, 0 merged + NFA Contexts: 3 peak, 61 total, 2 pruned + NFA: 19 matched (len 3/3/3.0), 1 mismatched (len 2/2/2.0) + NFA: 0 absorbed, 38 skipped (len 1/2/1.5) + -> Function Scan on generate_series s (actual rows=60.00 loops=1) +(9 rows) DROP VIEW rpr_v; -- Nested groups with quantifiers @@ -3267,7 +3358,7 @@ SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line (1 row) SELECT rpr_explain_filter(' -EXPLAIN (COSTS OFF) +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w FROM generate_series(1, 60) AS s(v) WINDOW w AS ( @@ -3275,13 +3366,18 @@ WINDOW w AS ( PATTERN (((A B)+ C)*) DEFINE A AS v % 3 = 0, B AS v % 3 = 1, C AS v % 3 = 2 );'); - rpr_explain_filter -------------------------------------------------------------------- - WindowAgg + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=60.00 loops=1) Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) Pattern: ((a' b')+" c)* - -> Function Scan on generate_series s -(4 rows) + Storage: Memory Maximum Storage: NkB + NFA States: 7 peak, 178 total, 0 merged + NFA Contexts: 4 peak, 61 total, 22 pruned + NFA: 1 matched (len 57/57/57.0), 0 mismatched + NFA: 0 absorbed, 37 skipped (len 1/3/2.0) + -> Function Scan on generate_series s (actual rows=60.00 loops=1) +(9 rows) DROP VIEW rpr_v; -- Partial nested quantification @@ -3300,7 +3396,7 @@ SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line (1 row) SELECT rpr_explain_filter(' -EXPLAIN (COSTS OFF) +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w FROM generate_series(1, 60) AS s(v) WINDOW w AS ( @@ -3308,13 +3404,18 @@ WINDOW w AS ( PATTERN ((A (B C)+)*) DEFINE A AS v % 3 = 0, B AS v % 3 = 1, C AS v % 3 = 2 );'); - rpr_explain_filter -------------------------------------------------------------------- - WindowAgg + rpr_explain_filter +---------------------------------------------------------------------- + WindowAgg (actual rows=60.00 loops=1) Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) Pattern: (a (b c)+)* - -> Function Scan on generate_series s -(4 rows) + Storage: Memory Maximum Storage: NkB + NFA States: 5 peak, 160 total, 0 merged + NFA Contexts: 4 peak, 61 total, 22 pruned + NFA: 1 matched (len 57/57/57.0), 0 mismatched + NFA: 0 absorbed, 37 skipped (len 1/3/2.0) + -> Function Scan on generate_series s (actual rows=60.00 loops=1) +(9 rows) DROP VIEW rpr_v; -- ============================================================ @@ -3353,9 +3454,9 @@ WINDOW w AS ( Pattern: a+" b Storage: Memory Maximum Storage: NkB NFA States: 3 peak, 55 total, 0 merged - NFA Contexts: 3 peak, 31 total, 6 pruned + NFA Contexts: 2 peak, 31 total, 0 pruned NFA: 6 matched (len 5/5/5.0), 0 mismatched - NFA: 18 absorbed (len 1/1/1.0), 0 skipped + NFA: 18 absorbed (len 1/1/1.0), 6 skipped (len 1/1/1.0) -> Function Scan on generate_series s (actual rows=30.00 loops=1) (9 rows) @@ -3393,9 +3494,9 @@ WINDOW w AS ( Pattern: a+" b Storage: Memory Maximum Storage: NkB NFA States: 3 peak, 55 total, 0 merged - NFA Contexts: 3 peak, 31 total, 6 pruned + NFA Contexts: 2 peak, 31 total, 0 pruned NFA: 6 matched (len 5/5/5.0), 0 mismatched - NFA: 18 absorbed (len 1/1/1.0), 0 skipped + NFA: 18 absorbed (len 1/1/1.0), 6 skipped (len 1/1/1.0) -> Function Scan on generate_series s (actual rows=30.00 loops=1) (9 rows) @@ -3433,9 +3534,9 @@ WINDOW w AS ( Pattern: a+" b Storage: Memory Maximum Storage: NkB NFA States: 3 peak, 55 total, 0 merged - NFA Contexts: 3 peak, 31 total, 6 pruned + NFA Contexts: 2 peak, 31 total, 0 pruned NFA: 6 matched (len 5/5/5.0), 0 mismatched - NFA: 18 absorbed (len 1/1/1.0), 0 skipped + NFA: 18 absorbed (len 1/1/1.0), 6 skipped (len 1/1/1.0) -> Function Scan on generate_series s (actual rows=30.00 loops=1) (9 rows) @@ -3479,9 +3580,9 @@ WINDOW w AS ( Pattern: a+" b Storage: Memory Maximum Storage: NkB NFA States: 3 peak, 55 total, 0 merged - NFA Contexts: 3 peak, 31 total, 6 pruned + NFA Contexts: 2 peak, 31 total, 0 pruned NFA: 6 matched (len 5/5/5.0), 0 mismatched - NFA: 18 absorbed (len 1/1/1.0), 0 skipped + NFA: 18 absorbed (len 1/1/1.0), 6 skipped (len 1/1/1.0) -> Function Scan on generate_series s (actual rows=30.00 loops=1) (9 rows) @@ -3526,9 +3627,9 @@ WINDOW w AS ( Pattern: a+" b Storage: Memory Maximum Storage: NkB NFA States: 3 peak, 78 total, 0 merged - NFA Contexts: 3 peak, 51 total, 23 pruned + NFA Contexts: 2 peak, 51 total, 6 pruned NFA: 17 matched (len 2/3/2.6), 0 mismatched - NFA: 10 absorbed (len 1/1/1.0), 0 skipped + NFA: 10 absorbed (len 1/1/1.0), 17 skipped (len 1/1/1.0) -> Function Scan on generate_series s (actual rows=50.00 loops=1) (9 rows) @@ -3617,9 +3718,9 @@ WINDOW w AS ( Pattern: a+" b Storage: Memory Maximum Storage: NkB NFA States: 3 peak, 55 total, 0 merged - NFA Contexts: 3 peak, 31 total, 6 pruned + NFA Contexts: 2 peak, 31 total, 0 pruned NFA: 6 matched (len 5/5/5.0), 0 mismatched - NFA: 18 absorbed (len 1/1/1.0), 0 skipped + NFA: 18 absorbed (len 1/1/1.0), 6 skipped (len 1/1/1.0) -> Function Scan on generate_series v (actual rows=30.00 loops=1) (9 rows) @@ -3660,9 +3761,9 @@ WINDOW w AS ( Pattern: a+" b c Storage: Memory Maximum Storage: NkB NFA States: 3 peak, 851 total, 0 merged - NFA Contexts: 3 peak, 501 total, 151 pruned + NFA Contexts: 3 peak, 501 total, 101 pruned NFA: 50 matched (len 8/9/9.0), 0 mismatched - NFA: 299 absorbed (len 1/1/1.0), 0 skipped + NFA: 299 absorbed (len 1/1/1.0), 50 skipped (len 1/1/1.0) -> Function Scan on generate_series s (actual rows=500.00 loops=1) (9 rows) @@ -3700,10 +3801,11 @@ WINDOW w AS ( Pattern: a b Storage: Memory Maximum Storage: NkB NFA States: 2 peak, 501 total, 0 merged - NFA Contexts: 3 peak, 501 total, 250 pruned + NFA Contexts: 2 peak, 501 total, 0 pruned NFA: 250 matched (len 2/2/2.0), 0 mismatched + NFA: 0 absorbed, 250 skipped (len 1/1/1.0) -> Function Scan on generate_series s (actual rows=500.00 loops=1) -(8 rows) +(9 rows) DROP VIEW rpr_v; -- High skip count scenario @@ -3749,10 +3851,11 @@ WINDOW w AS ( Pattern: a b c d e Storage: Memory Maximum Storage: NkB NFA States: 2 peak, 501 total, 0 merged - NFA Contexts: 3 peak, 501 total, 495 pruned + NFA Contexts: 3 peak, 501 total, 490 pruned NFA: 5 matched (len 5/5/5.0), 0 mismatched + NFA: 0 absorbed, 5 skipped (len 1/1/1.0) -> Function Scan on generate_series s (actual rows=500.00 loops=1) -(8 rows) +(9 rows) DROP VIEW rpr_v; -- Cleanup diff --git a/src/test/regress/expected/rpr_nfa.out b/src/test/regress/expected/rpr_nfa.out new file mode 100644 index 00000000000..46a463c2597 --- /dev/null +++ b/src/test/regress/expected/rpr_nfa.out @@ -0,0 +1,2524 @@ +-- ============================================================ +-- RPR NFA Tests +-- Tests for Row Pattern Recognition NFA Runtime Execution +-- ============================================================ +-- +-- This test suite validates the NFA (Non-deterministic Finite +-- Automaton) runtime execution engine in nodeWindowAgg.c, +-- focusing on update_reduced_frame and related functions. +-- +-- Test Strategy: +-- Diagonal pattern style using ARRAY flags to explicitly +-- control which pattern variables match at each row. +-- +-- Test Coverage: +-- Basic NFA Flow (match->absorb->advance) +-- Absorption Optimization +-- Context Lifecycle Management +-- Advance Phase (Epsilon Transitions) +-- Match Phase (Variable Matching) +-- Frame Boundary Handling +-- State Management (Deduplication) +-- Statistics and Diagnostics +-- Quantifier Runtime Behavior +-- Pathological Pattern Protection +-- Alternation Runtime Behavior +-- Deep Nested Groups +-- SKIP Options (Runtime) +-- INITIAL Mode (Runtime) +-- Frame Boundary Variations +-- Special Partition Cases +-- DEFINE Special Cases +-- Absorption Dynamic Flags +-- FIXME Issues (Known Limitations) +-- +-- Responsibility: +-- - NFA runtime execution paths +-- - Context/State lifecycle management +-- - Runtime boundary conditions and protections +-- +-- NOT tested here (covered in other files): +-- - Pattern parsing/optimization (rpr_base.sql) +-- - EXPLAIN output (rpr_explain.sql) +-- - PREV/NEXT semantics (rpr.sql) +-- ============================================================ +-- ============================================================ +-- Basic NFA Flow +-- ============================================================ +-- Simple sequential pattern +WITH test_sequential AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['C']), + (4, ARRAY['D']), + (5, ARRAY['_']) -- No match + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_sequential +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B C D) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 4 + 2 | {B} | | + 3 | {C} | | + 4 | {D} | | + 5 | {_} | | +(5 rows) + +-- Quantified pattern (A+ B+ C+) +WITH test_quantified AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['B']), + (6, ARRAY['C']), + (7, ARRAY['C']), + (8, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_quantified +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+ B+ C+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 7 + 2 | {A} | 2 | 7 + 3 | {A} | 3 | 7 + 4 | {B} | | + 5 | {B} | | + 6 | {C} | | + 7 | {C} | | + 8 | {_} | | +(8 rows) + +-- Optional pattern (A B? C) +WITH test_optional AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['C']), -- B skipped + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['C']), -- B matched + (6, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_optional +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B? C) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 2 + 2 | {C} | | + 3 | {A} | 3 | 5 + 4 | {B} | | + 5 | {C} | | + 6 | {_} | | +(6 rows) + +-- Alternation pattern (A (B|C) D) +WITH test_alternation AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), -- First branch + (3, ARRAY['D']), + (4, ARRAY['A']), + (5, ARRAY['C']), -- Second branch + (6, ARRAY['D']), + (7, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_alternation +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A (B | C) D) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 3 + 2 | {B} | | + 3 | {D} | | + 4 | {A} | 4 | 6 + 5 | {C} | | + 6 | {D} | | + 7 | {_} | | +(7 rows) + +-- ============================================================ +-- Absorption Optimization +-- ============================================================ +-- Absorbable pattern (A+) +WITH test_absorbable AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['A']), + (5, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_absorbable +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+) + DEFINE + A AS 'A' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 4 + 2 | {A} | 2 | 4 + 3 | {A} | 3 | 4 + 4 | {A} | 4 | 4 + 5 | {_} | | +(5 rows) + +-- Mixed absorbable/non-absorbable ((A+) | B) +WITH test_mixed_absorption AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_mixed_absorption +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A+) | B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 3 + 2 | {A} | 2 | 3 + 3 | {A} | 3 | 3 + 4 | {B} | 4 | 4 + 5 | {_} | | +(5 rows) + +-- State coverage (same elemIdx, different count) +WITH test_state_coverage AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_state_coverage +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A{2,} B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 4 + 2 | {A} | 2 | 4 + 3 | {A} | | + 4 | {B} | | + 5 | {_} | | +(5 rows) + +-- ============================================================ +-- Context Lifecycle +-- ============================================================ +-- Multiple overlapping contexts (SKIP TO NEXT ROW) +WITH test_overlapping_contexts AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_overlapping_contexts +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+ B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 4 + 2 | {A} | 2 | 4 + 3 | {A} | 3 | 4 + 4 | {B} | | + 5 | {_} | | +(5 rows) + +-- Failed context cleanup (early failure) +WITH test_context_cleanup AS ( + SELECT * FROM (VALUES + (1, ARRAY['_']), -- Pruned at first row + (2, ARRAY['A']), + (3, ARRAY['_']), -- Mismatched after row 2 + (4, ARRAY['A']), + (5, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_context_cleanup +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {_} | | + 2 | {A} | | + 3 | {_} | | + 4 | {A} | 4 | 5 + 5 | {B} | | +(5 rows) + +-- Partition end (incomplete contexts) +WITH test_partition_end AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']) + -- Pattern requires B, but partition ends + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_partition_end +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+ B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | | + 2 | {A} | | + 3 | {A} | | +(3 rows) + +-- Completed context encountered during processing +-- Pattern (A | B C D): Ctx1 takes long B->C->D path, while Ctx2 takes +-- short A path and completes first. Next row sees Ctx2 +-- with states=NULL and skips it. +WITH test_completed_ctx AS ( + SELECT * FROM (VALUES + (1, ARRAY['B', '_']), + (2, ARRAY['C', 'A']), + (3, ARRAY['D', '_']), + (4, ARRAY['_', '_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_completed_ctx +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A | B C D) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {B,_} | 1 | 3 + 2 | {C,A} | 2 | 2 + 3 | {D,_} | | + 4 | {_,_} | | +(4 rows) + +-- ============================================================ +-- Advance Phase (Epsilon Transitions) +-- ============================================================ +-- Nested groups ((A B)+) +WITH test_nested_groups AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['A']), + (6, ARRAY['B']), + (7, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_nested_groups +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A B)+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 6 + 2 | {B} | | + 3 | {A} | 3 | 6 + 4 | {B} | | + 5 | {A} | 5 | 6 + 6 | {B} | | + 7 | {_} | | +(7 rows) + +-- Multiple alternation branches (A (B|C|D) E) +WITH test_multi_alt AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['E']), + (4, ARRAY['A']), + (5, ARRAY['C']), + (6, ARRAY['E']), + (7, ARRAY['A']), + (8, ARRAY['D']), + (9, ARRAY['E']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_multi_alt +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A (B | C | D) E) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags), + E AS 'E' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 3 + 2 | {B} | | + 3 | {E} | | + 4 | {A} | 4 | 6 + 5 | {C} | | + 6 | {E} | | + 7 | {A} | 7 | 9 + 8 | {D} | | + 9 | {E} | | +(9 rows) + +-- Optional VAR at start (A? B C) +WITH test_optional_var AS ( + SELECT * FROM (VALUES + (1, ARRAY['B']), -- A skipped + (2, ARRAY['C']), + (3, ARRAY['A']), -- A matched + (4, ARRAY['B']), + (5, ARRAY['C']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_optional_var +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A? B C) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {B} | 1 | 2 + 2 | {C} | | + 3 | {A} | 3 | 5 + 4 | {B} | 4 | 5 + 5 | {C} | | +(5 rows) + +-- Nested alternation ((A|B) (C|D)) +WITH test_nested_alt AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['C']), -- A C + (3, ARRAY['A']), + (4, ARRAY['D']), -- A D + (5, ARRAY['B']), + (6, ARRAY['C']), -- B C + (7, ARRAY['B']), + (8, ARRAY['D']) -- B D + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_nested_alt +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A | B) (C | D)) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 2 + 2 | {C} | | + 3 | {A} | 3 | 4 + 4 | {D} | | + 5 | {B} | 5 | 6 + 6 | {C} | | + 7 | {B} | 7 | 8 + 8 | {D} | | +(8 rows) + +-- ============================================================ +-- Match Phase +-- ============================================================ +-- Simple VAR with END next (A B C all min=max=1) +WITH test_simple_var AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['C']), + (4, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_simple_var +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B C) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 3 + 2 | {B} | | + 3 | {C} | | + 4 | {_} | | +(4 rows) + +-- VAR max exceeded (A{2,3}) +WITH test_max_exceeded AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), -- Max = 3 + (4, ARRAY['A']), -- Exceeds max, state removed + (5, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_max_exceeded +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A{2,3} B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | | + 2 | {A} | 2 | 5 + 3 | {A} | 3 | 5 + 4 | {A} | | + 5 | {B} | | +(5 rows) + +-- Non-matching VAR (DEFINE false) +WITH test_non_matching AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['_']), -- B not matched (DEFINE false) + (3, ARRAY['A']), + (4, ARRAY['B']), -- B matched + (5, ARRAY['C']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_non_matching +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B C) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | | + 2 | {_} | | + 3 | {A} | 3 | 5 + 4 | {B} | | + 5 | {C} | | +(5 rows) + +-- ============================================================ +-- Frame Boundary Handling +-- ============================================================ +-- Limited frame (ROWS BETWEEN CURRENT ROW AND 3 FOLLOWING) +WITH test_limited_frame AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['B']), -- Within 3 FOLLOWING + (5, ARRAY['B']), -- Beyond 3 FOLLOWING from row 1 + (6, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_limited_frame +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND 3 FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+ B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 4 + 2 | {A} | 2 | 4 + 3 | {A} | 3 | 4 + 4 | {B} | | + 5 | {B} | | + 6 | {_} | | +(6 rows) + +-- Unbounded frame (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) +WITH test_unbounded_frame AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['A']), + (5, ARRAY['A']), + (6, ARRAY['B']) -- Far from start, but unbounded + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_unbounded_frame +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+ B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 6 + 2 | {A} | 2 | 6 + 3 | {A} | 3 | 6 + 4 | {A} | 4 | 6 + 5 | {A} | 5 | 6 + 6 | {B} | | +(6 rows) + +-- Match exceeds frame boundary +WITH test_frame_exceeded AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']) + -- Frame ends at row 3 (2 FOLLOWING), B never appears + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_frame_exceeded +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND 2 FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+ B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | | + 2 | {A} | | + 3 | {A} | | +(3 rows) + +-- Frame boundary forced mismatch +-- Limited frame with enough rows so that a context's frame boundary +-- is exceeded while still processing. +WITH test_frame_boundary AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['A']), + (5, ARRAY['A']), + (6, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_frame_boundary +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND 2 FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+ B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | | + 2 | {A} | | + 3 | {A} | | + 4 | {A} | 4 | 6 + 5 | {A} | 5 | 6 + 6 | {B} | | +(6 rows) + +-- ============================================================ +-- State Management +-- ============================================================ +-- Duplicate state creation +WITH test_duplicate_states AS ( + SELECT * FROM (VALUES + (1, ARRAY['A', 'B']), -- Both A and B match (creates duplicate states via different paths) + (2, ARRAY['C', '_']), + (3, ARRAY['D', '_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_duplicate_states +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A | B) C D) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A,B} | 1 | 3 + 2 | {C,_} | | + 3 | {D,_} | | +(3 rows) + +-- Large pattern (stress free list) +WITH test_large_pattern AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['C']), + (4, ARRAY['D']), + (5, ARRAY['E']), + (6, ARRAY['F']), + (7, ARRAY['G']), + (8, ARRAY['H']), + (9, ARRAY['I']), + (10, ARRAY['J']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_large_pattern +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B C D E F G H I J) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags), + E AS 'E' = ANY(flags), + F AS 'F' = ANY(flags), + G AS 'G' = ANY(flags), + H AS 'H' = ANY(flags), + I AS 'I' = ANY(flags), + J AS 'J' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 10 + 2 | {B} | | + 3 | {C} | | + 4 | {D} | | + 5 | {E} | | + 6 | {F} | | + 7 | {G} | | + 8 | {H} | | + 9 | {I} | | + 10 | {J} | | +(10 rows) + +-- Reduced frame map reallocation (> 1024 rows) +WITH test_map_realloc AS ( + SELECT id, CASE WHEN id % 2 = 1 THEN ARRAY['A'] ELSE ARRAY['B'] END AS flags + FROM generate_series(1, 1100) AS id +) +SELECT count(*), min(match_start), max(match_end) +FROM ( + SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end + FROM test_map_realloc + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) + ) +) sub; + count | min | max +-------+-----+------ + 1100 | 1 | 1100 +(1 row) + +-- ============================================================ +-- Statistics and Diagnostics +-- ============================================================ +-- Matched contexts +WITH test_matched AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['A']), + (4, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_matched +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 2 + 2 | {B} | | + 3 | {A} | 3 | 4 + 4 | {B} | | +(4 rows) + +-- Pruned contexts (failed at first row) +WITH test_pruned AS ( + SELECT * FROM (VALUES + (1, ARRAY['_']), -- Pruned + (2, ARRAY['_']), -- Pruned + (3, ARRAY['A']), + (4, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_pruned +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {_} | | + 2 | {_} | | + 3 | {A} | 3 | 4 + 4 | {B} | | +(4 rows) + +-- Mismatched contexts (failed after multiple rows) +WITH test_mismatched AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['_']), -- Mismatched after 2 rows + (4, ARRAY['A']), + (5, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_mismatched +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+ B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | | + 2 | {A} | | + 3 | {_} | | + 4 | {A} | 4 | 5 + 5 | {B} | | +(5 rows) + +-- Absorbed contexts +WITH test_absorbed AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['A']), + (5, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_absorbed +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+) + DEFINE + A AS 'A' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 4 + 2 | {A} | 2 | 4 + 3 | {A} | 3 | 4 + 4 | {A} | 4 | 4 + 5 | {_} | | +(5 rows) + +-- Skipped contexts (SKIP TO NEXT ROW) +WITH test_skipped AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['B']) -- Completes match starting at row 1 + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_skipped +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+ B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 4 + 2 | {A} | 2 | 4 + 3 | {A} | 3 | 4 + 4 | {B} | | +(4 rows) + +-- ============================================================ +-- Quantifier Runtime Behavior +-- ============================================================ +-- Large count handling (A{100}) +WITH test_large_count AS ( + SELECT i AS id, ARRAY['A'] AS flags + FROM generate_series(1, 105) i +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_large_count +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A{100}) + DEFINE + A AS 'A' = ANY(flags) +); + id | flags | match_start | match_end +-----+-------+-------------+----------- + 1 | {A} | 1 | 100 + 2 | {A} | 2 | 101 + 3 | {A} | 3 | 102 + 4 | {A} | 4 | 103 + 5 | {A} | 5 | 104 + 6 | {A} | 6 | 105 + 7 | {A} | | + 8 | {A} | | + 9 | {A} | | + 10 | {A} | | + 11 | {A} | | + 12 | {A} | | + 13 | {A} | | + 14 | {A} | | + 15 | {A} | | + 16 | {A} | | + 17 | {A} | | + 18 | {A} | | + 19 | {A} | | + 20 | {A} | | + 21 | {A} | | + 22 | {A} | | + 23 | {A} | | + 24 | {A} | | + 25 | {A} | | + 26 | {A} | | + 27 | {A} | | + 28 | {A} | | + 29 | {A} | | + 30 | {A} | | + 31 | {A} | | + 32 | {A} | | + 33 | {A} | | + 34 | {A} | | + 35 | {A} | | + 36 | {A} | | + 37 | {A} | | + 38 | {A} | | + 39 | {A} | | + 40 | {A} | | + 41 | {A} | | + 42 | {A} | | + 43 | {A} | | + 44 | {A} | | + 45 | {A} | | + 46 | {A} | | + 47 | {A} | | + 48 | {A} | | + 49 | {A} | | + 50 | {A} | | + 51 | {A} | | + 52 | {A} | | + 53 | {A} | | + 54 | {A} | | + 55 | {A} | | + 56 | {A} | | + 57 | {A} | | + 58 | {A} | | + 59 | {A} | | + 60 | {A} | | + 61 | {A} | | + 62 | {A} | | + 63 | {A} | | + 64 | {A} | | + 65 | {A} | | + 66 | {A} | | + 67 | {A} | | + 68 | {A} | | + 69 | {A} | | + 70 | {A} | | + 71 | {A} | | + 72 | {A} | | + 73 | {A} | | + 74 | {A} | | + 75 | {A} | | + 76 | {A} | | + 77 | {A} | | + 78 | {A} | | + 79 | {A} | | + 80 | {A} | | + 81 | {A} | | + 82 | {A} | | + 83 | {A} | | + 84 | {A} | | + 85 | {A} | | + 86 | {A} | | + 87 | {A} | | + 88 | {A} | | + 89 | {A} | | + 90 | {A} | | + 91 | {A} | | + 92 | {A} | | + 93 | {A} | | + 94 | {A} | | + 95 | {A} | | + 96 | {A} | | + 97 | {A} | | + 98 | {A} | | + 99 | {A} | | + 100 | {A} | | + 101 | {A} | | + 102 | {A} | | + 103 | {A} | | + 104 | {A} | | + 105 | {A} | | +(105 rows) + +-- Unlimited quantifier (A{10,}) +WITH test_unlimited AS ( + SELECT i AS id, ARRAY['A'] AS flags + FROM generate_series(1, 15) i + UNION ALL + SELECT 16, ARRAY['B'] +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_unlimited +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A{10,} B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 16 + 2 | {A} | 2 | 16 + 3 | {A} | 3 | 16 + 4 | {A} | 4 | 16 + 5 | {A} | 5 | 16 + 6 | {A} | 6 | 16 + 7 | {A} | | + 8 | {A} | | + 9 | {A} | | + 10 | {A} | | + 11 | {A} | | + 12 | {A} | | + 13 | {A} | | + 14 | {A} | | + 15 | {A} | | + 16 | {B} | | +(16 rows) + +-- Min boundary (A{3,5}) +WITH test_min_boundary AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), -- Min=3 reached, exit path available + (4, ARRAY['B']), -- Match ends at min + (5, ARRAY['A']), + (6, ARRAY['A']), + (7, ARRAY['A']), + (8, ARRAY['A']), + (9, ARRAY['A']), -- Count=5, max reached + (10, ARRAY['B']) -- Match ends at max + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_min_boundary +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A{3,5} B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 4 + 2 | {A} | | + 3 | {A} | | + 4 | {B} | | + 5 | {A} | 5 | 10 + 6 | {A} | 6 | 10 + 7 | {A} | 7 | 10 + 8 | {A} | | + 9 | {A} | | + 10 | {B} | | +(10 rows) + +-- Max boundary exceeded (A{3,5}) +WITH test_max_boundary AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['A']), + (5, ARRAY['A']), + (6, ARRAY['A']), -- Count=6 > max=5, row 1 context removed + (7, ARRAY['B']) -- Row 1 context: no match (exceeded max) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_max_boundary +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A{3,5} B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | | + 2 | {A} | 2 | 7 + 3 | {A} | 3 | 7 + 4 | {A} | 4 | 7 + 5 | {A} | | + 6 | {A} | | + 7 | {B} | | +(7 rows) + +-- ============================================================ +-- Pathological Pattern Runtime Protection +-- ============================================================ +-- Complex nested nullable ((A* B*)*) - Runtime protection +WITH test_complex_nested AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['B']), + (4, ARRAY['B']), + (5, ARRAY['C']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_complex_nested +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A* B*)*) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 4 + 2 | {A} | 2 | 4 + 3 | {B} | 3 | 4 + 4 | {B} | 4 | 4 + 5 | {C} | | +(5 rows) + +-- Nested nullable with quantifier ((A{0,3})*) +WITH test_nested_quantifier AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_nested_quantifier +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A{0,3})*) + DEFINE + A AS 'A' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 3 + 2 | {A} | 2 | 3 + 3 | {A} | 3 | 3 + 4 | {B} | | +(4 rows) + +-- ============================================================ +-- Alternation Runtime Behavior +-- ============================================================ +-- Multi-branch alternation (A (B|C|D|E) F) +WITH test_multi_branch AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['F']), + (4, ARRAY['A']), + (5, ARRAY['C']), + (6, ARRAY['F']), + (7, ARRAY['A']), + (8, ARRAY['D']), + (9, ARRAY['F']), + (10, ARRAY['A']), + (11, ARRAY['E']), + (12, ARRAY['F']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_multi_branch +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A (B | C | D | E) F) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags), + E AS 'E' = ANY(flags), + F AS 'F' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 3 + 2 | {B} | | + 3 | {F} | | + 4 | {A} | 4 | 6 + 5 | {C} | | + 6 | {F} | | + 7 | {A} | 7 | 9 + 8 | {D} | | + 9 | {F} | | + 10 | {A} | 10 | 12 + 11 | {E} | | + 12 | {F} | | +(12 rows) + +-- Alternation with quantifiers (A+ | B+ | C+) +WITH test_alt_quantifiers AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['B']), + (6, ARRAY['C']), + (7, ARRAY['C']), + (8, ARRAY['C']), + (9, ARRAY['C']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_alt_quantifiers +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+ | B+ | C+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 3 + 2 | {A} | 2 | 3 + 3 | {A} | 3 | 3 + 4 | {B} | 4 | 5 + 5 | {B} | 5 | 5 + 6 | {C} | 6 | 9 + 7 | {C} | 7 | 9 + 8 | {C} | 8 | 9 + 9 | {C} | 9 | 9 +(9 rows) + +-- altPriority replacement (A B C | D) +-- D branch (higher altPriority) matches first at row 1, +-- then A B C branch (lower altPriority) replaces it at row 3. +WITH test_alt_replace AS ( + SELECT * FROM (VALUES + (1, ARRAY['A', 'D']), + (2, ARRAY['B', '_']), + (3, ARRAY['C', '_']), + (4, ARRAY['_', '_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_alt_replace +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B C | D) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A,D} | 1 | 3 + 2 | {B,_} | | + 3 | {C,_} | | + 4 | {_,_} | | +(4 rows) + +-- ============================================================ +-- Deep Nested Groups +-- ============================================================ +-- Three-level nesting ((((A B)+)+)+) +WITH test_deep_nesting AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['A']), + (6, ARRAY['B']), + (7, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_deep_nesting +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((((A B)+)+)+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 6 + 2 | {B} | | + 3 | {A} | 3 | 6 + 4 | {B} | | + 5 | {A} | 5 | 6 + 6 | {B} | | + 7 | {_} | | +(7 rows) + +-- Multiple groups in nesting (((A B) (C D))+) +WITH test_nested_sequential AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['C']), + (4, ARRAY['D']), + (5, ARRAY['A']), + (6, ARRAY['B']), + (7, ARRAY['C']), + (8, ARRAY['D']), + (9, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_nested_sequential +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (((A B) (C D))+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 8 + 2 | {B} | | + 3 | {C} | | + 4 | {D} | | + 5 | {A} | 5 | 8 + 6 | {B} | | + 7 | {C} | | + 8 | {D} | | + 9 | {_} | | +(9 rows) + +-- Nested END→END max reached +-- Inner group (A B){2} reaches max=2 → exits to outer END +WITH test_end_nested_max AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['A']), + (6, ARRAY['B']), + (7, ARRAY['A']), + (8, ARRAY['B']), + (9, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_end_nested_max +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (((A B){2})+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 8 + 2 | {B} | | + 3 | {A} | 3 | 6 + 4 | {B} | | + 5 | {A} | 5 | 8 + 6 | {B} | | + 7 | {A} | | + 8 | {B} | | + 9 | {_} | | +(9 rows) + +-- Nested END→END between min/max +-- Inner group (A B){1,3} exits between min/max → outer END count++ +WITH test_end_nested_mid AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['A']), + (6, ARRAY['B']), + (7, ARRAY['A']), + (8, ARRAY['B']), + (9, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_end_nested_mid +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (((A B){1,3})+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 8 + 2 | {B} | | + 3 | {A} | 3 | 8 + 4 | {B} | | + 5 | {A} | 5 | 8 + 6 | {B} | | + 7 | {A} | 7 | 8 + 8 | {B} | | + 9 | {_} | | +(9 rows) + +-- ============================================================ +-- SKIP Options (Runtime) +-- ============================================================ +-- SKIP PAST LAST ROW (non-overlapping matches) +WITH test_skip_past AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['A']), + (5, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_skip_past +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+) + DEFINE + A AS 'A' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 4 + 2 | {A} | | + 3 | {A} | | + 4 | {A} | | + 5 | {_} | | +(5 rows) + +-- SKIP TO NEXT ROW (overlapping matches) +WITH test_skip_next AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['A']), + (5, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_skip_next +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+) + DEFINE + A AS 'A' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 4 + 2 | {A} | 2 | 4 + 3 | {A} | 3 | 4 + 4 | {A} | 4 | 4 + 5 | {_} | | +(5 rows) + +-- SKIP difference verification +WITH test_skip_diff AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['A']), + (4, ARRAY['B']) + ) AS t(id, flags) +) +SELECT 'SKIP PAST' AS mode, id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_skip_diff +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +) +UNION ALL +SELECT 'SKIP NEXT' AS mode, id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_skip_diff +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +) +ORDER BY mode, id; + mode | id | flags | match_start | match_end +-----------+----+-------+-------------+----------- + SKIP NEXT | 1 | {A} | 1 | 2 + SKIP NEXT | 2 | {B} | | + SKIP NEXT | 3 | {A} | 3 | 4 + SKIP NEXT | 4 | {B} | | + SKIP PAST | 1 | {A} | 1 | 2 + SKIP PAST | 2 | {B} | | + SKIP PAST | 3 | {A} | 3 | 4 + SKIP PAST | 4 | {B} | | +(8 rows) + +-- ============================================================ +-- INITIAL Mode (Runtime) +-- ============================================================ +-- INITIAL mode (not yet supported - produces syntax error) +WITH test_initial_mode AS ( + SELECT * FROM (VALUES + (1, ARRAY['_']), -- Unmatched + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['_']), -- Unmatched + (5, ARRAY['A']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_initial_mode +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+) + DEFINE + A AS 'A' = ANY(flags) +); +ERROR: syntax error at or near "AFTER" +LINE 18: AFTER MATCH SKIP TO NEXT ROW + ^ +-- Default mode (include all rows) +WITH test_default_mode AS ( + SELECT * FROM (VALUES + (1, ARRAY['_']), -- Unmatched, but included + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['_']), -- Unmatched, but included + (5, ARRAY['A']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_default_mode +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+) + DEFINE + A AS 'A' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {_} | | + 2 | {A} | 2 | 3 + 3 | {A} | 3 | 3 + 4 | {_} | | + 5 | {A} | 5 | 5 +(5 rows) + +-- Mode difference verification (INITIAL not yet supported - produces syntax error) +WITH test_mode_diff AS ( + SELECT * FROM (VALUES + (1, ARRAY['_']), + (2, ARRAY['A']), + (3, ARRAY['_']) + ) AS t(id, flags) +) +SELECT 'INITIAL' AS mode, COUNT(*) AS row_count +FROM ( + SELECT id FROM test_mode_diff + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A) + DEFINE A AS 'A' = ANY(flags) + ) +) sub +UNION ALL +SELECT 'DEFAULT' AS mode, COUNT(*) AS row_count +FROM ( + SELECT id FROM test_mode_diff + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A) + DEFINE A AS 'A' = ANY(flags) + ) +) sub +ORDER BY mode; +ERROR: syntax error at or near "AFTER" +LINE 15: AFTER MATCH SKIP TO NEXT ROW + ^ +-- ============================================================ +-- Frame Boundary Variations +-- ============================================================ +-- Very limited frame (1 FOLLOWING) +WITH test_one_following AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), -- Within 1 FOLLOWING + (3, ARRAY['A']), -- Beyond 1 FOLLOWING from row 1 + (4, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_one_following +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 2 + 2 | {B} | | + 3 | {A} | 3 | 4 + 4 | {B} | | +(4 rows) + +-- Medium frame (10 FOLLOWING) +WITH test_ten_following AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['A']), + (5, ARRAY['A']), + (6, ARRAY['A']), + (7, ARRAY['A']), + (8, ARRAY['A']), + (9, ARRAY['A']), + (10, ARRAY['A']), + (11, ARRAY['B']), -- Within 10 FOLLOWING from row 1 + (12, ARRAY['A']), + (13, ARRAY['B']) -- Beyond 10 FOLLOWING from row 1 + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_ten_following +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND 10 FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+ B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 11 + 2 | {A} | 2 | 11 + 3 | {A} | 3 | 11 + 4 | {A} | 4 | 11 + 5 | {A} | 5 | 11 + 6 | {A} | 6 | 11 + 7 | {A} | 7 | 11 + 8 | {A} | 8 | 11 + 9 | {A} | 9 | 11 + 10 | {A} | 10 | 11 + 11 | {B} | | + 12 | {A} | 12 | 13 + 13 | {B} | | +(13 rows) + +-- Exact boundary match +WITH test_exact_boundary AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['A']), + (5, ARRAY['B']) -- Exactly at 4 FOLLOWING (frame end) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_exact_boundary +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND 4 FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+ B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 5 + 2 | {A} | 2 | 5 + 3 | {A} | 3 | 5 + 4 | {A} | 4 | 5 + 5 | {B} | | +(5 rows) + +-- ============================================================ +-- Special Partition Cases +-- ============================================================ +-- Empty partition (0 rows) +WITH test_empty_partition AS ( + SELECT * FROM (VALUES + (1, 1, ARRAY['A']), + (2, 2, ARRAY['_']) -- Different partition + ) AS t(id, part, flags) +) +SELECT id, part, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_empty_partition +WHERE part = 99 -- No rows match +WINDOW w AS ( + PARTITION BY part + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A) + DEFINE + A AS 'A' = ANY(flags) +); + id | part | flags | match_start | match_end +----+------+-------+-------------+----------- +(0 rows) + +-- Single row partition +WITH test_single_row AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_single_row +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A) + DEFINE + A AS 'A' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 1 +(1 row) + +-- All rows fail matching (all DEFINE false) +WITH test_all_fail AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_all_fail +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+) + DEFINE + A AS false -- All rows fail +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | | + 2 | {A} | | + 3 | {A} | | +(3 rows) + +-- Partition end with absorbable pattern +-- SKIP PAST LAST ROW + unbounded frame + all rows match A +-- Triggers absorb in !rowExists path at partition boundary. +WITH test_absorb_partition_end AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['A']), + (5, ARRAY['A']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_absorb_partition_end +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+) + DEFINE + A AS 'A' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 5 + 2 | {A} | | + 3 | {A} | | + 4 | {A} | | + 5 | {A} | | +(5 rows) + +-- ============================================================ +-- DEFINE Special Cases +-- ============================================================ +-- Undefined variable in DEFINE +WITH test_undefined_var AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['X']), -- B not defined, defaults to TRUE + (3, ARRAY['C']), + (4, ARRAY['A']), + (5, ARRAY['_']), -- B defaults to TRUE, but no flags + (6, ARRAY['C']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_undefined_var +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B C) + DEFINE + A AS 'A' = ANY(flags), + -- B is undefined, defaults to TRUE + C AS 'C' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 3 + 2 | {X} | | + 3 | {C} | | + 4 | {A} | 4 | 6 + 5 | {_} | | + 6 | {C} | | +(6 rows) + +-- ============================================================ +-- Absorption Dynamic Flags +-- ============================================================ +-- Partial absorbable pattern ((A+) B) +WITH test_partial_absorbable AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_partial_absorbable +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A+) B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 4 + 2 | {A} | 2 | 4 + 3 | {A} | 3 | 4 + 4 | {B} | | + 5 | {_} | | +(5 rows) + +-- Dynamic flag update ((A+) | B) +WITH test_dynamic_flags AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['A']), + (6, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_dynamic_flags +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A+) | B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 3 + 2 | {A} | 2 | 3 + 3 | {A} | 3 | 3 + 4 | {B} | 4 | 4 + 5 | {A} | 5 | 5 + 6 | {B} | 6 | 6 +(6 rows) + +-- Non-absorbable context during absorption +-- Pattern (A B)+ C: A,B in absorbable group, C is not. +-- When END exits to C via nfa_state_create, isAbsorbable becomes false. +WITH test_non_absorbable AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['C']), + (6, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_non_absorbable +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A B)+ C) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 5 + 2 | {B} | | + 3 | {A} | | + 4 | {B} | | + 5 | {C} | | + 6 | {_} | | +(6 rows) + +-- Absorption flags early return (!hasAbsorbableState) +-- Pattern (A B)+ C D with SKIP PAST LAST ROW +-- After reaching C (non-absorbable), hasAbsorbableState becomes false. +-- On next row (D), the early return fires. +WITH test_absorption_early_return AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['C']), + (6, ARRAY['D']), + (7, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_absorption_early_return +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A B)+ C D) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 6 + 2 | {B} | | + 3 | {A} | | + 4 | {B} | | + 5 | {C} | | + 6 | {D} | | + 7 | {_} | | +(7 rows) + +-- Coverage failure: older can't cover newer's states +-- Pattern A+ | B+ with SKIP PAST LAST ROW. +-- Row 1: only A → Ctx1 takes A branch only (B fails). +-- Row 2: A and B → Ctx2 takes both branches. +-- Absorption: Ctx1 has A but no B → can't cover Ctx2's B state → fails. +WITH test_coverage_fail AS ( + SELECT * FROM (VALUES + (1, ARRAY['A', '_']), + (2, ARRAY['A', 'B']), + (3, ARRAY['A', '_']), + (4, ARRAY['A', '_']), + (5, ARRAY['_', '_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_coverage_fail +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ | B+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A,_} | 1 | 4 + 2 | {A,B} | | + 3 | {A,_} | | + 4 | {A,_} | | + 5 | {_,_} | | +(5 rows) + +-- Absorb skips completed context (older->states==NULL) +-- Pattern A+ | B+ with SKIP PAST LAST ROW. +-- Row 1: A only → Ctx1 takes A branch. Row 2: B only → Ctx1 A fails (completed). +-- Ctx2 takes B branch. Absorption: Ctx1 states==NULL → skip. +WITH test_older_completed AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['B']), + (4, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_older_completed +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ | B+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 1 + 2 | {B} | 2 | 3 + 3 | {B} | | + 4 | {_} | | +(4 rows) + +-- Absorb skips non-absorbable context (!hasAbsorbableState) +-- Pattern A+ | B C with SKIP PAST LAST ROW (only A+ branch absorbable). +-- Row 1: B only → Ctx1 takes B branch (non-absorbable), advances to C. +-- Row 2: C,A → Ctx1 C matches (hasAbsorbableState=false). Ctx2 takes A (absorbable). +-- Absorption: Ctx1 !hasAbsorbableState → skip. +WITH test_older_non_absorbable AS ( + SELECT * FROM (VALUES + (1, ARRAY['B', '_']), + (2, ARRAY['C', 'A']), + (3, ARRAY['_', 'A']), + (4, ARRAY['_', '_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_older_non_absorbable +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ | B C) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {B,_} | 1 | 2 + 2 | {C,A} | | + 3 | {_,A} | 3 | 3 + 4 | {_,_} | | +(4 rows) + +-- ============================================================ +-- FIXME Issues - Known Limitations +-- ============================================================ +-- FIXME 1 - altPriority lexical order +WITH test_alt_priority_repeated AS ( + SELECT * FROM (VALUES + (1, ARRAY['A','B']), -- Both A and B match + (2, ARRAY['A','B']), + (3, ARRAY['A','B']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_alt_priority_repeated +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A | B)+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A,B} | 1 | 3 + 2 | {A,B} | 2 | 3 + 3 | {A,B} | 3 | 3 +(3 rows) + +-- FIXME 1 - Nested ALT lexical order +WITH test_alt_priority_nested AS ( + SELECT * FROM (VALUES + (1, ARRAY['A','B']), + (2, ARRAY['C','D']), + (3, ARRAY['A','B']), + (4, ARRAY['C','D']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_alt_priority_nested +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (((A | B) (C | D))+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A,B} | 1 | 4 + 2 | {C,D} | | + 3 | {A,B} | 3 | 4 + 4 | {C,D} | | +(4 rows) + +-- FIXME 2 - Cycle prevention at count > 0 +WITH test_cycle_nonzero AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['B']) -- Inner A* matches 0, cycles at count=3 + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_cycle_nonzero +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A*)*) + DEFINE + A AS 'A' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 3 + 2 | {A} | 2 | 3 + 3 | {A} | 3 | 3 + 4 | {B} | | +(4 rows) + +-- FIXME 2 - Cycle with mixed nullables +WITH test_cycle_mixed AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['A']), + (4, ARRAY['C']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_cycle_mixed +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A* B*)*) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 2 + 2 | {B} | 2 | 2 + 3 | {A} | 3 | 3 + 4 | {C} | | +(4 rows) + diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule index fc61d90ebaf..2bf141ff7d5 100644 --- a/src/test/regress/parallel_schedule +++ b/src/test/regress/parallel_schedule @@ -107,7 +107,7 @@ test: select_views portals_p2 foreign_key cluster dependency guc bitmapops combo # ---------- # Row Pattern Recognition tests # ---------- -test: rpr rpr_base rpr_explain +test: rpr rpr_base rpr_explain rpr_nfa # ---------- # Another group of parallel tests (JSON related) diff --git a/src/test/regress/sql/rpr.sql b/src/test/regress/sql/rpr.sql index 1071dacd687..7690c5611c0 100644 --- a/src/test/regress/sql/rpr.sql +++ b/src/test/regress/sql/rpr.sql @@ -323,6 +323,23 @@ SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER A AS TRUE ); +-- nth_value beyond reduced frame (no IGNORE NULLS) +-- Tests WinGetSlotInFrame/WinGetFuncArgInFrame out-of-frame with RPR +SELECT company, tdate, price, + nth_value(price, 5) OVER w AS nth_5 +FROM stock +WINDOW w AS ( + PARTITION BY company + ORDER BY tdate + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (START UP+ DOWN+) + DEFINE + START AS TRUE, + UP AS price > PREV(price), + DOWN AS price < PREV(price) +); + -- backtracking with reclassification of rows -- using AFTER MATCH SKIP PAST LAST ROW SELECT company, tdate, price, first_value(tdate) OVER w, last_value(tdate) OVER w @@ -499,6 +516,25 @@ count(*) OVER w DOWN AS price < PREV(price) ); +-- ReScan test: LATERAL join forces WindowAgg rescan with RPR +-- Tests ExecReScanWindowAgg clearing prev_slot/next_slot +SELECT g.x, sub.* +FROM generate_series(1, 2) g(x), +LATERAL ( + SELECT id, price, count(*) OVER w AS c + FROM (VALUES (1, 100), (2, 200), (3, 150)) AS t(id, price) + WHERE id <= g.x + 1 + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (START UP+) + DEFINE + START AS TRUE, + UP AS price > PREV(price) + ) +) sub +ORDER BY g.x, sub.id; + -- PREV has multiple column reference CREATE TEMP TABLE rpr1 (id INTEGER, i SERIAL, j INTEGER); INSERT INTO rpr1(id, j) SELECT 1, g*2 FROM generate_series(1, 10) AS g; @@ -624,6 +660,23 @@ WITH data AS ( DEFINE A AS gid < 13 ); +-- nth_value beyond reduced frame with IGNORE NULLS +-- Tests ignorenulls_getfuncarginframe early out-of-frame check +SELECT company, tdate, price, + nth_value(price, 5) IGNORE NULLS OVER w AS nth_5_in +FROM stock +WINDOW w AS ( + PARTITION BY company + ORDER BY tdate + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (START UP+ DOWN+) + DEFINE + START AS TRUE, + UP AS price > PREV(price), + DOWN AS price < PREV(price) +); + -- View and pg_get_viewdef tests. CREATE TEMP VIEW v_window AS SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w, @@ -1358,11 +1411,10 @@ WINDOW w AS ( -- Row 3: C D E F (3-6) <- ends last! -- Test 1b: Longer pattern FAILS, shorter pattern should survive --- Pattern: A+ B C D | A+ B (greedy at front for absorption) --- Data: A B C X (D expected but X found) --- A+ B C D fails at row 4 (X instead of D) --- A+ B succeeds at row 2 --- Result should be match 1-2 (A+ B) +-- Pattern: A+ B C D E | B+ C +-- A+ B C D E fails (no E found in sequence) +-- B+ C matches at rows 2-3 +-- Result: match 2-3 (B+ C) WITH test_overlap1b AS ( SELECT * FROM (VALUES (1, ARRAY['A']), @@ -1585,8 +1637,7 @@ WINDOW w AS ( DEFINE A AS 'A' = ANY(flags) ); -- Pattern A+ is absorbable (unbounded first element, only one unbounded) --- Without absorption: 4 matches (1-4, 2-4, 3-4, 4-4) --- With absorption: Row 1 match (1-4), rows 2-4 absorbed +-- 4 matches: (1-4, 2-4, 3-4, 4-4) -- Test absorption 2: A+ B pattern - absorption with fixed suffix WITH test_absorb_suffix AS ( @@ -1611,7 +1662,7 @@ WINDOW w AS ( ); -- Pattern A+ B is absorbable (A+ unbounded first, B bounded suffix) -- All potential matches end at same row (row 4 with B) --- With absorption: Row 1 match (1-4), rows 2-3 absorbed +-- 3 matches: (1-4, 2-4, 3-4) -- Test absorption 3: Per-branch absorption with ALT (B+ C | B+ D) WITH test_absorb_alt AS ( @@ -1636,8 +1687,7 @@ WINDOW w AS ( D AS 'D' = ANY(flags) ); -- Both branches B+ C and B+ D are absorbable (B+ unbounded first) --- B+ D branch matches: potential 1-4, 2-4, 3-4 --- Row 1 (1-4) absorbs Row 2 (2-4) and Row 3 (3-4) - same endpoint +-- B+ D branch matches: 3 matches (1-4, 2-4, 3-4) -- Test absorption 4: Non-absorbable pattern (A B+ - unbounded not first) WITH test_no_absorb AS ( @@ -1687,8 +1737,7 @@ WINDOW w AS ( B AS 'B' = ANY(flags) ); -- Pattern optimized: (A B) (A B)+ -> (A B){2,} --- Potential matches: 1-6 (3 reps), 3-6 (2 reps), 5-6 needs 2 reps (fail) --- Row 1 (1-6) absorbs Row 3 (3-6) - same endpoint, top-level unbounded GROUP +-- 2 matches: 1-6 (3 reps), 3-6 (2 reps) -- Test absorption 6: Multiple unbounded - first element unbounded enables absorption WITH test_multi_unbounded AS ( @@ -1711,7 +1760,7 @@ WINDOW w AS ( A AS 'A' = ANY(flags), B AS 'B' = ANY(flags) ); --- Row 1: 1-4, Row 2: absorbed (same endpoint 4) +-- 2 matches: 1-4, 2-4 (same endpoint 4) -- ============================================ -- Jacob's RPR Patterns (from jacob branch) @@ -1963,7 +2012,6 @@ WINDOW w AS ( -- ============================================ -- Test: (A | B)+ C - alternation inside quantified group followed by C --- BUG: Currently returns NULL instead of matching 1-4 WITH jacob_alt_quant AS ( SELECT * FROM (VALUES (1, ARRAY['A']), @@ -1987,7 +2035,6 @@ WINDOW w AS ( -- Expected: 1-4 (A B A C) -- Test: ((A | B) C)+ - alternation inside group with outer quantifier --- Currently returns two separate matches (1-2, 3-4) instead of single 1-4 WITH jacob_alt_group AS ( SELECT * FROM (VALUES (1, ARRAY['A']), @@ -2015,8 +2062,7 @@ WINDOW w AS ( -- RELUCTANT quantifiers (not yet supported) -- ============================================ --- Test: A+? B (reluctant) - CURRENTLY ACTS LIKE A+ (GREEDY) --- Parser accepts the syntax but executor doesn't differentiate +-- Test: A+? B (reluctant) - parser rejects with ERROR WITH jacob_reluctant AS ( SELECT * FROM (VALUES (1, ARRAY['A']), @@ -2036,10 +2082,9 @@ WINDOW w AS ( A AS 'A' = ANY(flags), B AS 'B' = ANY(flags) ); --- Current: 1-4 (A A A B) - greedy behavior --- Expected when RELUCTANT implemented: 3-4 (A B) - shortest A before B +-- Expected: ERROR (reluctant quantifiers not yet supported) --- Test: A{1,3}? B (reluctant bounded) +-- Test: A{1,3}? B (reluctant bounded) - parser rejects with ERROR WITH jacob_reluctant_bounded AS ( SELECT * FROM (VALUES (1, ARRAY['A']), @@ -2059,8 +2104,7 @@ WINDOW w AS ( A AS 'A' = ANY(flags), B AS 'B' = ANY(flags) ); --- Current: 1-4 (greedy, takes max A before B) --- Expected when RELUCTANT implemented: 3-4 (takes min A before B) +-- Expected: ERROR (reluctant quantifiers not yet supported) -- ============================================ -- Nested quantifiers (pathological patterns) diff --git a/src/test/regress/sql/rpr_base.sql b/src/test/regress/sql/rpr_base.sql index 6f38cd1ae92..879f5fe48a2 100644 --- a/src/test/regress/sql/rpr_base.sql +++ b/src/test/regress/sql/rpr_base.sql @@ -188,7 +188,7 @@ WINDOW w AS ( PATTERN (A+) DEFINE A AS id > 0, A AS id < 10 ); --- Expected: ERROR: row pattern definition variable name "A" appears more than once in DEFINE clause +-- Expected: ERROR: row pattern definition variable name "a" appears more than once in DEFINE clause DROP TABLE rpr_dup; @@ -355,7 +355,7 @@ WINDOW w AS ( PATTERN (A+) DEFINE A AS val > 0 ); --- Expected: ERROR: FRAME must start at current row when row pattern recognition is used +-- Expected: ERROR: FRAME option RANGE is not permitted when row pattern recognition is used -- GROUPS frame not starting at CURRENT ROW SELECT COUNT(*) OVER w @@ -366,7 +366,7 @@ WINDOW w AS ( PATTERN (A+) DEFINE A AS val > 0 ); --- Expected: ERROR: FRAME must start at current row when row pattern recognition is used +-- Expected: ERROR: FRAME option GROUP is not permitted when row pattern recognition is used -- Starting with N PRECEDING SELECT COUNT(*) OVER w @@ -401,7 +401,7 @@ WINDOW w AS ( PATTERN (A+) DEFINE A AS val > 0 ); --- Expected: ERROR: frame end cannot be before frame start +-- Expected: ERROR: frame starting from current row cannot have preceding rows -- End before start: CURRENT ROW AND UNBOUNDED PRECEDING SELECT COUNT(*) OVER w @@ -412,7 +412,7 @@ WINDOW w AS ( PATTERN (A+) DEFINE A AS val > 0 ); --- Expected: ERROR: frame end cannot be before frame start +-- Expected: ERROR: frame end cannot be UNBOUNDED PRECEDING -- Single row frame: CURRENT ROW AND CURRENT ROW SELECT id, val, COUNT(*) OVER w as cnt @@ -462,14 +462,7 @@ WINDOW w AS ( ) ORDER BY id; --- RANGE: includes all rows with same ORDER BY value --- Expected result: Includes peer rows (same val) in range calculation --- id=1 (val=10): range [10,20] includes all val=10 and val=20 peers -> cnt=2 (only first in peer group gets match) --- id=2,3 (val=10): already matched by id=1 -> cnt=0 --- id=4 (val=20): range [20,30] includes all val=20 and val=30 peers -> cnt=2 --- id=5 (val=20): already matched by id=4 -> cnt=0 --- id=6 (val=30): range [30,40] includes only val=30 -> cnt=1 --- Result: [2,0,0,2,0,1] +-- RANGE frame with RPR (not permitted) SELECT id, val, COUNT(*) OVER w as cnt FROM rpr_frame WINDOW w AS ( @@ -480,15 +473,9 @@ WINDOW w AS ( DEFINE A AS val >= 0, B AS val >= 0 ) ORDER BY id; +-- Expected: ERROR: FRAME option RANGE is not permitted when row pattern recognition is used --- GROUPS: treats rows with same value as one group (1 FOLLOWING = next group) --- Expected result: 1 FOLLOWING means current group + 1 next group --- id=1 (val=10): groups [val=10, val=20] -> cnt=2 (only first in group gets match) --- id=2,3 (val=10): already matched by id=1 -> cnt=0 --- id=4 (val=20): groups [val=20, val=30] -> cnt=2 --- id=5 (val=20): already matched by id=4 -> cnt=0 --- id=6 (val=30): groups [val=30] (no next group) -> cnt=1 --- Result: [2,0,0,2,0,1] +-- GROUPS frame with RPR (not permitted) SELECT id, val, COUNT(*) OVER w as cnt FROM rpr_frame WINDOW w AS ( @@ -499,6 +486,7 @@ WINDOW w AS ( DEFINE A AS val >= 0, B AS val >= 0 ) ORDER BY id; +-- Expected: ERROR: FRAME option GROUP is not permitted when row pattern recognition is used DROP TABLE rpr_frame; @@ -513,7 +501,6 @@ INSERT INTO rpr_partition VALUES (4, 2, 15), (5, 2, 25), (6, 2, 35); -- PARTITION BY with ROWS frame --- Expected: Pattern matching should reset for each partition SELECT id, grp, val, COUNT(*) OVER w as cnt FROM rpr_partition WINDOW w AS ( @@ -525,9 +512,9 @@ WINDOW w AS ( DEFINE A AS val >= 10, B AS val > 15 ) ORDER BY id; +-- Expected: Pattern matching should reset for each partition -- PARTITION BY with RANGE frame --- Expected: Each partition processed independently SELECT id, grp, val, COUNT(*) OVER w as cnt FROM rpr_partition WINDOW w AS ( @@ -539,6 +526,7 @@ WINDOW w AS ( DEFINE A AS val >= 10, B AS val >= 20 ) ORDER BY id; +-- Expected: ERROR: FRAME option RANGE is not permitted when row pattern recognition is used DROP TABLE rpr_partition; @@ -1911,7 +1899,7 @@ WINDOW w AS ( PATTERN (A+) DEFINE A AS val > 'string' ); --- Expected: ERROR: operator does not exist or type mismatch +-- Expected: ERROR: invalid input syntax for type integer: "string" -- Aggregate function in DEFINE (if not allowed) SELECT COUNT(*) OVER w @@ -2065,7 +2053,7 @@ SELECT COUNT(*) OVER w FROM rpr_plan WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING PATTERN (A ((B) (C))) DEFINE A AS val <= 30, B AS val <= 60, C AS val > 60); --- ALT flatten: (A | (B | C)) -> (a | b | c) +-- ALT flatten: (A | (B | C))+ -> (a | b | c)+ EXPLAIN (COSTS OFF) SELECT COUNT(*) OVER w FROM rpr_plan WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING @@ -2162,14 +2150,14 @@ SELECT COUNT(*) OVER w FROM rpr_plan WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING PATTERN ((A B)) DEFINE A AS val <= 50, B AS val > 50); --- Combined optimization: A A (B B)+ B B C C C -> a{2} (b{2}){3,} c{3} +-- Combined optimization: A A (B B)+ B B C C C -> a{2} (b{2}){2,} c{3} EXPLAIN (COSTS OFF) SELECT COUNT(*) OVER w FROM rpr_plan WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING PATTERN (A A (B B)+ B B C C C) DEFINE A AS val <= 20, B AS val > 20 AND val <= 70, C AS val > 70); --- Consecutive GROUP merge with unbounded: (A+){2} -> (a+){2} +-- Consecutive GROUP merge with unbounded: (A+) (A+) -> a{2,} -- Tests mergeConsecutiveGroups with child->max == INF EXPLAIN (COSTS OFF) SELECT COUNT(*) OVER w FROM rpr_plan @@ -2255,8 +2243,8 @@ WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING PATTERN ((A B*)+ A B*) DEFINE A AS val <= 50, B AS val > 50); --- Unwrap GROUP{1,1}: (A | B | C) -> A | B | C --- Tests tryUnwrapGroup removing redundant GROUP +-- Unwrap GROUP{1,1}: ((A | B | C)) -> (a | b | c) +-- Tests tryUnwrapGroup removing redundant outer GROUP EXPLAIN (COSTS OFF) SELECT COUNT(*) OVER w FROM rpr_plan WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING @@ -2611,7 +2599,6 @@ CREATE TABLE rpr_fallback (id INT, val INT); INSERT INTO rpr_fallback VALUES (1, 10), (2, 20); -- Test: min quantifier overflow causes optimization fallback (min == max case) --- Expected: Fallback - pattern not merged due to min overflow (4000000000 > INT32_MAX) EXPLAIN (COSTS OFF) SELECT COUNT(*) OVER w FROM rpr_fallback WINDOW w AS ( @@ -2620,9 +2607,9 @@ WINDOW w AS ( PATTERN ((A{2000000000}){2}) DEFINE A AS val > 0 ); +-- Expected: Fallback - pattern not merged due to min overflow (4000000000 > INT32_MAX) -- Test: max-only quantifier overflow causes optimization fallback --- Expected: Fallback - min OK (2*1=2), but max overflow (2*2000000000 > INT32_MAX) EXPLAIN (COSTS OFF) SELECT COUNT(*) OVER w FROM rpr_fallback WINDOW w AS ( @@ -2631,9 +2618,9 @@ WINDOW w AS ( PATTERN ((A{1,2000000000}){2}) DEFINE A AS val > 0 ); +-- Expected: Fallback - min OK (2*1=2), but max overflow (2*2000000000 > INT32_MAX) --- Test: min quantifier overflow causes optimization fallback (min != max case) --- Expected: Fallback - pattern not merged due to min overflow +-- Test: max quantifier exceeds valid range (2147483647 = INT_MAX, limit is 2147483646) EXPLAIN (COSTS OFF) SELECT COUNT(*) OVER w FROM rpr_fallback WINDOW w AS ( @@ -2642,9 +2629,9 @@ WINDOW w AS ( PATTERN ((A{2000000000,2147483647}){2}) DEFINE A AS val > 0 ); +-- Expected: ERROR at parse time before optimization -- Test: nested unbounded with large min causes overflow fallback --- Expected: Fallback - min overflow (2000000000 * 2000000000 > INT32_MAX) EXPLAIN (COSTS OFF) SELECT COUNT(*) OVER w FROM rpr_fallback WINDOW w AS ( @@ -2653,9 +2640,9 @@ WINDOW w AS ( PATTERN ((A{2000000000,}){2000000000,}) DEFINE A AS val > 0 ); +-- Expected: Fallback - min overflow (2000000000 * 2000000000 > INT32_MAX) -- Test: prefix mismatch causes optimization fallback --- Expected: Fallback - prefix elements don't match GROUP content EXPLAIN (COSTS OFF) SELECT COUNT(*) OVER w FROM rpr_fallback WINDOW w AS ( @@ -2664,6 +2651,7 @@ WINDOW w AS ( PATTERN (A B (C D)+) DEFINE A AS val > 0, B AS val > 5, C AS val > 10, D AS val > 15 ); +-- Expected: Fallback - prefix elements don't match GROUP content DROP TABLE rpr_fallback; @@ -2752,7 +2740,6 @@ FROM rpr_planner ORDER BY id; -- Window with Aggregate Functions - SELECT category, COUNT(*) OVER w as window_cnt, COUNT(*) as agg_cnt @@ -2766,6 +2753,7 @@ WINDOW w AS ( ) GROUP BY category ORDER BY category; +-- Expected: ERROR (GROUP BY with window RPR not supported) -- ============================================================ -- Subquery and CTE Tests @@ -3320,7 +3308,6 @@ CREATE TABLE rpr_errors (id INT, val INT); INSERT INTO rpr_errors VALUES (1, 10), (2, 20); -- Test: PATTERN variable without DEFINE (A), DEFINE variable not in PATTERN (B) --- Expected: Success - A is implicitly TRUE, B is filtered out SELECT id, val, COUNT(*) OVER w FROM rpr_errors WINDOW w AS ( ORDER BY id @@ -3329,9 +3316,9 @@ WINDOW w AS ( DEFINE B AS TRUE ); +-- Expected: Success - A is implicitly TRUE, B is filtered out -- Test: 3 variables in PATTERN, 253 in DEFINE (DEFINE filtering test) --- Expected: Success - unused DEFINE variables are filtered out SELECT COUNT(*) OVER w FROM rpr_errors WINDOW w AS ( ORDER BY id @@ -3365,9 +3352,9 @@ WINDOW w AS ( V241 AS val > 0, V242 AS val > 0, V243 AS val > 0, V244 AS val > 0, V245 AS val > 0, V246 AS val > 0, V247 AS val > 0, V248 AS val > 0, V249 AS val > 0, V250 AS val > 0, V251 AS val > 0, V252 AS val > 0, V253 AS val > 0 ); +-- Expected: Success - unused DEFINE variables are filtered out -- Test: 251 variables in PATTERN, 252 in DEFINE (boundary - should succeed) --- Expected: Success - unused DEFINE variables are filtered out SELECT COUNT(*) OVER w FROM rpr_errors WINDOW w AS ( ORDER BY id @@ -3401,10 +3388,9 @@ WINDOW w AS ( V241 AS val > 0, V242 AS val > 0, V243 AS val > 0, V244 AS val > 0, V245 AS val > 0, V246 AS val > 0, V247 AS val > 0, V248 AS val > 0, V249 AS val > 0, V250 AS val > 0, V251 AS val > 0, V252 AS val > 0 ); - +-- Expected: Success - unused DEFINE variables are filtered out -- Test: 252 variables in PATTERN, 251 in DEFINE (exceeds limit with implicit TRUE) --- Expected: ERROR - too many pattern variables (Maximum is 251) SELECT COUNT(*) OVER w FROM rpr_errors WINDOW w AS ( ORDER BY id @@ -3438,9 +3424,9 @@ WINDOW w AS ( V241 AS val > 0, V242 AS val > 0, V243 AS val > 0, V244 AS val > 0, V245 AS val > 0, V246 AS val > 0, V247 AS val > 0, V248 AS val > 0, V249 AS val > 0, V250 AS val > 0, V251 AS val > 0 ); +-- Expected: ERROR - too many pattern variables (Maximum is 251) -- Test: Pattern nesting at maximum depth (depth 253) --- Expected: Should succeed -- Note: 253 nested GROUP{3,7} quantifiers produce depth 253 after optimization SELECT id, val, COUNT(*) OVER w FROM rpr_errors WINDOW w AS ( @@ -3449,9 +3435,9 @@ WINDOW w AS ( PATTERN ((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((A{3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}) DEFINE A AS val > 0 ); +-- Expected: Should succeed -- Test: Pattern nesting depth exceeds maximum (depth 254) --- Expected: ERROR - pattern nesting too deep -- Note: 254 nested GROUP{3,7} quantifiers produce depth 254 after optimization SELECT id, val, COUNT(*) OVER w FROM rpr_errors WINDOW w AS ( @@ -3460,6 +3446,7 @@ WINDOW w AS ( PATTERN (((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((A{3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}){3,7}) DEFINE A AS val > 0 ); +-- Expected: ERROR - pattern nesting too deep DROP TABLE rpr_errors; @@ -3567,6 +3554,25 @@ WINDOW w AS ( DEFINE A AS val <= 30, B AS val BETWEEN 31 AND 60, C AS val > 80 ); +-- Test: (A+ | (A | B)+)* - nested alternation inside quantified group +-- Previously caused infinite recursion in nfa_advance_alt when the inner +-- BEGIN(+)'s skip jump was followed as an ALT branch pointer. +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM (VALUES + (1, ARRAY['A', 'B']), + (2, ARRAY['B']), + (3, ARRAY['C']) +) AS t(id, flags) +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A+ | (A | B)+)*) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + -- ============================================================ -- Pathological Patterns -- ============================================================ diff --git a/src/test/regress/sql/rpr_explain.sql b/src/test/regress/sql/rpr_explain.sql index 80088ab18e2..f8c8f62e594 100644 --- a/src/test/regress/sql/rpr_explain.sql +++ b/src/test/regress/sql/rpr_explain.sql @@ -189,7 +189,7 @@ WINDOW w AS ( ); SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; SELECT rpr_explain_filter(' -EXPLAIN (COSTS OFF) +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w FROM generate_series(1, 20) AS s(v) WINDOW w AS ( @@ -212,7 +212,7 @@ WINDOW w AS ( ); SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; SELECT rpr_explain_filter(' -EXPLAIN (COSTS OFF) +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w FROM generate_series(1, 30) AS s(v) WINDOW w AS ( @@ -365,7 +365,7 @@ WINDOW w AS ( ); SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; SELECT rpr_explain_filter(' -EXPLAIN (COSTS OFF) +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w FROM generate_series(1, 40) AS s(v) WINDOW w AS ( @@ -389,7 +389,7 @@ WINDOW w AS ( ); SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; SELECT rpr_explain_filter(' -EXPLAIN (COSTS OFF) +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w FROM generate_series(1, 40) AS s(v) WINDOW w AS ( @@ -412,7 +412,7 @@ WINDOW w AS ( ); SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; SELECT rpr_explain_filter(' -EXPLAIN (COSTS OFF) +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w FROM generate_series(1, 40) AS s(v) WINDOW w AS ( @@ -1680,7 +1680,7 @@ WINDOW w AS ( ); SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; SELECT rpr_explain_filter(' -EXPLAIN (COSTS OFF) +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w FROM generate_series(1, 100) AS s(v) WINDOW w AS ( @@ -1701,7 +1701,7 @@ WINDOW w AS ( ); SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; SELECT rpr_explain_filter(' -EXPLAIN (COSTS OFF) +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w FROM generate_series(1, 60) AS s(v) WINDOW w AS ( @@ -1722,7 +1722,7 @@ WINDOW w AS ( ); SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; SELECT rpr_explain_filter(' -EXPLAIN (COSTS OFF) +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w FROM generate_series(1, 100) AS s(v) WINDOW w AS ( @@ -1743,7 +1743,7 @@ WINDOW w AS ( ); SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; SELECT rpr_explain_filter(' -EXPLAIN (COSTS OFF) +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w FROM generate_series(1, 60) AS s(v) WINDOW w AS ( @@ -1764,7 +1764,7 @@ WINDOW w AS ( ); SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; SELECT rpr_explain_filter(' -EXPLAIN (COSTS OFF) +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w FROM generate_series(1, 60) AS s(v) WINDOW w AS ( @@ -1786,7 +1786,7 @@ WINDOW w AS ( ); SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; SELECT rpr_explain_filter(' -EXPLAIN (COSTS OFF) +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w FROM generate_series(1, 20) AS s(v) WINDOW w AS ( @@ -1808,7 +1808,7 @@ WINDOW w AS ( ); SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; SELECT rpr_explain_filter(' -EXPLAIN (COSTS OFF) +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w FROM generate_series(1, 20) AS s(v) WINDOW w AS ( @@ -1902,7 +1902,7 @@ WINDOW w AS ( ); SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; SELECT rpr_explain_filter(' -EXPLAIN (COSTS OFF) +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w FROM generate_series(1, 40) AS s(v) WINDOW w AS ( @@ -1923,7 +1923,7 @@ WINDOW w AS ( ); SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; SELECT rpr_explain_filter(' -EXPLAIN (COSTS OFF) +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w FROM generate_series(1, 60) AS s(v) WINDOW w AS ( @@ -1944,7 +1944,7 @@ WINDOW w AS ( ); SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; SELECT rpr_explain_filter(' -EXPLAIN (COSTS OFF) +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w FROM generate_series(1, 60) AS s(v) WINDOW w AS ( @@ -1965,7 +1965,7 @@ WINDOW w AS ( ); SELECT line FROM unnest(string_to_array(pg_get_viewdef('rpr_v'), E'\n')) AS line WHERE line ~ 'PATTERN'; SELECT rpr_explain_filter(' -EXPLAIN (COSTS OFF) +EXPLAIN (ANALYZE, BUFFERS OFF, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT count(*) OVER w FROM generate_series(1, 60) AS s(v) WINDOW w AS ( diff --git a/src/test/regress/sql/rpr_nfa.sql b/src/test/regress/sql/rpr_nfa.sql new file mode 100644 index 00000000000..9573d1dab3b --- /dev/null +++ b/src/test/regress/sql/rpr_nfa.sql @@ -0,0 +1,1865 @@ +-- ============================================================ +-- RPR NFA Tests +-- Tests for Row Pattern Recognition NFA Runtime Execution +-- ============================================================ +-- +-- This test suite validates the NFA (Non-deterministic Finite +-- Automaton) runtime execution engine in nodeWindowAgg.c, +-- focusing on update_reduced_frame and related functions. +-- +-- Test Strategy: +-- Diagonal pattern style using ARRAY flags to explicitly +-- control which pattern variables match at each row. +-- +-- Test Coverage: +-- Basic NFA Flow (match->absorb->advance) +-- Absorption Optimization +-- Context Lifecycle Management +-- Advance Phase (Epsilon Transitions) +-- Match Phase (Variable Matching) +-- Frame Boundary Handling +-- State Management (Deduplication) +-- Statistics and Diagnostics +-- Quantifier Runtime Behavior +-- Pathological Pattern Protection +-- Alternation Runtime Behavior +-- Deep Nested Groups +-- SKIP Options (Runtime) +-- INITIAL Mode (Runtime) +-- Frame Boundary Variations +-- Special Partition Cases +-- DEFINE Special Cases +-- Absorption Dynamic Flags +-- FIXME Issues (Known Limitations) +-- +-- Responsibility: +-- - NFA runtime execution paths +-- - Context/State lifecycle management +-- - Runtime boundary conditions and protections +-- +-- NOT tested here (covered in other files): +-- - Pattern parsing/optimization (rpr_base.sql) +-- - EXPLAIN output (rpr_explain.sql) +-- - PREV/NEXT semantics (rpr.sql) +-- ============================================================ + +-- ============================================================ +-- Basic NFA Flow +-- ============================================================ + +-- Simple sequential pattern +WITH test_sequential AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['C']), + (4, ARRAY['D']), + (5, ARRAY['_']) -- No match + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_sequential +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B C D) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags) +); + +-- Quantified pattern (A+ B+ C+) +WITH test_quantified AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['B']), + (6, ARRAY['C']), + (7, ARRAY['C']), + (8, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_quantified +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+ B+ C+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags) +); + +-- Optional pattern (A B? C) +WITH test_optional AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['C']), -- B skipped + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['C']), -- B matched + (6, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_optional +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B? C) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags) +); + +-- Alternation pattern (A (B|C) D) +WITH test_alternation AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), -- First branch + (3, ARRAY['D']), + (4, ARRAY['A']), + (5, ARRAY['C']), -- Second branch + (6, ARRAY['D']), + (7, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_alternation +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A (B | C) D) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags) +); + +-- ============================================================ +-- Absorption Optimization +-- ============================================================ + +-- Absorbable pattern (A+) +WITH test_absorbable AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['A']), + (5, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_absorbable +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+) + DEFINE + A AS 'A' = ANY(flags) +); + +-- Mixed absorbable/non-absorbable ((A+) | B) +WITH test_mixed_absorption AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_mixed_absorption +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A+) | B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + +-- State coverage (same elemIdx, different count) +WITH test_state_coverage AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_state_coverage +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A{2,} B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + +-- ============================================================ +-- Context Lifecycle +-- ============================================================ + +-- Multiple overlapping contexts (SKIP TO NEXT ROW) +WITH test_overlapping_contexts AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_overlapping_contexts +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+ B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + +-- Failed context cleanup (early failure) +WITH test_context_cleanup AS ( + SELECT * FROM (VALUES + (1, ARRAY['_']), -- Pruned at first row + (2, ARRAY['A']), + (3, ARRAY['_']), -- Mismatched after row 2 + (4, ARRAY['A']), + (5, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_context_cleanup +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + +-- Partition end (incomplete contexts) +WITH test_partition_end AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']) + -- Pattern requires B, but partition ends + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_partition_end +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+ B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + +-- Completed context encountered during processing +-- Pattern (A | B C D): Ctx1 takes long B->C->D path, while Ctx2 takes +-- short A path and completes first. Next row sees Ctx2 +-- with states=NULL and skips it. +WITH test_completed_ctx AS ( + SELECT * FROM (VALUES + (1, ARRAY['B', '_']), + (2, ARRAY['C', 'A']), + (3, ARRAY['D', '_']), + (4, ARRAY['_', '_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_completed_ctx +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A | B C D) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags) +); + +-- ============================================================ +-- Advance Phase (Epsilon Transitions) +-- ============================================================ + +-- Nested groups ((A B)+) +WITH test_nested_groups AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['A']), + (6, ARRAY['B']), + (7, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_nested_groups +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A B)+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + +-- Multiple alternation branches (A (B|C|D) E) +WITH test_multi_alt AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['E']), + (4, ARRAY['A']), + (5, ARRAY['C']), + (6, ARRAY['E']), + (7, ARRAY['A']), + (8, ARRAY['D']), + (9, ARRAY['E']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_multi_alt +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A (B | C | D) E) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags), + E AS 'E' = ANY(flags) +); + +-- Optional VAR at start (A? B C) +WITH test_optional_var AS ( + SELECT * FROM (VALUES + (1, ARRAY['B']), -- A skipped + (2, ARRAY['C']), + (3, ARRAY['A']), -- A matched + (4, ARRAY['B']), + (5, ARRAY['C']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_optional_var +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A? B C) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags) +); + +-- Nested alternation ((A|B) (C|D)) +WITH test_nested_alt AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['C']), -- A C + (3, ARRAY['A']), + (4, ARRAY['D']), -- A D + (5, ARRAY['B']), + (6, ARRAY['C']), -- B C + (7, ARRAY['B']), + (8, ARRAY['D']) -- B D + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_nested_alt +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A | B) (C | D)) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags) +); + +-- ============================================================ +-- Match Phase +-- ============================================================ + +-- Simple VAR with END next (A B C all min=max=1) +WITH test_simple_var AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['C']), + (4, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_simple_var +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B C) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags) +); + +-- VAR max exceeded (A{2,3}) +WITH test_max_exceeded AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), -- Max = 3 + (4, ARRAY['A']), -- Exceeds max, state removed + (5, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_max_exceeded +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A{2,3} B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + +-- Non-matching VAR (DEFINE false) +WITH test_non_matching AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['_']), -- B not matched (DEFINE false) + (3, ARRAY['A']), + (4, ARRAY['B']), -- B matched + (5, ARRAY['C']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_non_matching +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B C) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags) +); + +-- ============================================================ +-- Frame Boundary Handling +-- ============================================================ + +-- Limited frame (ROWS BETWEEN CURRENT ROW AND 3 FOLLOWING) +WITH test_limited_frame AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['B']), -- Within 3 FOLLOWING + (5, ARRAY['B']), -- Beyond 3 FOLLOWING from row 1 + (6, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_limited_frame +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND 3 FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+ B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + +-- Unbounded frame (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) +WITH test_unbounded_frame AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['A']), + (5, ARRAY['A']), + (6, ARRAY['B']) -- Far from start, but unbounded + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_unbounded_frame +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+ B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + +-- Match exceeds frame boundary +WITH test_frame_exceeded AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']) + -- Frame ends at row 3 (2 FOLLOWING), B never appears + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_frame_exceeded +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND 2 FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+ B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + +-- Frame boundary forced mismatch +-- Limited frame with enough rows so that a context's frame boundary +-- is exceeded while still processing. +WITH test_frame_boundary AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['A']), + (5, ARRAY['A']), + (6, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_frame_boundary +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND 2 FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+ B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + +-- ============================================================ +-- State Management +-- ============================================================ + +-- Duplicate state creation +WITH test_duplicate_states AS ( + SELECT * FROM (VALUES + (1, ARRAY['A', 'B']), -- Both A and B match (creates duplicate states via different paths) + (2, ARRAY['C', '_']), + (3, ARRAY['D', '_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_duplicate_states +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A | B) C D) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags) +); + +-- Large pattern (stress free list) +WITH test_large_pattern AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['C']), + (4, ARRAY['D']), + (5, ARRAY['E']), + (6, ARRAY['F']), + (7, ARRAY['G']), + (8, ARRAY['H']), + (9, ARRAY['I']), + (10, ARRAY['J']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_large_pattern +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B C D E F G H I J) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags), + E AS 'E' = ANY(flags), + F AS 'F' = ANY(flags), + G AS 'G' = ANY(flags), + H AS 'H' = ANY(flags), + I AS 'I' = ANY(flags), + J AS 'J' = ANY(flags) +); + +-- Reduced frame map reallocation (> 1024 rows) +WITH test_map_realloc AS ( + SELECT id, CASE WHEN id % 2 = 1 THEN ARRAY['A'] ELSE ARRAY['B'] END AS flags + FROM generate_series(1, 1100) AS id +) +SELECT count(*), min(match_start), max(match_end) +FROM ( + SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end + FROM test_map_realloc + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) + ) +) sub; + +-- ============================================================ +-- Statistics and Diagnostics +-- ============================================================ + +-- Matched contexts +WITH test_matched AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['A']), + (4, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_matched +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + +-- Pruned contexts (failed at first row) +WITH test_pruned AS ( + SELECT * FROM (VALUES + (1, ARRAY['_']), -- Pruned + (2, ARRAY['_']), -- Pruned + (3, ARRAY['A']), + (4, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_pruned +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + +-- Mismatched contexts (failed after multiple rows) +WITH test_mismatched AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['_']), -- Mismatched after 2 rows + (4, ARRAY['A']), + (5, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_mismatched +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+ B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + +-- Absorbed contexts +WITH test_absorbed AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['A']), + (5, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_absorbed +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+) + DEFINE + A AS 'A' = ANY(flags) +); + +-- Skipped contexts (SKIP TO NEXT ROW) +WITH test_skipped AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['B']) -- Completes match starting at row 1 + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_skipped +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+ B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + +-- ============================================================ +-- Quantifier Runtime Behavior +-- ============================================================ + +-- Large count handling (A{100}) +WITH test_large_count AS ( + SELECT i AS id, ARRAY['A'] AS flags + FROM generate_series(1, 105) i +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_large_count +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A{100}) + DEFINE + A AS 'A' = ANY(flags) +); + +-- Unlimited quantifier (A{10,}) +WITH test_unlimited AS ( + SELECT i AS id, ARRAY['A'] AS flags + FROM generate_series(1, 15) i + UNION ALL + SELECT 16, ARRAY['B'] +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_unlimited +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A{10,} B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + +-- Min boundary (A{3,5}) +WITH test_min_boundary AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), -- Min=3 reached, exit path available + (4, ARRAY['B']), -- Match ends at min + (5, ARRAY['A']), + (6, ARRAY['A']), + (7, ARRAY['A']), + (8, ARRAY['A']), + (9, ARRAY['A']), -- Count=5, max reached + (10, ARRAY['B']) -- Match ends at max + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_min_boundary +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A{3,5} B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + +-- Max boundary exceeded (A{3,5}) +WITH test_max_boundary AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['A']), + (5, ARRAY['A']), + (6, ARRAY['A']), -- Count=6 > max=5, row 1 context removed + (7, ARRAY['B']) -- Row 1 context: no match (exceeded max) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_max_boundary +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A{3,5} B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + +-- ============================================================ +-- Pathological Pattern Runtime Protection +-- ============================================================ + +-- Complex nested nullable ((A* B*)*) - Runtime protection +WITH test_complex_nested AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['B']), + (4, ARRAY['B']), + (5, ARRAY['C']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_complex_nested +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A* B*)*) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + +-- Nested nullable with quantifier ((A{0,3})*) +WITH test_nested_quantifier AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_nested_quantifier +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A{0,3})*) + DEFINE + A AS 'A' = ANY(flags) +); + +-- ============================================================ +-- Alternation Runtime Behavior +-- ============================================================ + +-- Multi-branch alternation (A (B|C|D|E) F) +WITH test_multi_branch AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['F']), + (4, ARRAY['A']), + (5, ARRAY['C']), + (6, ARRAY['F']), + (7, ARRAY['A']), + (8, ARRAY['D']), + (9, ARRAY['F']), + (10, ARRAY['A']), + (11, ARRAY['E']), + (12, ARRAY['F']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_multi_branch +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A (B | C | D | E) F) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags), + E AS 'E' = ANY(flags), + F AS 'F' = ANY(flags) +); + +-- Alternation with quantifiers (A+ | B+ | C+) +WITH test_alt_quantifiers AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['B']), + (6, ARRAY['C']), + (7, ARRAY['C']), + (8, ARRAY['C']), + (9, ARRAY['C']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_alt_quantifiers +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+ | B+ | C+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags) +); + +-- altPriority replacement (A B C | D) +-- D branch (higher altPriority) matches first at row 1, +-- then A B C branch (lower altPriority) replaces it at row 3. +WITH test_alt_replace AS ( + SELECT * FROM (VALUES + (1, ARRAY['A', 'D']), + (2, ARRAY['B', '_']), + (3, ARRAY['C', '_']), + (4, ARRAY['_', '_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_alt_replace +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B C | D) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags) +); + +-- ============================================================ +-- Deep Nested Groups +-- ============================================================ + +-- Three-level nesting ((((A B)+)+)+) +WITH test_deep_nesting AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['A']), + (6, ARRAY['B']), + (7, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_deep_nesting +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((((A B)+)+)+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + +-- Multiple groups in nesting (((A B) (C D))+) +WITH test_nested_sequential AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['C']), + (4, ARRAY['D']), + (5, ARRAY['A']), + (6, ARRAY['B']), + (7, ARRAY['C']), + (8, ARRAY['D']), + (9, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_nested_sequential +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (((A B) (C D))+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags) +); + +-- Nested END→END max reached +-- Inner group (A B){2} reaches max=2 → exits to outer END +WITH test_end_nested_max AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['A']), + (6, ARRAY['B']), + (7, ARRAY['A']), + (8, ARRAY['B']), + (9, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_end_nested_max +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (((A B){2})+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + +-- Nested END→END between min/max +-- Inner group (A B){1,3} exits between min/max → outer END count++ +WITH test_end_nested_mid AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['A']), + (6, ARRAY['B']), + (7, ARRAY['A']), + (8, ARRAY['B']), + (9, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_end_nested_mid +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (((A B){1,3})+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + +-- ============================================================ +-- SKIP Options (Runtime) +-- ============================================================ + +-- SKIP PAST LAST ROW (non-overlapping matches) +WITH test_skip_past AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['A']), + (5, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_skip_past +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+) + DEFINE + A AS 'A' = ANY(flags) +); + +-- SKIP TO NEXT ROW (overlapping matches) +WITH test_skip_next AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['A']), + (5, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_skip_next +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+) + DEFINE + A AS 'A' = ANY(flags) +); + +-- SKIP difference verification +WITH test_skip_diff AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['A']), + (4, ARRAY['B']) + ) AS t(id, flags) +) +SELECT 'SKIP PAST' AS mode, id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_skip_diff +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +) +UNION ALL +SELECT 'SKIP NEXT' AS mode, id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_skip_diff +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +) +ORDER BY mode, id; + +-- ============================================================ +-- INITIAL Mode (Runtime) +-- ============================================================ + +-- INITIAL mode (not yet supported - produces syntax error) +WITH test_initial_mode AS ( + SELECT * FROM (VALUES + (1, ARRAY['_']), -- Unmatched + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['_']), -- Unmatched + (5, ARRAY['A']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_initial_mode +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+) + DEFINE + A AS 'A' = ANY(flags) +); + +-- Default mode (include all rows) +WITH test_default_mode AS ( + SELECT * FROM (VALUES + (1, ARRAY['_']), -- Unmatched, but included + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['_']), -- Unmatched, but included + (5, ARRAY['A']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_default_mode +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+) + DEFINE + A AS 'A' = ANY(flags) +); + +-- Mode difference verification (INITIAL not yet supported - produces syntax error) +WITH test_mode_diff AS ( + SELECT * FROM (VALUES + (1, ARRAY['_']), + (2, ARRAY['A']), + (3, ARRAY['_']) + ) AS t(id, flags) +) +SELECT 'INITIAL' AS mode, COUNT(*) AS row_count +FROM ( + SELECT id FROM test_mode_diff + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A) + DEFINE A AS 'A' = ANY(flags) + ) +) sub +UNION ALL +SELECT 'DEFAULT' AS mode, COUNT(*) AS row_count +FROM ( + SELECT id FROM test_mode_diff + WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A) + DEFINE A AS 'A' = ANY(flags) + ) +) sub +ORDER BY mode; + +-- ============================================================ +-- Frame Boundary Variations +-- ============================================================ + +-- Very limited frame (1 FOLLOWING) +WITH test_one_following AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), -- Within 1 FOLLOWING + (3, ARRAY['A']), -- Beyond 1 FOLLOWING from row 1 + (4, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_one_following +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + +-- Medium frame (10 FOLLOWING) +WITH test_ten_following AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['A']), + (5, ARRAY['A']), + (6, ARRAY['A']), + (7, ARRAY['A']), + (8, ARRAY['A']), + (9, ARRAY['A']), + (10, ARRAY['A']), + (11, ARRAY['B']), -- Within 10 FOLLOWING from row 1 + (12, ARRAY['A']), + (13, ARRAY['B']) -- Beyond 10 FOLLOWING from row 1 + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_ten_following +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND 10 FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+ B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + +-- Exact boundary match +WITH test_exact_boundary AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['A']), + (5, ARRAY['B']) -- Exactly at 4 FOLLOWING (frame end) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_exact_boundary +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND 4 FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+ B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + +-- ============================================================ +-- Special Partition Cases +-- ============================================================ + +-- Empty partition (0 rows) +WITH test_empty_partition AS ( + SELECT * FROM (VALUES + (1, 1, ARRAY['A']), + (2, 2, ARRAY['_']) -- Different partition + ) AS t(id, part, flags) +) +SELECT id, part, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_empty_partition +WHERE part = 99 -- No rows match +WINDOW w AS ( + PARTITION BY part + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A) + DEFINE + A AS 'A' = ANY(flags) +); + +-- Single row partition +WITH test_single_row AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_single_row +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A) + DEFINE + A AS 'A' = ANY(flags) +); + +-- All rows fail matching (all DEFINE false) +WITH test_all_fail AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_all_fail +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+) + DEFINE + A AS false -- All rows fail +); + +-- Partition end with absorbable pattern +-- SKIP PAST LAST ROW + unbounded frame + all rows match A +-- Triggers absorb in !rowExists path at partition boundary. +WITH test_absorb_partition_end AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['A']), + (5, ARRAY['A']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_absorb_partition_end +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+) + DEFINE + A AS 'A' = ANY(flags) +); + +-- ============================================================ +-- DEFINE Special Cases +-- ============================================================ + +-- Undefined variable in DEFINE +WITH test_undefined_var AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['X']), -- B not defined, defaults to TRUE + (3, ARRAY['C']), + (4, ARRAY['A']), + (5, ARRAY['_']), -- B defaults to TRUE, but no flags + (6, ARRAY['C']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_undefined_var +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B C) + DEFINE + A AS 'A' = ANY(flags), + -- B is undefined, defaults to TRUE + C AS 'C' = ANY(flags) +); + +-- ============================================================ +-- Absorption Dynamic Flags +-- ============================================================ + +-- Partial absorbable pattern ((A+) B) +WITH test_partial_absorbable AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_partial_absorbable +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A+) B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + +-- Dynamic flag update ((A+) | B) +WITH test_dynamic_flags AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['A']), + (6, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_dynamic_flags +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A+) | B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + +-- Non-absorbable context during absorption +-- Pattern (A B)+ C: A,B in absorbable group, C is not. +-- When END exits to C via nfa_state_create, isAbsorbable becomes false. +WITH test_non_absorbable AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['C']), + (6, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_non_absorbable +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A B)+ C) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags) +); + +-- Absorption flags early return (!hasAbsorbableState) +-- Pattern (A B)+ C D with SKIP PAST LAST ROW +-- After reaching C (non-absorbable), hasAbsorbableState becomes false. +-- On next row (D), the early return fires. +WITH test_absorption_early_return AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['C']), + (6, ARRAY['D']), + (7, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_absorption_early_return +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A B)+ C D) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags) +); + +-- Coverage failure: older can't cover newer's states +-- Pattern A+ | B+ with SKIP PAST LAST ROW. +-- Row 1: only A → Ctx1 takes A branch only (B fails). +-- Row 2: A and B → Ctx2 takes both branches. +-- Absorption: Ctx1 has A but no B → can't cover Ctx2's B state → fails. +WITH test_coverage_fail AS ( + SELECT * FROM (VALUES + (1, ARRAY['A', '_']), + (2, ARRAY['A', 'B']), + (3, ARRAY['A', '_']), + (4, ARRAY['A', '_']), + (5, ARRAY['_', '_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_coverage_fail +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ | B+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + +-- Absorb skips completed context (older->states==NULL) +-- Pattern A+ | B+ with SKIP PAST LAST ROW. +-- Row 1: A only → Ctx1 takes A branch. Row 2: B only → Ctx1 A fails (completed). +-- Ctx2 takes B branch. Absorption: Ctx1 states==NULL → skip. +WITH test_older_completed AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['B']), + (4, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_older_completed +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ | B+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + +-- Absorb skips non-absorbable context (!hasAbsorbableState) +-- Pattern A+ | B C with SKIP PAST LAST ROW (only A+ branch absorbable). +-- Row 1: B only → Ctx1 takes B branch (non-absorbable), advances to C. +-- Row 2: C,A → Ctx1 C matches (hasAbsorbableState=false). Ctx2 takes A (absorbable). +-- Absorption: Ctx1 !hasAbsorbableState → skip. +WITH test_older_non_absorbable AS ( + SELECT * FROM (VALUES + (1, ARRAY['B', '_']), + (2, ARRAY['C', 'A']), + (3, ARRAY['_', 'A']), + (4, ARRAY['_', '_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_older_non_absorbable +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ | B C) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags) +); + +-- ============================================================ +-- FIXME Issues - Known Limitations +-- ============================================================ + +-- FIXME 1 - altPriority lexical order +WITH test_alt_priority_repeated AS ( + SELECT * FROM (VALUES + (1, ARRAY['A','B']), -- Both A and B match + (2, ARRAY['A','B']), + (3, ARRAY['A','B']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_alt_priority_repeated +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A | B)+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + +-- FIXME 1 - Nested ALT lexical order +WITH test_alt_priority_nested AS ( + SELECT * FROM (VALUES + (1, ARRAY['A','B']), + (2, ARRAY['C','D']), + (3, ARRAY['A','B']), + (4, ARRAY['C','D']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_alt_priority_nested +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (((A | B) (C | D))+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags) +); + +-- FIXME 2 - Cycle prevention at count > 0 +WITH test_cycle_nonzero AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['B']) -- Inner A* matches 0, cycles at count=3 + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_cycle_nonzero +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A*)*) + DEFINE + A AS 'A' = ANY(flags) +); + +-- FIXME 2 - Cycle with mixed nullables +WITH test_cycle_mixed AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['A']), + (4, ARRAY['C']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_cycle_mixed +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A* B*)*) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); -- 2.50.1 (Apple Git-155)