From 6032b704c3e2930c5f55fe03a881a1bb59bc7e14 Mon Sep 17 00:00:00 2001 From: Henson Choi Date: Fri, 20 Feb 2026 13:18:19 +0900 Subject: [PATCH 04/10] Implement reluctant quantifiers for row pattern recognition --- src/backend/executor/nodeWindowAgg.c | 170 ++++++-- src/backend/optimizer/plan/rpr.c | 33 +- src/backend/parser/gram.y | 6 - src/test/regress/expected/rpr.out | 51 +-- src/test/regress/expected/rpr_base.out | 278 ++++++++++--- src/test/regress/expected/rpr_nfa.out | 516 +++++++++++++++++++++++++ src/test/regress/sql/rpr.sql | 25 +- src/test/regress/sql/rpr_base.sql | 87 ++++- src/test/regress/sql/rpr_nfa.sql | 398 +++++++++++++++++++ 9 files changed, 1428 insertions(+), 136 deletions(-) diff --git a/src/backend/executor/nodeWindowAgg.c b/src/backend/executor/nodeWindowAgg.c index 96b2cd4b9c4..371439aad91 100644 --- a/src/backend/executor/nodeWindowAgg.c +++ b/src/backend/executor/nodeWindowAgg.c @@ -6149,16 +6149,40 @@ nfa_advance_begin(WindowAggState *winstate, RPRNFAContext *ctx, state->counts, state->isAbsorbable); } - /* Enter group: route to first child (lexically first) */ - state->elemIdx = elem->next; - nfa_route_to_elem(winstate, ctx, state, - &elements[state->elemIdx], currentPos, initialAdvance); - - /* Now route skip path (lexically second) */ - if (skipState != NULL) + if (skipState != NULL && RPRElemIsReluctant(elem)) { + RPRNFAState *savedMatch = ctx->matchedState; + + /* Reluctant: skip first (prefer fewer iterations), enter second */ nfa_route_to_elem(winstate, ctx, skipState, &elements[elem->jump], currentPos, initialAdvance); + + /* + * If skip path reached FIN, shortest match is found. Skip group entry + * to prevent longer matches. + */ + if (ctx->matchedState != savedMatch) + { + nfa_state_free(winstate, state); + return; + } + + state->elemIdx = elem->next; + nfa_route_to_elem(winstate, ctx, state, + &elements[state->elemIdx], currentPos, initialAdvance); + } + else + { + /* Greedy: enter first, skip second */ + state->elemIdx = elem->next; + nfa_route_to_elem(winstate, ctx, state, + &elements[state->elemIdx], currentPos, initialAdvance); + + if (skipState != NULL) + { + nfa_route_to_elem(winstate, ctx, skipState, + &elements[elem->jump], currentPos, initialAdvance); + } } } @@ -6225,7 +6249,8 @@ nfa_advance_end(WindowAggState *winstate, RPRNFAContext *ctx, { /* * Between min and max (with at least one iteration) - can exit or - * loop + * loop. Greedy: loop first (prefer more iterations). Reluctant: exit + * first (prefer fewer iterations). */ RPRNFAState *exitState; RPRPatternElement *jumpElem; @@ -6244,16 +6269,43 @@ nfa_advance_end(WindowAggState *winstate, RPRNFAContext *ctx, if (RPRElemIsEnd(nextElem) && exitState->counts[nextElem->depth] < RPR_COUNT_MAX) exitState->counts[nextElem->depth]++; - /* Route loop state first (earlier in pattern = lexical order) */ + /* Prepare loop state */ for (int d = depth + 1; d < pattern->maxDepth; d++) state->counts[d] = 0; state->elemIdx = elem->jump; jumpElem = &elements[state->elemIdx]; - nfa_route_to_elem(winstate, ctx, state, jumpElem, currentPos, initialAdvance); + if (RPRElemIsReluctant(elem)) + { + RPRNFAState *savedMatch = ctx->matchedState; + + /* Exit first (preferred for reluctant) */ + nfa_route_to_elem(winstate, ctx, exitState, nextElem, + currentPos, initialAdvance); - /* Then route exit state */ - nfa_route_to_elem(winstate, ctx, exitState, nextElem, currentPos, initialAdvance); + /* + * If exit path reached FIN, shortest match is found. Skip loop to + * prevent longer matches from replacing it. + */ + if (ctx->matchedState != savedMatch) + { + nfa_state_free(winstate, state); + return; + } + + /* Loop second */ + nfa_route_to_elem(winstate, ctx, state, jumpElem, + currentPos, initialAdvance); + } + else + { + /* Loop first (preferred for greedy) */ + nfa_route_to_elem(winstate, ctx, state, jumpElem, + currentPos, initialAdvance); + /* Exit second */ + nfa_route_to_elem(winstate, ctx, exitState, nextElem, + currentPos, initialAdvance); + } } } @@ -6280,34 +6332,86 @@ nfa_advance_var(WindowAggState *winstate, RPRNFAContext *ctx, if (canLoop && canExit) { - /* Both: clone for loop, modify original for exit */ - RPRNFAState *loopState; + /* + * Both loop and exit possible. Greedy: loop first (prefer longer + * match). Reluctant: exit first (prefer shorter match). + */ + RPRNFAState *cloneState; RPRPatternElement *nextElem; - - loopState = nfa_state_create(winstate, state->elemIdx, - state->counts, state->isAbsorbable); - nfa_add_state_unique(winstate, ctx, loopState); - - /* Exit: advance to next element */ - state->counts[depth] = 0; - state->elemIdx = elem->next; - nextElem = &elements[state->elemIdx]; + bool reluctant = RPRElemIsReluctant(elem); /* - * When a quantified VAR (e.g., A+) exits directly to an outer END, - * increment the END's iteration count. Simple VARs (min=max=1) - * handle this via inline advance in nfa_match, but quantified VARs - * bypass that path. Only increment when the VAR actually consumed - * rows (count > 0) to preserve cycle prevention for zero-progress - * loops. + * Clone state for the second-priority path. For greedy, clone is the + * loop state; for reluctant, clone is the exit state. */ - if (RPRElemIsEnd(nextElem) && count > 0) + if (reluctant) { - if (state->counts[nextElem->depth] < RPR_COUNT_MAX) - state->counts[nextElem->depth]++; + RPRNFAState *savedMatch = ctx->matchedState; + + /* Clone for exit, original stays for loop */ + cloneState = nfa_state_create(winstate, elem->next, + state->counts, state->isAbsorbable); + cloneState->counts[depth] = 0; + nextElem = &elements[cloneState->elemIdx]; + + /* + * When exiting directly to an outer END, increment the END's + * iteration count. + */ + if (RPRElemIsEnd(nextElem) && count > 0) + { + if (cloneState->counts[nextElem->depth] < RPR_COUNT_MAX) + cloneState->counts[nextElem->depth]++; + } + + /* Exit first (preferred for reluctant) */ + nfa_route_to_elem(winstate, ctx, cloneState, nextElem, + currentPos, initialAdvance); + + /* + * If exit path reached FIN, the shortest match is found. Skip + * loop state to prevent longer matches from replacing it. + */ + if (ctx->matchedState != savedMatch) + { + nfa_state_free(winstate, state); + return; + } + + /* Loop second */ + nfa_add_state_unique(winstate, ctx, state); } + else + { + /* Clone for loop, original used for exit */ + cloneState = nfa_state_create(winstate, state->elemIdx, + state->counts, state->isAbsorbable); - nfa_route_to_elem(winstate, ctx, state, nextElem, currentPos, initialAdvance); + /* Loop first (preferred for greedy) */ + nfa_add_state_unique(winstate, ctx, cloneState); + + /* Exit second */ + state->counts[depth] = 0; + state->elemIdx = elem->next; + nextElem = &elements[state->elemIdx]; + + /* + * When exiting directly to an outer END, increment the END's + * iteration count. Simple VARs (min=max=1) handle this via + * inline advance in nfa_match, but quantified VARs bypass that + * path. Only increment when the VAR actually consumed rows + * (count > 0) to preserve cycle prevention for zero-progress + * loops. + */ + if (RPRElemIsEnd(nextElem) && count > 0) + { + if (state->counts[nextElem->depth] < RPR_COUNT_MAX) + state->counts[nextElem->depth]++; + } + + nfa_route_to_elem(winstate, ctx, state, nextElem, + currentPos, initialAdvance); + } } else if (canLoop) { diff --git a/src/backend/optimizer/plan/rpr.c b/src/backend/optimizer/plan/rpr.c index 112ed034fe2..987f595d434 100644 --- a/src/backend/optimizer/plan/rpr.c +++ b/src/backend/optimizer/plan/rpr.c @@ -835,21 +835,44 @@ tryMultiplyQuantifiers(RPRPatternNode *pattern) * Examples: * (A){1,1} -> A * (A B){1,1} -> SEQ(A, B) (unwraps the inner SEQ) + * (A)? -> A? (propagate quantifier to single VAR child) + * (A)+? -> A+? (propagate quantifier including reluctant) * - * If GROUP has min=1, max=1, and is not reluctant, return the child directly. - * Otherwise returns the pattern unchanged. + * If GROUP has min=1, max=1, return the child directly (reluctant on + * {1,1} is meaningless). If GROUP has a single VAR child with default + * quantifier {1,1}, propagate the GROUP's quantifier to the child and + * unwrap. Otherwise returns the pattern unchanged. * * Note: Parser always creates GROUP with exactly one child via list_make1(). */ static RPRPatternNode * tryUnwrapGroup(RPRPatternNode *pattern) { - if (pattern->min != 1 || pattern->max != 1 || pattern->reluctant >= 0) - return pattern; + RPRPatternNode *child; /* Parser always creates GROUP with single child */ Assert(list_length(pattern->children) == 1); - return (RPRPatternNode *) linitial(pattern->children); + + child = (RPRPatternNode *) linitial(pattern->children); + + /* GROUP{1,1}: unwrap directly (reluctant on {1,1} is meaningless) */ + if (pattern->min == 1 && pattern->max == 1) + return child; + + /* + * Single VAR child with default {1,1}: propagate GROUP's quantifier to + * the child and unwrap. E.g., (A)?? -> A??, (A)+? -> A+? + */ + if (child->nodeType == RPR_PATTERN_VAR && + child->min == 1 && child->max == 1 && child->reluctant < 0) + { + child->min = pattern->min; + child->max = pattern->max; + child->reluctant = pattern->reluctant; + return child; + } + + return pattern; } /* diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index e6ae23f368b..5a3d141e992 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -20376,12 +20376,6 @@ makeRPRQuantifier(int min, int max, ParseLoc reluctant, int location, { RPRPatternNode *n = makeNode(RPRPatternNode); - if (reluctant >= 0) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("reluctant quantifiers are not yet supported"), - parser_errposition(reluctant))); - n->min = min; n->max = max; n->reluctant = reluctant; diff --git a/src/test/regress/expected/rpr.out b/src/test/regress/expected/rpr.out index c921badb006..8af44392700 100644 --- a/src/test/regress/expected/rpr.out +++ b/src/test/regress/expected/rpr.out @@ -2814,6 +2814,7 @@ ERROR: unsupported quantifier "~" LINE 9: PATTERN (START UP~ DOWN+) ^ HINT: Valid quantifiers are: *, +, ?, *?, +?, ??, {n}, {n,}, {,m}, {n,m} and their reluctant versions. +-- PREV(1) missing column reference (error) SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w FROM stock WINDOW w AS ( @@ -2828,9 +2829,7 @@ SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER UP AS price > PREV(1), DOWN AS price < PREV(1) ); -ERROR: reluctant quantifiers are not yet supported -LINE 9: PATTERN (START UP+? DOWN+) - ^ +ERROR: row pattern navigation operation's argument must include at least one column reference -- Maximum pattern variables is 251 (RPR_VARID_MAX) -- Error: 252 variables exceeds limit of 251 DO $$ @@ -3836,15 +3835,15 @@ WINDOW w AS ( -- Expected: 1-4 (A C B C) -- ============================================ --- RELUCTANT quantifiers (not yet supported) +-- RELUCTANT quantifiers -- ============================================ --- Test: A+? B (reluctant) - parser rejects with ERROR +-- Test: A+? B (reluctant) - B available at row 3, A exits early WITH jacob_reluctant AS ( SELECT * FROM (VALUES - (1, ARRAY['A']), - (2, ARRAY['A']), - (3, ARRAY['A']), - (4, ARRAY['B']) + (1, ARRAY['A','_']), + (2, ARRAY['A','_']), + (3, ARRAY['A','B']), + (4, ARRAY['B','_']) ) AS t(id, flags) ) SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end @@ -3858,17 +3857,21 @@ WINDOW w AS ( A AS 'A' = ANY(flags), B AS 'B' = ANY(flags) ); -ERROR: reluctant quantifiers are not yet supported -LINE 15: PATTERN (A+? B) - ^ --- Expected: ERROR (reluctant quantifiers not yet supported) --- Test: A{1,3}? B (reluctant bounded) - parser rejects with ERROR + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A,_} | 1 | 3 + 2 | {A,_} | | + 3 | {A,B} | | + 4 | {B,_} | | +(4 rows) + +-- Test: A{1,3}? B (reluctant bounded) - same data, bounded quantifier WITH jacob_reluctant_bounded AS ( SELECT * FROM (VALUES - (1, ARRAY['A']), - (2, ARRAY['A']), - (3, ARRAY['A']), - (4, ARRAY['B']) + (1, ARRAY['A','_']), + (2, ARRAY['A','_']), + (3, ARRAY['A','B']), + (4, ARRAY['B','_']) ) AS t(id, flags) ) SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end @@ -3882,10 +3885,14 @@ WINDOW w AS ( A AS 'A' = ANY(flags), B AS 'B' = ANY(flags) ); -ERROR: reluctant quantifiers are not yet supported -LINE 15: PATTERN (A{1,3}? B) - ^ --- Expected: ERROR (reluctant quantifiers not yet supported) + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A,_} | 1 | 3 + 2 | {A,_} | | + 3 | {A,B} | | + 4 | {B,_} | | +(4 rows) + -- ============================================ -- Nested quantifiers (pathological patterns) -- ============================================ diff --git a/src/test/regress/expected/rpr_base.out b/src/test/regress/expected/rpr_base.out index d317ec70c8d..43eea32130f 100644 --- a/src/test/regress/expected/rpr_base.out +++ b/src/test/regress/expected/rpr_base.out @@ -1076,7 +1076,7 @@ ORDER BY id; (10 rows) DROP TABLE rpr_quant; --- Reluctant quantifiers (not yet supported) +-- Reluctant quantifiers CREATE TABLE rpr_reluctant (id INT, val INT); INSERT INTO rpr_reluctant VALUES (1, 10), (2, 20), (3, 30); -- *? (zero or more, reluctant) @@ -1088,10 +1088,14 @@ WINDOW w AS ( PATTERN (A*?) DEFINE A AS val > 0 ); -ERROR: reluctant quantifiers are not yet supported -LINE 6: PATTERN (A*?) - ^ --- Expected: ERROR: reluctant quantifiers are not yet supported + count +------- + 1 + 1 + 1 +(3 rows) + +-- Reluctant quantifier: prefer shortest match -- +? (one or more, reluctant) SELECT COUNT(*) OVER w FROM rpr_reluctant @@ -1101,10 +1105,14 @@ WINDOW w AS ( PATTERN (A+?) DEFINE A AS val > 0 ); -ERROR: reluctant quantifiers are not yet supported -LINE 6: PATTERN (A+?) - ^ --- Expected: ERROR: reluctant quantifiers are not yet supported + count +------- + 1 + 1 + 1 +(3 rows) + +-- Reluctant quantifier: prefer shortest match -- ?? (zero or one, reluctant) SELECT COUNT(*) OVER w FROM rpr_reluctant @@ -1114,10 +1122,14 @@ WINDOW w AS ( PATTERN (A??) DEFINE A AS val > 0 ); -ERROR: reluctant quantifiers are not yet supported -LINE 6: PATTERN (A??) - ^ --- Expected: ERROR: reluctant quantifiers are not yet supported + count +------- + 1 + 1 + 1 +(3 rows) + +-- Reluctant quantifier: prefer shortest match -- {n,}? (n or more, reluctant) SELECT COUNT(*) OVER w FROM rpr_reluctant @@ -1127,10 +1139,14 @@ WINDOW w AS ( PATTERN (A{2,}?) DEFINE A AS val > 0 ); -ERROR: reluctant quantifiers are not yet supported -LINE 6: PATTERN (A{2,}?) - ^ --- Expected: ERROR: reluctant quantifiers are not yet supported + count +------- + 2 + 0 + 0 +(3 rows) + +-- Reluctant quantifier: prefer shortest match -- {n,m}? (n to m, reluctant) SELECT COUNT(*) OVER w FROM rpr_reluctant @@ -1140,10 +1156,14 @@ WINDOW w AS ( PATTERN (A{1,3}?) DEFINE A AS val > 0 ); -ERROR: reluctant quantifiers are not yet supported -LINE 6: PATTERN (A{1,3}?) - ^ --- Expected: ERROR: reluctant quantifiers are not yet supported + count +------- + 1 + 1 + 1 +(3 rows) + +-- Reluctant quantifier: prefer shortest match -- {n}? (exactly n, reluctant) SELECT COUNT(*) OVER w FROM rpr_reluctant @@ -1153,10 +1173,14 @@ WINDOW w AS ( PATTERN (A{2}?) DEFINE A AS val > 0 ); -ERROR: reluctant quantifiers are not yet supported -LINE 6: PATTERN (A{2}?) - ^ --- Expected: ERROR: reluctant quantifiers are not yet supported + count +------- + 2 + 0 + 0 +(3 rows) + +-- Reluctant quantifier: prefer shortest match -- {,m}? (up to m, reluctant) - COMPLETELY UNTESTED RULE! SELECT COUNT(*) OVER w FROM rpr_reluctant @@ -1166,10 +1190,14 @@ WINDOW w AS ( PATTERN (A{,3}?) DEFINE A AS val > 0 ); -ERROR: reluctant quantifiers are not yet supported -LINE 6: PATTERN (A{,3}?) - ^ --- Expected: ERROR: reluctant quantifiers are not yet supported + count +------- + 1 + 1 + 1 +(3 rows) + +-- Reluctant quantifier: prefer shortest match -- Invalid reluctant patterns (wrong token after quantifier) -- {2}+ (should be {2}? not {2}+) SELECT COUNT(*) OVER w @@ -1352,10 +1380,14 @@ WINDOW w AS ( PATTERN (A* ?) DEFINE A AS val > 0 ); -ERROR: reluctant quantifiers are not yet supported -LINE 6: PATTERN (A* ?) - ^ --- Expected: ERROR: reluctant quantifiers are not yet supported + count +------- + 1 + 1 + 1 +(3 rows) + +-- Reluctant quantifier: prefer shortest match -- + ? (token separated) SELECT COUNT(*) OVER w FROM rpr_reluctant @@ -1365,10 +1397,14 @@ WINDOW w AS ( PATTERN (A+ ?) DEFINE A AS val > 0 ); -ERROR: reluctant quantifiers are not yet supported -LINE 6: PATTERN (A+ ?) - ^ --- Expected: ERROR: reluctant quantifiers are not yet supported + count +------- + 1 + 1 + 1 +(3 rows) + +-- Reluctant quantifier: prefer shortest match -- {2,} ? (token separated) SELECT COUNT(*) OVER w FROM rpr_reluctant @@ -1378,10 +1414,14 @@ WINDOW w AS ( PATTERN (A{2,} ?) DEFINE A AS val > 0 ); -ERROR: reluctant quantifiers are not yet supported -LINE 6: PATTERN (A{2,} ?) - ^ --- Expected: ERROR: reluctant quantifiers are not yet supported + count +------- + 2 + 0 + 0 +(3 rows) + +-- Reluctant quantifier: prefer shortest match -- Invalid token combinations -- * + (invalid combination) SELECT COUNT(*) OVER w @@ -1418,10 +1458,14 @@ WINDOW w AS ( PATTERN (A? ?) DEFINE A AS val > 0 ); -ERROR: reluctant quantifiers are not yet supported -LINE 6: PATTERN (A? ?) - ^ --- Expected: ERROR: reluctant quantifiers are not yet supported + count +------- + 1 + 1 + 1 +(3 rows) + +-- Reluctant quantifier: prefer shortest match DROP TABLE rpr_reluctant; -- Quantifier boundary conditions CREATE TABLE rpr_bounds (id INT); @@ -3391,6 +3435,150 @@ WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING -> Seq Scan on rpr_plan (6 rows) +-- Reluctant optimization bypass: VAR merge +-- A+? A stays as a+? a (greedy A+ A merges to a{2,}) +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+? A) DEFINE A AS val > 0); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+? a + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(6 rows) + +-- Reluctant optimization bypass: GROUP merge +-- (A B)+? (A B) stays separate (greedy merges to (a b){2,}) +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A B)+? (A B)) DEFINE A AS val <= 50, B AS val > 50); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a b)+? a b + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(6 rows) + +-- Reluctant optimization bypass: quantifier multiply (outer reluctant) +-- (A{2}){3}? stays as (a{2}){3}? (greedy merges to a{6}) +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A{2}){3}?) DEFINE A AS val > 0); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a{2}){3}? + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(6 rows) + +-- Reluctant optimization bypass: quantifier multiply (inner reluctant) +-- (A{2}?){3} stays as (a{2}?){3} (greedy merges to a{6}) +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A{2}?){3}) DEFINE A AS val > 0); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a{2}?){3} + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(6 rows) + +-- Reluctant optimization bypass: PREFIX merge +-- A B (A B)+? stays separate (greedy merges to (a b){2,}) +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B (A B)+?) DEFINE A AS val <= 50, B AS val > 50); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a b (a b)+? + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(6 rows) + +-- Reluctant optimization bypass: SUFFIX merge +-- (A B)+? A B stays separate (greedy merges to (a b){2,}) +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A B)+? A B) DEFINE A AS val <= 50, B AS val > 50); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a b)+? a b + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(6 rows) + +-- GROUP unwrap with quantifier propagation: (A)?? B -> a?? b +-- Single VAR child {1,1} receives GROUP's quantifier and reluctant +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A)?? B) DEFINE A AS val <= 50, B AS val > 50); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a?? b + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(6 rows) + +-- Reluctant preserved through ALT flatten +-- (A | (B | C))+? flattens to (a | b | c)+? - inner ALT flattened, reluctant kept +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A | (B | C))+?) DEFINE A AS val <= 30, B AS val <= 60, C AS val > 60); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a | b | c)+? + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(6 rows) + +-- Reluctant optimization bypass: absorption flags +-- A+? with SKIP PAST LAST ROW - no absorption markers (greedy A+ gets a+") +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW PATTERN (A+?) DEFINE A AS val > 0); + QUERY PLAN +------------------------------------------------------------------------------- + WindowAgg + Window: w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+? + -> Sort + Sort Key: id + -> Seq Scan on rpr_plan +(6 rows) + -- ============================================================ -- Absorption Flag Display Tests -- ============================================================ diff --git a/src/test/regress/expected/rpr_nfa.out b/src/test/regress/expected/rpr_nfa.out index 7d38d62ac31..a2a257329b7 100644 --- a/src/test/regress/expected/rpr_nfa.out +++ b/src/test/regress/expected/rpr_nfa.out @@ -289,6 +289,39 @@ WINDOW w AS ( 5 | {_} | | (5 rows) +-- Reluctant pattern (A+?) - not absorbable +-- Compare with greedy A+ above: reluctant excluded from absorption. +-- Each context produces minimum match independently. +WITH test_reluctant_absorption AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['A']), + (5, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_reluctant_absorption +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+?) + DEFINE + A AS 'A' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 1 + 2 | {A} | 2 | 2 + 3 | {A} | 3 | 3 + 4 | {A} | 4 | 4 + 5 | {_} | | +(5 rows) + -- ============================================================ -- Context Lifecycle -- ============================================================ @@ -420,6 +453,38 @@ WINDOW w AS ( 4 | {_,_} | | (4 rows) +-- Reluctant context lifecycle (A+? B with SKIP TO NEXT ROW) +-- A+? exits early but if B not available, falls back to loop. +-- Contexts not absorbed (reluctant), so multiple survive. +WITH test_reluctant_context AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['B']), + (4, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_reluctant_context +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+? B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 3 + 2 | {A} | 2 | 3 + 3 | {B} | | + 4 | {_} | | +(4 rows) + -- ============================================================ -- Advance Phase (Epsilon Transitions) -- ============================================================ @@ -575,6 +640,100 @@ WINDOW w AS ( 8 | {D} | | (8 rows) +-- Mixed greedy/reluctant sequence: A+? B+ (reluctant A, greedy B) +-- A exits as early as possible, B consumes the rest greedily +WITH test_mixed_reluctant AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A','B']), + (4, ARRAY['B']), + (5, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_mixed_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+? B+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 5 + 2 | {A} | | + 3 | {A,B} | | + 4 | {B} | | + 5 | {B} | | +(5 rows) + +-- Optional reluctant group: (A B)?? C +-- nfa_advance_begin: reluctant prefers skip (0 iterations) over enter +WITH test_optional_reluctant AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['C']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_optional_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A B)?? C) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 3 + 2 | {B} | | + 3 | {C} | | +(3 rows) + +-- Greedy/reluctant sequence: A+ B+? (greedy A, reluctant B at end) +-- A consumes greedily, B+? exits to FIN after minimum match +WITH test_greedy_then_reluctant AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A','B']), + (3, ARRAY['B']), + (4, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_greedy_then_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B+?) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 3 + 2 | {A,B} | | + 3 | {B} | | + 4 | {B} | | +(4 rows) + -- ============================================================ -- Match Phase -- ============================================================ @@ -810,6 +969,37 @@ WINDOW w AS ( 6 | {B} | | (6 rows) +-- Reluctant with limited frame (A+? B with 2 FOLLOWING) +-- Reluctant exits early, B must be within frame boundary +WITH test_reluctant_frame AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['B']), + (4, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_reluctant_frame +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND 2 FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+? B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 3 + 2 | {A} | 2 | 3 + 3 | {B} | | + 4 | {_} | | +(4 rows) + -- ============================================================ -- State Management -- ============================================================ @@ -843,6 +1033,35 @@ WINDOW w AS ( 3 | {D,_} | | (3 rows) +-- Reluctant duplicate state handling +-- (A+? | B+?) creates exit and loop states; exit paths may converge +WITH test_reluctant_dedup AS ( + SELECT * FROM (VALUES + (1, ARRAY['A','B']), + (2, ARRAY['A','B']), + (3, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_reluctant_dedup +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A+? | B+?)) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A,B} | 1 | 1 + 2 | {A,B} | 2 | 2 + 3 | {_} | | +(3 rows) + -- Large pattern (stress free list) WITH test_large_pattern AS ( SELECT * FROM (VALUES @@ -1014,6 +1233,39 @@ WINDOW w AS ( 5 | {B} | | (5 rows) +-- Reluctant not absorbed (A+? with SKIP TO NEXT ROW) +-- Compare with greedy A+ below: reluctant is not absorbable, +-- so all contexts survive independently. +WITH test_reluctant_stats AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['A']), + (5, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_reluctant_stats +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+?) + DEFINE + A AS 'A' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 1 + 2 | {A} | 2 | 2 + 3 | {A} | 3 | 3 + 4 | {A} | 4 | 4 + 5 | {_} | | +(5 rows) + -- Absorbed contexts WITH test_absorbed AS ( SELECT * FROM (VALUES @@ -1322,6 +1574,96 @@ WINDOW w AS ( 7 | {B} | | (7 rows) +-- Greedy vs reluctant: A+ matches all rows, A+? matches minimum +WITH test_greedy_vs_reluctant AS ( + SELECT * FROM (VALUES + (1, ARRAY['A','_']), + (2, ARRAY['A','_']), + (3, ARRAY['A','B']), + (4, ARRAY['B','_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_greedy_vs_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A,_} | 1 | 4 + 2 | {A,_} | | + 3 | {A,B} | | + 4 | {B,_} | | +(4 rows) + +-- Same data, reluctant A+? exits at row 3 where B is first available +WITH test_greedy_vs_reluctant AS ( + SELECT * FROM (VALUES + (1, ARRAY['A','_']), + (2, ARRAY['A','_']), + (3, ARRAY['A','B']), + (4, ARRAY['B','_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_greedy_vs_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+? B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A,_} | 1 | 3 + 2 | {A,_} | | + 3 | {A,B} | | + 4 | {B,_} | | +(4 rows) + +-- Reluctant group: (A B)+? matches minimum 1 iteration +WITH test_reluctant_group AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['A']), + (4, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_reluctant_group +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A B)+?) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 2 + 2 | {B} | | + 3 | {A} | 3 | 4 + 4 | {B} | | +(4 rows) + -- ============================================================ -- Pathological Pattern Runtime Protection -- ============================================================ @@ -1386,6 +1728,37 @@ WINDOW w AS ( 4 | {B} | | (4 rows) +-- Reluctant nullable: A*? (prefers 0 matches) +-- A*? always takes skip path (0 iterations preferred) +WITH test_reluctant_nullable AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_reluctant_nullable +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A*? B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | | + 2 | {A} | | + 3 | {A} | | + 4 | {_} | | +(4 rows) + -- ============================================================ -- Alternation Runtime Behavior -- ============================================================ @@ -1542,6 +1915,35 @@ WINDOW w AS ( 2 | {_,C} | | (2 rows) +-- ALT with reluctant: (A+? | B+) - A branch is reluctant, B is greedy. +-- Row 1 matches both A and B. A+? exits immediately (match 1-1). +WITH test_alt_reluctant AS ( + SELECT * FROM (VALUES + (1, ARRAY['A','B']), + (2, ARRAY['B','_']), + (3, ARRAY['B','_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_alt_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A+? | B+)) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A,B} | 1 | 1 + 2 | {B,_} | 2 | 3 + 3 | {B,_} | | +(3 rows) + -- ============================================================ -- Deep Nested Groups -- ============================================================ @@ -1705,6 +2107,40 @@ WINDOW w AS ( 9 | {_} | | (9 rows) +-- Nested reluctant group ((A B)+?) with following element C +-- Inner group exits after minimum 1 iteration +WITH test_nested_reluctant AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['C']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_nested_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A B)+? C) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 5 + 2 | {B} | | + 3 | {A} | 3 | 5 + 4 | {B} | | + 5 | {C} | | +(5 rows) + -- ============================================================ -- SKIP Options (Runtime) -- ============================================================ @@ -1819,6 +2255,53 @@ ORDER BY mode, id; SKIP PAST | 4 | {B} | | (8 rows) +-- Reluctant SKIP comparison: A+? with SKIP PAST vs SKIP NEXT +WITH test_reluctant_skip AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['_']) + ) AS t(id, flags) +) +SELECT 'SKIP PAST' AS mode, id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_reluctant_skip +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+?) + DEFINE + A AS 'A' = ANY(flags) +) +UNION ALL +SELECT 'SKIP NEXT' AS mode, id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_reluctant_skip +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+?) + DEFINE + A AS 'A' = ANY(flags) +) +ORDER BY mode, id; + mode | id | flags | match_start | match_end +-----------+----+-------+-------------+----------- + SKIP NEXT | 1 | {A} | 1 | 1 + SKIP NEXT | 2 | {A} | 2 | 2 + SKIP NEXT | 3 | {A} | 3 | 3 + SKIP NEXT | 4 | {_} | | + SKIP PAST | 1 | {A} | 1 | 1 + SKIP PAST | 2 | {A} | 2 | 2 + SKIP PAST | 3 | {A} | 3 | 3 + SKIP PAST | 4 | {_} | | +(8 rows) + -- ============================================================ -- INITIAL Mode (Runtime) -- ============================================================ @@ -2428,6 +2911,39 @@ WINDOW w AS ( 4 | {_,_} | | (4 rows) +-- Reluctant branch in ALT not absorbable: (A+?) | B +-- A+? is reluctant so not absorbable. Compare with greedy (A+) | B above. +WITH test_reluctant_alt_absorption AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_reluctant_alt_absorption +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A+?) | B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 1 + 2 | {A} | 2 | 2 + 3 | {A} | 3 | 3 + 4 | {B} | 4 | 4 + 5 | {_} | | +(5 rows) + -- ============================================================ -- FIXME Issues - Known Limitations -- ============================================================ diff --git a/src/test/regress/sql/rpr.sql b/src/test/regress/sql/rpr.sql index 7690c5611c0..a263074e3e9 100644 --- a/src/test/regress/sql/rpr.sql +++ b/src/test/regress/sql/rpr.sql @@ -1271,6 +1271,7 @@ SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER DOWN AS price < PREV(1) ); +-- PREV(1) missing column reference (error) SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w FROM stock WINDOW w AS ( @@ -2059,16 +2060,16 @@ WINDOW w AS ( -- Expected: 1-4 (A C B C) -- ============================================ --- RELUCTANT quantifiers (not yet supported) +-- RELUCTANT quantifiers -- ============================================ --- Test: A+? B (reluctant) - parser rejects with ERROR +-- Test: A+? B (reluctant) - B available at row 3, A exits early WITH jacob_reluctant AS ( SELECT * FROM (VALUES - (1, ARRAY['A']), - (2, ARRAY['A']), - (3, ARRAY['A']), - (4, ARRAY['B']) + (1, ARRAY['A','_']), + (2, ARRAY['A','_']), + (3, ARRAY['A','B']), + (4, ARRAY['B','_']) ) AS t(id, flags) ) SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end @@ -2082,15 +2083,14 @@ WINDOW w AS ( A AS 'A' = ANY(flags), B AS 'B' = ANY(flags) ); --- Expected: ERROR (reluctant quantifiers not yet supported) --- Test: A{1,3}? B (reluctant bounded) - parser rejects with ERROR +-- Test: A{1,3}? B (reluctant bounded) - same data, bounded quantifier WITH jacob_reluctant_bounded AS ( SELECT * FROM (VALUES - (1, ARRAY['A']), - (2, ARRAY['A']), - (3, ARRAY['A']), - (4, ARRAY['B']) + (1, ARRAY['A','_']), + (2, ARRAY['A','_']), + (3, ARRAY['A','B']), + (4, ARRAY['B','_']) ) AS t(id, flags) ) SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end @@ -2104,7 +2104,6 @@ WINDOW w AS ( A AS 'A' = ANY(flags), B AS 'B' = ANY(flags) ); --- Expected: ERROR (reluctant quantifiers not yet supported) -- ============================================ -- Nested quantifiers (pathological patterns) diff --git a/src/test/regress/sql/rpr_base.sql b/src/test/regress/sql/rpr_base.sql index b752ad12873..992c081205a 100644 --- a/src/test/regress/sql/rpr_base.sql +++ b/src/test/regress/sql/rpr_base.sql @@ -762,7 +762,7 @@ ORDER BY id; DROP TABLE rpr_quant; --- Reluctant quantifiers (not yet supported) +-- Reluctant quantifiers CREATE TABLE rpr_reluctant (id INT, val INT); INSERT INTO rpr_reluctant VALUES (1, 10), (2, 20), (3, 30); @@ -775,7 +775,7 @@ WINDOW w AS ( PATTERN (A*?) DEFINE A AS val > 0 ); --- Expected: ERROR: reluctant quantifiers are not yet supported +-- Reluctant quantifier: prefer shortest match -- +? (one or more, reluctant) SELECT COUNT(*) OVER w @@ -786,7 +786,7 @@ WINDOW w AS ( PATTERN (A+?) DEFINE A AS val > 0 ); --- Expected: ERROR: reluctant quantifiers are not yet supported +-- Reluctant quantifier: prefer shortest match -- ?? (zero or one, reluctant) SELECT COUNT(*) OVER w @@ -797,7 +797,7 @@ WINDOW w AS ( PATTERN (A??) DEFINE A AS val > 0 ); --- Expected: ERROR: reluctant quantifiers are not yet supported +-- Reluctant quantifier: prefer shortest match -- {n,}? (n or more, reluctant) SELECT COUNT(*) OVER w @@ -808,7 +808,7 @@ WINDOW w AS ( PATTERN (A{2,}?) DEFINE A AS val > 0 ); --- Expected: ERROR: reluctant quantifiers are not yet supported +-- Reluctant quantifier: prefer shortest match -- {n,m}? (n to m, reluctant) SELECT COUNT(*) OVER w @@ -819,7 +819,7 @@ WINDOW w AS ( PATTERN (A{1,3}?) DEFINE A AS val > 0 ); --- Expected: ERROR: reluctant quantifiers are not yet supported +-- Reluctant quantifier: prefer shortest match -- {n}? (exactly n, reluctant) SELECT COUNT(*) OVER w @@ -830,7 +830,7 @@ WINDOW w AS ( PATTERN (A{2}?) DEFINE A AS val > 0 ); --- Expected: ERROR: reluctant quantifiers are not yet supported +-- Reluctant quantifier: prefer shortest match -- {,m}? (up to m, reluctant) - COMPLETELY UNTESTED RULE! SELECT COUNT(*) OVER w @@ -841,7 +841,7 @@ WINDOW w AS ( PATTERN (A{,3}?) DEFINE A AS val > 0 ); --- Expected: ERROR: reluctant quantifiers are not yet supported +-- Reluctant quantifier: prefer shortest match -- Invalid reluctant patterns (wrong token after quantifier) @@ -1002,7 +1002,7 @@ WINDOW w AS ( PATTERN (A* ?) DEFINE A AS val > 0 ); --- Expected: ERROR: reluctant quantifiers are not yet supported +-- Reluctant quantifier: prefer shortest match -- + ? (token separated) SELECT COUNT(*) OVER w @@ -1013,7 +1013,7 @@ WINDOW w AS ( PATTERN (A+ ?) DEFINE A AS val > 0 ); --- Expected: ERROR: reluctant quantifiers are not yet supported +-- Reluctant quantifier: prefer shortest match -- {2,} ? (token separated) SELECT COUNT(*) OVER w @@ -1024,7 +1024,7 @@ WINDOW w AS ( PATTERN (A{2,} ?) DEFINE A AS val > 0 ); --- Expected: ERROR: reluctant quantifiers are not yet supported +-- Reluctant quantifier: prefer shortest match -- Invalid token combinations @@ -1059,7 +1059,7 @@ WINDOW w AS ( PATTERN (A? ?) DEFINE A AS val > 0 ); --- Expected: ERROR: reluctant quantifiers are not yet supported +-- Reluctant quantifier: prefer shortest match DROP TABLE rpr_reluctant; @@ -2278,6 +2278,69 @@ SELECT COUNT(*) OVER w FROM rpr_plan WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING PATTERN ((A | B | C)) DEFINE A AS val <= 30, B AS val <= 60, C AS val > 60); +-- Reluctant optimization bypass: VAR merge +-- A+? A stays as a+? a (greedy A+ A merges to a{2,}) +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A+? A) DEFINE A AS val > 0); + +-- Reluctant optimization bypass: GROUP merge +-- (A B)+? (A B) stays separate (greedy merges to (a b){2,}) +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A B)+? (A B)) DEFINE A AS val <= 50, B AS val > 50); + +-- Reluctant optimization bypass: quantifier multiply (outer reluctant) +-- (A{2}){3}? stays as (a{2}){3}? (greedy merges to a{6}) +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A{2}){3}?) DEFINE A AS val > 0); + +-- Reluctant optimization bypass: quantifier multiply (inner reluctant) +-- (A{2}?){3} stays as (a{2}?){3} (greedy merges to a{6}) +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A{2}?){3}) DEFINE A AS val > 0); + +-- Reluctant optimization bypass: PREFIX merge +-- A B (A B)+? stays separate (greedy merges to (a b){2,}) +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN (A B (A B)+?) DEFINE A AS val <= 50, B AS val > 50); + +-- Reluctant optimization bypass: SUFFIX merge +-- (A B)+? A B stays separate (greedy merges to (a b){2,}) +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A B)+? A B) DEFINE A AS val <= 50, B AS val > 50); + +-- GROUP unwrap with quantifier propagation: (A)?? B -> a?? b +-- Single VAR child {1,1} receives GROUP's quantifier and reluctant +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A)?? B) DEFINE A AS val <= 50, B AS val > 50); + +-- Reluctant preserved through ALT flatten +-- (A | (B | C))+? flattens to (a | b | c)+? - inner ALT flattened, reluctant kept +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + PATTERN ((A | (B | C))+?) DEFINE A AS val <= 30, B AS val <= 60, C AS val > 60); + +-- Reluctant optimization bypass: absorption flags +-- A+? with SKIP PAST LAST ROW - no absorption markers (greedy A+ gets a+") +EXPLAIN (COSTS OFF) +SELECT COUNT(*) OVER w FROM rpr_plan +WINDOW w AS (ORDER BY id ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW PATTERN (A+?) DEFINE A AS val > 0); + -- ============================================================ -- Absorption Flag Display Tests -- ============================================================ diff --git a/src/test/regress/sql/rpr_nfa.sql b/src/test/regress/sql/rpr_nfa.sql index e9894cfa691..0e853ca753b 100644 --- a/src/test/regress/sql/rpr_nfa.sql +++ b/src/test/regress/sql/rpr_nfa.sql @@ -230,6 +230,31 @@ WINDOW w AS ( B AS 'B' = ANY(flags) ); +-- Reluctant pattern (A+?) - not absorbable +-- Compare with greedy A+ above: reluctant excluded from absorption. +-- Each context produces minimum match independently. +WITH test_reluctant_absorption AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['A']), + (5, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_reluctant_absorption +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+?) + DEFINE + A AS 'A' = ANY(flags) +); + -- ============================================================ -- Context Lifecycle -- ============================================================ @@ -333,6 +358,31 @@ WINDOW w AS ( D AS 'D' = ANY(flags) ); +-- Reluctant context lifecycle (A+? B with SKIP TO NEXT ROW) +-- A+? exits early but if B not available, falls back to loop. +-- Contexts not absorbed (reluctant), so multiple survive. +WITH test_reluctant_context AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['B']), + (4, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_reluctant_context +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+? B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + -- ============================================================ -- Advance Phase (Epsilon Transitions) -- ============================================================ @@ -448,6 +498,79 @@ WINDOW w AS ( D AS 'D' = ANY(flags) ); +-- Mixed greedy/reluctant sequence: A+? B+ (reluctant A, greedy B) +-- A exits as early as possible, B consumes the rest greedily +WITH test_mixed_reluctant AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A','B']), + (4, ARRAY['B']), + (5, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_mixed_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+? B+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + +-- Optional reluctant group: (A B)?? C +-- nfa_advance_begin: reluctant prefers skip (0 iterations) over enter +WITH test_optional_reluctant AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['C']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_optional_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A B)?? C) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags) +); + +-- Greedy/reluctant sequence: A+ B+? (greedy A, reluctant B at end) +-- A consumes greedily, B+? exits to FIN after minimum match +WITH test_greedy_then_reluctant AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A','B']), + (3, ARRAY['B']), + (4, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_greedy_then_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B+?) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + -- ============================================================ -- Match Phase -- ============================================================ @@ -629,6 +752,30 @@ WINDOW w AS ( B AS 'B' = ANY(flags) ); +-- Reluctant with limited frame (A+? B with 2 FOLLOWING) +-- Reluctant exits early, B must be within frame boundary +WITH test_reluctant_frame AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['B']), + (4, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_reluctant_frame +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND 2 FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+? B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + -- ============================================================ -- State Management -- ============================================================ @@ -657,6 +804,29 @@ WINDOW w AS ( D AS 'D' = ANY(flags) ); +-- Reluctant duplicate state handling +-- (A+? | B+?) creates exit and loop states; exit paths may converge +WITH test_reluctant_dedup AS ( + SELECT * FROM (VALUES + (1, ARRAY['A','B']), + (2, ARRAY['A','B']), + (3, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_reluctant_dedup +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A+? | B+?)) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + -- Large pattern (stress free list) WITH test_large_pattern AS ( SELECT * FROM (VALUES @@ -790,6 +960,31 @@ WINDOW w AS ( B AS 'B' = ANY(flags) ); +-- Reluctant not absorbed (A+? with SKIP TO NEXT ROW) +-- Compare with greedy A+ below: reluctant is not absorbable, +-- so all contexts survive independently. +WITH test_reluctant_stats AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['A']), + (5, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_reluctant_stats +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+?) + DEFINE + A AS 'A' = ANY(flags) +); + -- Absorbed contexts WITH test_absorbed AS ( SELECT * FROM (VALUES @@ -934,6 +1129,75 @@ WINDOW w AS ( B AS 'B' = ANY(flags) ); +-- Greedy vs reluctant: A+ matches all rows, A+? matches minimum +WITH test_greedy_vs_reluctant AS ( + SELECT * FROM (VALUES + (1, ARRAY['A','_']), + (2, ARRAY['A','_']), + (3, ARRAY['A','B']), + (4, ARRAY['B','_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_greedy_vs_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+ B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + +-- Same data, reluctant A+? exits at row 3 where B is first available +WITH test_greedy_vs_reluctant AS ( + SELECT * FROM (VALUES + (1, ARRAY['A','_']), + (2, ARRAY['A','_']), + (3, ARRAY['A','B']), + (4, ARRAY['B','_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_greedy_vs_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+? B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + +-- Reluctant group: (A B)+? matches minimum 1 iteration +WITH test_reluctant_group AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['A']), + (4, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_reluctant_group +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A B)+?) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + -- ============================================================ -- Pathological Pattern Runtime Protection -- ============================================================ @@ -984,6 +1248,30 @@ WINDOW w AS ( A AS 'A' = ANY(flags) ); +-- Reluctant nullable: A*? (prefers 0 matches) +-- A*? always takes skip path (0 iterations preferred) +WITH test_reluctant_nullable AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_reluctant_nullable +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A*? B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + -- ============================================================ -- Alternation Runtime Behavior -- ============================================================ @@ -1102,6 +1390,29 @@ WINDOW w AS ( C AS 'C' = ANY(flags) ); +-- ALT with reluctant: (A+? | B+) - A branch is reluctant, B is greedy. +-- Row 1 matches both A and B. A+? exits immediately (match 1-1). +WITH test_alt_reluctant AS ( + SELECT * FROM (VALUES + (1, ARRAY['A','B']), + (2, ARRAY['B','_']), + (3, ARRAY['B','_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_alt_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN ((A+? | B+)) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + -- ============================================================ -- Deep Nested Groups -- ============================================================ @@ -1220,6 +1531,32 @@ WINDOW w AS ( B AS 'B' = ANY(flags) ); +-- Nested reluctant group ((A B)+?) with following element C +-- Inner group exits after minimum 1 iteration +WITH test_nested_reluctant AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['C']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_nested_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A B)+? C) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags) +); + -- ============================================================ -- SKIP Options (Runtime) -- ============================================================ @@ -1308,6 +1645,42 @@ WINDOW w AS ( ) ORDER BY mode, id; +-- Reluctant SKIP comparison: A+? with SKIP PAST vs SKIP NEXT +WITH test_reluctant_skip AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['_']) + ) AS t(id, flags) +) +SELECT 'SKIP PAST' AS mode, id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_reluctant_skip +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A+?) + DEFINE + A AS 'A' = ANY(flags) +) +UNION ALL +SELECT 'SKIP NEXT' AS mode, id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_reluctant_skip +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+?) + DEFINE + A AS 'A' = ANY(flags) +) +ORDER BY mode, id; + -- ============================================================ -- INITIAL Mode (Runtime) -- ============================================================ @@ -1791,6 +2164,31 @@ WINDOW w AS ( C AS 'C' = ANY(flags) ); +-- Reluctant branch in ALT not absorbable: (A+?) | B +-- A+? is reluctant so not absorbable. Compare with greedy (A+) | B above. +WITH test_reluctant_alt_absorption AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['_']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_reluctant_alt_absorption +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A+?) | B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + -- ============================================================ -- FIXME Issues - Known Limitations -- ============================================================ -- 2.50.1 (Apple Git-155)