From fb200f06d1202c13b85935c40d2a5d80c1da584e Mon Sep 17 00:00:00 2001 From: Henson Choi Date: Mon, 1 Jun 2026 23:49:46 +0900 Subject: [PATCH 40/68] Honor reluctant quantifier for non-leading optional RPR variables A reluctant optional pattern variable that is not the leading element -- for example A?? in PATTERN (B A?? C) -- matched greedily. nfa_route_to_elem builds the skip path of a min=0 variable in a fixed order, adding the enter (match) state before the skip state. Since the state list order encodes match preference, the enter path always won, ignoring RPRElemIsReluctant. Leading variables and optional groups were unaffected: they go through nfa_advance_var / nfa_advance_begin, which already order the skip path first when reluctant. Mirror that handling in nfa_route_to_elem: when the optional variable is reluctant, route the skip path first and drop the enter state if the skip path has already reached FIN. Add regression tests for non-leading reluctant optional variables and groups. Per a report from a static analysis tool. --- src/backend/executor/execRPR.c | 29 ++++++++++- src/test/regress/expected/rpr_nfa.out | 69 +++++++++++++++++++++++++++ src/test/regress/sql/rpr_nfa.sql | 56 ++++++++++++++++++++++ 3 files changed, 152 insertions(+), 2 deletions(-) diff --git a/src/backend/executor/execRPR.c b/src/backend/executor/execRPR.c index a2c304da0d1..580b25f398d 100644 --- a/src/backend/executor/execRPR.c +++ b/src/backend/executor/execRPR.c @@ -957,10 +957,35 @@ nfa_route_to_elem(WindowAggState *winstate, RPRNFAContext *ctx, skipState = nfa_state_clone(winstate, nextElem->next, state->counts, state->isAbsorbable); - nfa_add_state_unique(winstate, ctx, state); + if (skipState != NULL && RPRElemIsReluctant(nextElem)) + { + RPRNFAState *savedMatch = ctx->matchedState; - if (skipState != NULL) + /* + * Reluctant optional VAR: prefer skipping. Explore the skip path + * first so it outranks the enter (match) path; if it reaches FIN + * the shortest match is found and the enter state is dropped. + * This mirrors the reluctant branch of nfa_advance_begin used by + * the leading-position and optional-group paths. + */ nfa_advance_state(winstate, ctx, skipState, currentPos); + + if (ctx->matchedState != savedMatch) + { + nfa_state_free(winstate, state); + return; + } + + nfa_add_state_unique(winstate, ctx, state); + } + else + { + /* Greedy (or non-skippable): enter first, then skip */ + nfa_add_state_unique(winstate, ctx, state); + + if (skipState != NULL) + nfa_advance_state(winstate, ctx, skipState, currentPos); + } } else { diff --git a/src/test/regress/expected/rpr_nfa.out b/src/test/regress/expected/rpr_nfa.out index 2a0d4f11e74..829e8251aed 100644 --- a/src/test/regress/expected/rpr_nfa.out +++ b/src/test/regress/expected/rpr_nfa.out @@ -1222,6 +1222,75 @@ WINDOW w AS ( 3 | {C} | | (3 rows) +-- Non-leading reluctant optional VAR: (B A?? C) +-- Reluctant A?? should prefer to skip, matching B(1) C(2) with A left +-- unmatched (match_end 2). The leading/group reluctant cases above go through +-- the begin path; this exercises the non-leading skip path in +-- nfa_route_to_elem, which must honor reluctant ordering too. +WITH test_nonleading_reluctant AS ( + SELECT * FROM (VALUES + (1, ARRAY['B']), + (2, ARRAY['A', 'C']), + (3, ARRAY['C']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_nonleading_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (B A?? C) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {B} | 1 | 2 + 2 | {A,C} | | + 3 | {C} | | +(3 rows) + +-- Non-leading reluctant optional GROUP with a follower: (B (A X)?? C) +-- Like the VAR case above but a multi-element group; it goes through the +-- begin path (nfa_advance_begin), which already honors reluctant ordering. +-- Reluctant (A X)?? should skip, matching B(1) C(2), with the group skipped +-- to the following C (not to FIN). +WITH test_nonleading_reluctant_group AS ( + SELECT * FROM (VALUES + (1, ARRAY['B']), + (2, ARRAY['A', 'C']), + (3, ARRAY['X']), + (4, ARRAY['C']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_nonleading_reluctant_group +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (B (A X)?? C) + DEFINE + A AS 'A' = ANY(flags), + X AS 'X' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {B} | 1 | 2 + 2 | {A,C} | | + 3 | {X} | | + 4 | {C} | | +(4 rows) + -- Greedy/reluctant sequence: A+ B+? (greedy A, reluctant B at end) -- A consumes greedily, B+? exits to FIN after minimum match WITH test_greedy_then_reluctant AS ( diff --git a/src/test/regress/sql/rpr_nfa.sql b/src/test/regress/sql/rpr_nfa.sql index 6362c69f385..3bbec496279 100644 --- a/src/test/regress/sql/rpr_nfa.sql +++ b/src/test/regress/sql/rpr_nfa.sql @@ -860,6 +860,62 @@ WINDOW w AS ( C AS 'C' = ANY(flags) ); +-- Non-leading reluctant optional VAR: (B A?? C) +-- Reluctant A?? should prefer to skip, matching B(1) C(2) with A left +-- unmatched (match_end 2). The leading/group reluctant cases above go through +-- the begin path; this exercises the non-leading skip path in +-- nfa_route_to_elem, which must honor reluctant ordering too. +WITH test_nonleading_reluctant AS ( + SELECT * FROM (VALUES + (1, ARRAY['B']), + (2, ARRAY['A', 'C']), + (3, ARRAY['C']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_nonleading_reluctant +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (B A?? C) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags) +); + +-- Non-leading reluctant optional GROUP with a follower: (B (A X)?? C) +-- Like the VAR case above but a multi-element group; it goes through the +-- begin path (nfa_advance_begin), which already honors reluctant ordering. +-- Reluctant (A X)?? should skip, matching B(1) C(2), with the group skipped +-- to the following C (not to FIN). +WITH test_nonleading_reluctant_group AS ( + SELECT * FROM (VALUES + (1, ARRAY['B']), + (2, ARRAY['A', 'C']), + (3, ARRAY['X']), + (4, ARRAY['C']) + ) AS t(id, flags) +) +SELECT id, flags, + first_value(id) OVER w AS match_start, + last_value(id) OVER w AS match_end +FROM test_nonleading_reluctant_group +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (B (A X)?? C) + DEFINE + A AS 'A' = ANY(flags), + X AS 'X' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags) +); + -- Greedy/reluctant sequence: A+ B+? (greedy A, reluctant B at end) -- A consumes greedily, B+? exits to FIN after minimum match WITH test_greedy_then_reluctant AS ( -- 2.50.1 (Apple Git-155)