From d13cf490ce7585fd99a7690911a75d90a51017bb Mon Sep 17 00:00:00 2001 From: Henson Choi Date: Wed, 4 Mar 2026 14:08:47 +0900 Subject: [PATCH 8/8] Fix zero-min reluctant quantifier to produce zero-length match diff --git a/src/backend/executor/nodeWindowAgg.c b/src/backend/executor/nodeWindowAgg.c index 4441b4d9c51..3075c51789c 100644 --- a/src/backend/executor/nodeWindowAgg.c +++ b/src/backend/executor/nodeWindowAgg.c @@ -307,24 +307,24 @@ static inline bool nfa_eval_var_match(WindowAggState *winstate, static void nfa_match(WindowAggState *winstate, RPRNFAContext *ctx, bool *varMatched); static void nfa_advance_state(WindowAggState *winstate, RPRNFAContext *ctx, - RPRNFAState *state, int64 currentPos, bool initialAdvance); + RPRNFAState *state, int64 currentPos); static void nfa_route_to_elem(WindowAggState *winstate, RPRNFAContext *ctx, RPRNFAState *state, RPRPatternElement *nextElem, - int64 currentPos, bool initialAdvance); + int64 currentPos); static void nfa_advance_alt(WindowAggState *winstate, RPRNFAContext *ctx, RPRNFAState *state, RPRPatternElement *elem, - int64 currentPos, bool initialAdvance); + int64 currentPos); static void nfa_advance_begin(WindowAggState *winstate, RPRNFAContext *ctx, RPRNFAState *state, RPRPatternElement *elem, - int64 currentPos, bool initialAdvance); + int64 currentPos); static void nfa_advance_end(WindowAggState *winstate, RPRNFAContext *ctx, RPRNFAState *state, RPRPatternElement *elem, - int64 currentPos, bool initialAdvance); + int64 currentPos); static void nfa_advance_var(WindowAggState *winstate, RPRNFAContext *ctx, RPRNFAState *state, RPRPatternElement *elem, - int64 currentPos, bool initialAdvance); + int64 currentPos); static void nfa_advance(WindowAggState *winstate, RPRNFAContext *ctx, - int64 currentPos, bool initialAdvance); + int64 currentPos); /* * Not null info bit array consists of 2-bit items @@ -5008,9 +5008,14 @@ register_result: * handles nested groups like "((A|B)+)+" correctly - exiting the inner * group counts as one iteration of the outer group. * - * - initialAdvance flag: The first advance after context creation must - * skip FIN recording. Reaching FIN without evaluating any VAR would - * create a zero-length match, which is invalid. + * - Zero-length match handling: The initial advance uses currentPos = + * startPos - 1 (before any row is consumed). If FIN is reached via + * epsilon transitions alone, matchEndRow = startPos - 1 < matchStartRow, + * resulting in UNMATCHED. For reluctant min=0 patterns (A*?, A??), + * the skip path reaches FIN first and early termination prunes enter + * paths, yielding an immediate zero-length (unmatched) result. For + * greedy patterns (A*), the enter path adds VAR states first, then + * the skip FIN is recorded but VAR states survive for later matching. * * Context Absorption Runtime: * --------------------------- @@ -5122,7 +5127,7 @@ nfa_process_row(WindowAggState *winstate, int64 currentPos, Assert(!hasLimitedFrame || currentPos < ctx->matchStartRow + frameOffset + 1); - nfa_advance(winstate, ctx, currentPos, false); + nfa_advance(winstate, ctx, currentPos); } } @@ -5467,10 +5472,11 @@ nfa_start_context(WindowAggState *winstate, int64 startPos) * states for VAR elements with min=0. This prepares the context for the * first row's match phase. * - * Pass initialAdvance=true to prevent recording zero-length matches when - * optional patterns can skip all VARs to reach FIN immediately. + * Use startPos - 1 as currentPos since no row has been consumed yet. If + * FIN is reached via epsilon transitions, matchEndRow = startPos - 1 + * which is less than matchStartRow, resulting in UNMATCHED treatment. */ - nfa_advance(winstate, ctx, startPos, true); + nfa_advance(winstate, ctx, startPos - 1); return ctx; } @@ -5711,7 +5717,7 @@ nfa_finalize_all_contexts(WindowAggState *winstate, int64 lastPos) if (ctx->states != NULL) { nfa_match(winstate, ctx, NULL); - nfa_advance(winstate, ctx, lastPos, false); + nfa_advance(winstate, ctx, lastPos); } } } @@ -6047,7 +6053,7 @@ nfa_match(WindowAggState *winstate, RPRNFAContext *ctx, bool *varMatched) static void nfa_route_to_elem(WindowAggState *winstate, RPRNFAContext *ctx, RPRNFAState *state, RPRPatternElement *nextElem, - int64 currentPos, bool initialAdvance) + int64 currentPos) { if (RPRElemIsVar(nextElem)) { @@ -6061,11 +6067,11 @@ nfa_route_to_elem(WindowAggState *winstate, RPRNFAContext *ctx, nfa_add_state_unique(winstate, ctx, state); if (skipState != NULL) - nfa_advance_state(winstate, ctx, skipState, currentPos, initialAdvance); + nfa_advance_state(winstate, ctx, skipState, currentPos); } else { - nfa_advance_state(winstate, ctx, state, currentPos, initialAdvance); + nfa_advance_state(winstate, ctx, state, currentPos); } } @@ -6077,7 +6083,7 @@ nfa_route_to_elem(WindowAggState *winstate, RPRNFAContext *ctx, static void nfa_advance_alt(WindowAggState *winstate, RPRNFAContext *ctx, RPRNFAState *state, RPRPatternElement *elem, - int64 currentPos, bool initialAdvance) + int64 currentPos) { RPRPattern *pattern = winstate->rpPattern; RPRPatternElement *elements = pattern->elements; @@ -6097,7 +6103,7 @@ nfa_advance_alt(WindowAggState *winstate, RPRNFAContext *ctx, state->counts, state->isAbsorbable); /* Recursively process this branch before next */ - nfa_advance_state(winstate, ctx, newState, currentPos, initialAdvance); + nfa_advance_state(winstate, ctx, newState, currentPos); altIdx = altElem->jump; } @@ -6115,7 +6121,7 @@ nfa_advance_alt(WindowAggState *winstate, RPRNFAContext *ctx, static void nfa_advance_begin(WindowAggState *winstate, RPRNFAContext *ctx, RPRNFAState *state, RPRPatternElement *elem, - int64 currentPos, bool initialAdvance) + int64 currentPos) { RPRPattern *pattern = winstate->rpPattern; RPRPatternElement *elements = pattern->elements; @@ -6136,7 +6142,7 @@ nfa_advance_begin(WindowAggState *winstate, RPRNFAContext *ctx, /* Reluctant: skip first (prefer fewer iterations), enter second */ nfa_route_to_elem(winstate, ctx, skipState, - &elements[elem->jump], currentPos, initialAdvance); + &elements[elem->jump], currentPos); /* * If skip path reached FIN, shortest match is found. Skip group entry @@ -6150,19 +6156,19 @@ nfa_advance_begin(WindowAggState *winstate, RPRNFAContext *ctx, state->elemIdx = elem->next; nfa_route_to_elem(winstate, ctx, state, - &elements[state->elemIdx], currentPos, initialAdvance); + &elements[state->elemIdx], currentPos); } else { /* Greedy: enter first, skip second */ state->elemIdx = elem->next; nfa_route_to_elem(winstate, ctx, state, - &elements[state->elemIdx], currentPos, initialAdvance); + &elements[state->elemIdx], currentPos); if (skipState != NULL) { nfa_route_to_elem(winstate, ctx, skipState, - &elements[elem->jump], currentPos, initialAdvance); + &elements[elem->jump], currentPos); } } } @@ -6176,7 +6182,7 @@ nfa_advance_begin(WindowAggState *winstate, RPRNFAContext *ctx, static void nfa_advance_end(WindowAggState *winstate, RPRNFAContext *ctx, RPRNFAState *state, RPRPatternElement *elem, - int64 currentPos, bool initialAdvance) + int64 currentPos) { RPRPattern *pattern = winstate->rpPattern; RPRPatternElement *elements = pattern->elements; @@ -6199,7 +6205,7 @@ nfa_advance_end(WindowAggState *winstate, RPRNFAContext *ctx, state->elemIdx = elem->jump; jumpElem = &elements[state->elemIdx]; nfa_route_to_elem(winstate, ctx, state, jumpElem, - currentPos, initialAdvance); + currentPos); /* * Fast-forward fallback for nullable bodies. E.g. (A?){2,3} when A @@ -6223,7 +6229,7 @@ nfa_advance_end(WindowAggState *winstate, RPRNFAContext *ctx, ffState->counts[nextElem->depth]++; nfa_route_to_elem(winstate, ctx, ffState, nextElem, - currentPos, initialAdvance); + currentPos); } } else if (elem->max != RPR_QUANTITY_INF && count >= elem->max) @@ -6239,7 +6245,7 @@ nfa_advance_end(WindowAggState *winstate, RPRNFAContext *ctx, if (RPRElemIsEnd(nextElem) && state->counts[nextElem->depth] < RPR_COUNT_MAX) state->counts[nextElem->depth]++; - nfa_route_to_elem(winstate, ctx, state, nextElem, currentPos, initialAdvance); + nfa_route_to_elem(winstate, ctx, state, nextElem, currentPos); } else { @@ -6277,7 +6283,7 @@ nfa_advance_end(WindowAggState *winstate, RPRNFAContext *ctx, /* Exit first (preferred for reluctant) */ nfa_route_to_elem(winstate, ctx, exitState, nextElem, - currentPos, initialAdvance); + currentPos); /* * If exit path reached FIN, shortest match is found. Skip loop to @@ -6291,16 +6297,16 @@ nfa_advance_end(WindowAggState *winstate, RPRNFAContext *ctx, /* Loop second */ nfa_route_to_elem(winstate, ctx, state, jumpElem, - currentPos, initialAdvance); + currentPos); } else { /* Loop first (preferred for greedy) */ nfa_route_to_elem(winstate, ctx, state, jumpElem, - currentPos, initialAdvance); + currentPos); /* Exit second */ nfa_route_to_elem(winstate, ctx, exitState, nextElem, - currentPos, initialAdvance); + currentPos); } } } @@ -6314,7 +6320,7 @@ nfa_advance_end(WindowAggState *winstate, RPRNFAContext *ctx, static void nfa_advance_var(WindowAggState *winstate, RPRNFAContext *ctx, RPRNFAState *state, RPRPatternElement *elem, - int64 currentPos, bool initialAdvance) + int64 currentPos) { RPRPattern *pattern = winstate->rpPattern; RPRPatternElement *elements = pattern->elements; @@ -6359,7 +6365,7 @@ nfa_advance_var(WindowAggState *winstate, RPRNFAContext *ctx, /* Exit first (preferred for reluctant) */ nfa_route_to_elem(winstate, ctx, cloneState, nextElem, - currentPos, initialAdvance); + currentPos); /* * If exit path reached FIN, the shortest match is found. Skip @@ -6400,7 +6406,7 @@ nfa_advance_var(WindowAggState *winstate, RPRNFAContext *ctx, } nfa_route_to_elem(winstate, ctx, state, nextElem, - currentPos, initialAdvance); + currentPos); } } else if (canLoop) @@ -6424,7 +6430,7 @@ nfa_advance_var(WindowAggState *winstate, RPRNFAContext *ctx, state->counts[nextElem->depth]++; } - nfa_route_to_elem(winstate, ctx, state, nextElem, currentPos, initialAdvance); + nfa_route_to_elem(winstate, ctx, state, nextElem, currentPos); } } @@ -6436,7 +6442,7 @@ nfa_advance_var(WindowAggState *winstate, RPRNFAContext *ctx, */ static void nfa_advance_state(WindowAggState *winstate, RPRNFAContext *ctx, - RPRNFAState *state, int64 currentPos, bool initialAdvance) + RPRNFAState *state, int64 currentPos) { RPRPattern *pattern = winstate->rpPattern; RPRPatternElement *elem; @@ -6458,28 +6464,25 @@ nfa_advance_state(WindowAggState *winstate, RPRNFAContext *ctx, switch (elem->varId) { case RPR_VARID_FIN: - /* FIN: record match (skip for initial advance) */ - if (!initialAdvance) - nfa_add_matched_state(winstate, ctx, state, currentPos); - else - nfa_state_free(winstate, state); + /* FIN: record match */ + nfa_add_matched_state(winstate, ctx, state, currentPos); break; case RPR_VARID_ALT: - nfa_advance_alt(winstate, ctx, state, elem, currentPos, initialAdvance); + nfa_advance_alt(winstate, ctx, state, elem, currentPos); break; case RPR_VARID_BEGIN: - nfa_advance_begin(winstate, ctx, state, elem, currentPos, initialAdvance); + nfa_advance_begin(winstate, ctx, state, elem, currentPos); break; case RPR_VARID_END: - nfa_advance_end(winstate, ctx, state, elem, currentPos, initialAdvance); + nfa_advance_end(winstate, ctx, state, elem, currentPos); break; default: /* VAR element */ - nfa_advance_var(winstate, ctx, state, elem, currentPos, initialAdvance); + nfa_advance_var(winstate, ctx, state, elem, currentPos); break; } } @@ -6489,13 +6492,12 @@ nfa_advance_state(WindowAggState *winstate, RPRNFAContext *ctx, * * Advance phase (divergence): transition from all surviving states. * Called after match phase with matched VAR states, or at context creation - * for initial epsilon expansion (initialAdvance=true skips FIN matches). + * for initial epsilon expansion (with currentPos = startPos - 1). * * Processes states in order, using recursive DFS to maintain lexical order. */ static void -nfa_advance(WindowAggState *winstate, RPRNFAContext *ctx, int64 currentPos, - bool initialAdvance) +nfa_advance(WindowAggState *winstate, RPRNFAContext *ctx, int64 currentPos) { RPRNFAState *states = ctx->states; RPRNFAState *state; @@ -6515,7 +6517,7 @@ nfa_advance(WindowAggState *winstate, RPRNFAContext *ctx, int64 currentPos, states = states->next; state->next = NULL; - nfa_advance_state(winstate, ctx, state, currentPos, initialAdvance); + nfa_advance_state(winstate, ctx, state, currentPos); /* * Early termination: if a FIN was newly reached in this advance, diff --git a/src/test/regress/expected/rpr_base.out b/src/test/regress/expected/rpr_base.out index a7c536625f1..2fefb933c71 100644 --- a/src/test/regress/expected/rpr_base.out +++ b/src/test/regress/expected/rpr_base.out @@ -1090,9 +1090,9 @@ WINDOW w AS ( ); count ------- - 1 - 1 - 1 + 0 + 0 + 0 (3 rows) -- Reluctant quantifier: prefer shortest match @@ -1124,9 +1124,9 @@ WINDOW w AS ( ); count ------- - 1 - 1 - 1 + 0 + 0 + 0 (3 rows) -- Reluctant quantifier: prefer shortest match @@ -1192,9 +1192,9 @@ WINDOW w AS ( ); count ------- - 1 - 1 - 1 + 0 + 0 + 0 (3 rows) -- Reluctant quantifier: prefer shortest match @@ -1382,9 +1382,9 @@ WINDOW w AS ( ); count ------- - 1 - 1 - 1 + 0 + 0 + 0 (3 rows) -- Reluctant quantifier: prefer shortest match @@ -1460,9 +1460,9 @@ WINDOW w AS ( ); count ------- - 1 - 1 - 1 + 0 + 0 + 0 (3 rows) -- Reluctant quantifier: prefer shortest match diff --git a/src/test/regress/expected/rpr_explain.out b/src/test/regress/expected/rpr_explain.out index ef184b7950b..d5cd80f423e 100644 --- a/src/test/regress/expected/rpr_explain.out +++ b/src/test/regress/expected/rpr_explain.out @@ -3347,7 +3347,7 @@ WINDOW w AS ( Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) Pattern: ((a' b')+" c)* Storage: Memory Maximum Storage: NkB - NFA States: 7 peak, 178 total, 0 merged + NFA States: 9 peak, 178 total, 0 merged NFA Contexts: 4 peak, 61 total, 22 pruned NFA: 1 matched (len 57/57/57.0), 0 mismatched NFA: 0 absorbed, 37 skipped (len 1/3/2.0) @@ -3384,7 +3384,7 @@ WINDOW w AS ( Window: w AS (ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) Pattern: (a (b c)+)* Storage: Memory Maximum Storage: NkB - NFA States: 5 peak, 160 total, 0 merged + NFA States: 7 peak, 160 total, 0 merged NFA Contexts: 4 peak, 61 total, 22 pruned NFA: 1 matched (len 57/57/57.0), 0 mismatched NFA: 0 absorbed, 37 skipped (len 1/3/2.0) -- 2.50.1 (Apple Git-155)