From 17d96417d053dced68f3fb1168bbbf77ec5486a5 Mon Sep 17 00:00:00 2001 From: Henson Choi Date: Thu, 15 Jan 2026 21:05:04 +0900 Subject: [PATCH] Row pattern recognition: Refactor update_reduced_frame for readability and performance --- src/backend/executor/nodeWindowAgg.c | 299 ++++++++++++--------------- 1 file changed, 132 insertions(+), 167 deletions(-) diff --git a/src/backend/executor/nodeWindowAgg.c b/src/backend/executor/nodeWindowAgg.c index d56e2e67170..3fc73db4e59 100644 --- a/src/backend/executor/nodeWindowAgg.c +++ b/src/backend/executor/nodeWindowAgg.c @@ -263,15 +263,17 @@ static void nfa_state_free_list(WindowAggState *winstate, RPRNFAState *list); static RPRNFAState *nfa_state_clone(WindowAggState *winstate, int16 elemIdx, int16 altPriority, int16 *counts, RPRNFAState *list); -static bool nfa_evaluate_row(WindowObject winobj, int64 pos, bool *varMatched, bool *anyMatch); +static bool nfa_evaluate_row(WindowObject winobj, int64 pos, bool *varMatched); static RPRNFAContext *nfa_context_alloc(WindowAggState *winstate); static void nfa_unlink_context(WindowAggState *winstate, RPRNFAContext *ctx); static void nfa_context_free(WindowAggState *winstate, RPRNFAContext *ctx); -static void nfa_start_context(WindowAggState *winstate, int64 startPos); +static RPRNFAContext *nfa_start_context(WindowAggState *winstate, int64 startPos); +static void nfa_step(WindowAggState *winstate, RPRNFAContext *ctx, + bool *varMatched, int64 pos); +static void nfa_process_context(WindowAggState *winstate, RPRNFAContext *ctx, + int64 currentPos, bool hasLimitedFrame, int64 frameOffset); static void nfa_step_single(WindowAggState *winstate, RPRNFAContext *ctx, RPRNFAState *state, bool *varMatched, int64 currentPos); -static void nfa_finalize_boundary(WindowAggState *winstate, RPRNFAContext *ctx, - int64 matchEndPos); static RPRNFAContext *nfa_find_context_for_pos(WindowAggState *winstate, int64 pos); static void nfa_remove_contexts_up_to(WindowAggState *winstate, int64 endPos); static void nfa_absorb_contexts(WindowAggState *winstate, RPRNFAContext *excludeCtx, int64 currentPos); @@ -4713,8 +4715,6 @@ update_reduced_frame(WindowObject winobj, int64 pos) { WindowAggState *winstate = winobj->winstate; RPRNFAContext *targetCtx; - RPRNFAContext *firstCtx; - int64 matchLength = 0; int64 currentPos; int64 startPos; int frameOptions = winstate->frameOptions; @@ -4733,21 +4733,14 @@ update_reduced_frame(WindowObject winobj, int64 pos) /* * Case 1: pos is before any existing context's start position. * This means the position was already processed and determined unmatched. - * Note: contexts are added at head with increasing positions, so we need - * to find the minimum matchStartRow (the oldest context). + * Head is the oldest context (lowest matchStartRow) since contexts are + * added at tail with increasing positions. */ + if (winstate->nfaContext != NULL && + pos < winstate->nfaContext->matchStartRow) { - int64 minStartRow = INT64_MAX; - for (firstCtx = winstate->nfaContext; firstCtx != NULL; firstCtx = firstCtx->next) - { - if (firstCtx->matchStartRow < minStartRow) - minStartRow = firstCtx->matchStartRow; - } - if (minStartRow != INT64_MAX && pos < minStartRow) - { - register_reduced_frame_map(winstate, pos, RF_UNMATCHED); - return; - } + register_reduced_frame_map(winstate, pos, RF_UNMATCHED); + return; } /* @@ -4756,14 +4749,30 @@ update_reduced_frame(WindowObject winobj, int64 pos) targetCtx = nfa_find_context_for_pos(winstate, pos); if (targetCtx == NULL) { - /* No context exists - create one and start fresh */ - nfa_start_context(winstate, pos); - targetCtx = winstate->nfaContext; + /* + * No context exists. If pos is already processed, it means this row + * was already determined to be unmatched or skipped - no need to + * reprocess. + */ + if (pos <= winstate->nfaLastProcessedRow) + { + register_reduced_frame_map(winstate, pos, RF_UNMATCHED); + return; + } + /* Not yet processed - create new context and start fresh */ + targetCtx = nfa_start_context(winstate, pos); + } + else if (targetCtx->states == NULL) + { + /* Context already completed - skip to result registration */ + goto register_result; } /* * Determine where to start processing. - * If we've already evaluated rows beyond pos, continue from there. + * Usually nfaLastProcessedRow+1 >= pos since contexts are created at + * currentPos+1 during processing. However, pos can exceed this when + * rows are skipped (e.g., unmatched rows don't update nfaLastProcessedRow). */ startPos = Max(pos, winstate->nfaLastProcessedRow + 1); @@ -4774,11 +4783,10 @@ update_reduced_frame(WindowObject winobj, int64 pos) for (currentPos = startPos; targetCtx->states != NULL; currentPos++) { bool rowExists; - bool anyMatch; RPRNFAContext *ctx; /* Evaluate variables for this row - done only once, shared by all contexts */ - rowExists = nfa_evaluate_row(winobj, currentPos, winstate->nfaVarMatched, &anyMatch); + rowExists = nfa_evaluate_row(winobj, currentPos, winstate->nfaVarMatched); /* No more rows in partition? Finalize all contexts */ if (!rowExists) @@ -4786,7 +4794,7 @@ update_reduced_frame(WindowObject winobj, int64 pos) for (ctx = winstate->nfaContext; ctx != NULL; ctx = ctx->next) { if (ctx->states != NULL) - nfa_finalize_boundary(winstate, ctx, currentPos - 1); + nfa_step(winstate, ctx, NULL, currentPos - 1); } /* Absorb completed contexts at partition boundary */ nfa_absorb_contexts(winstate, NULL, currentPos - 1); @@ -4796,69 +4804,15 @@ update_reduced_frame(WindowObject winobj, int64 pos) /* Update last processed row */ winstate->nfaLastProcessedRow = currentPos; - /* - * Process each active context with this row's evaluation results. - * Each context has its own frame boundary based on matchStartRow. - */ + /* Process each active context with this row's evaluation results */ for (ctx = winstate->nfaContext; ctx != NULL; ctx = ctx->next) - { - int64 ctxFrameEnd; - - /* Skip already-completed contexts */ - if (ctx->states == NULL) - continue; - - /* - * Calculate per-context frame end. - * For "ROWS BETWEEN CURRENT ROW AND N FOLLOWING", each context's - * frame end is matchStartRow + offset + 1 (exclusive). - */ - if (hasLimitedFrame) - { - ctxFrameEnd = ctx->matchStartRow + frameOffset + 1; - if (currentPos >= ctxFrameEnd) - { - nfa_finalize_boundary(winstate, ctx, ctxFrameEnd - 1); - continue; - } - } - - /* First row of this context must match at least one variable */ - if (currentPos == ctx->matchStartRow && !anyMatch) - { - /* Clear states to mark as unmatched */ - nfa_state_free_list(winstate, ctx->states); - ctx->states = NULL; - continue; - } - - /* Skip if this row is before context's start */ - if (currentPos < ctx->matchStartRow) - continue; - - /* Process states for this context */ - { - RPRNFAState *states = ctx->states; - RPRNFAState *state; - RPRNFAState *nextState; - - ctx->states = NULL; - - for (state = states; state != NULL; state = nextState) - { - nextState = state->next; - state->next = NULL; - nfa_step_single(winstate, ctx, state, winstate->nfaVarMatched, currentPos); - } - } - } + nfa_process_context(winstate, ctx, currentPos, hasLimitedFrame, frameOffset); /* * Create a new context for the next potential start position. * This enables overlapping match detection for SKIP TO NEXT ROW. */ - if (anyMatch) - nfa_start_context(winstate, currentPos + 1); + nfa_start_context(winstate, currentPos + 1); /* * Absorb redundant contexts. @@ -4873,23 +4827,20 @@ update_reduced_frame(WindowObject winobj, int64 pos) break; } - /* - * Get match result from target context. - */ - if (targetCtx->matchEndRow >= pos) - matchLength = targetCtx->matchEndRow - pos + 1; +register_result: + Assert(pos == targetCtx->matchStartRow); /* * Register reduced frame map based on match result. */ - if (matchLength <= 0) + if (targetCtx->matchEndRow < targetCtx->matchStartRow) { - register_reduced_frame_map(winstate, pos, RF_UNMATCHED); + register_reduced_frame_map(winstate, targetCtx->matchStartRow, RF_UNMATCHED); } else { - register_reduced_frame_map(winstate, pos, RF_FRAME_HEAD); - for (int64 i = pos + 1; i < pos + matchLength; i++) + register_reduced_frame_map(winstate, targetCtx->matchStartRow, RF_FRAME_HEAD); + for (int64 i = targetCtx->matchStartRow + 1; i <= targetCtx->matchEndRow; i++) { register_reduced_frame_map(winstate, i, RF_SKIPPED); } @@ -4898,26 +4849,16 @@ update_reduced_frame(WindowObject winobj, int64 pos) /* * Cleanup contexts based on SKIP mode. */ - if (matchLength > 0 && winstate->rpSkipTo == ST_PAST_LAST_ROW) + if (targetCtx->matchEndRow >= targetCtx->matchStartRow && + winstate->rpSkipTo == ST_PAST_LAST_ROW) { /* Remove all contexts with start <= matchEnd */ - nfa_remove_contexts_up_to(winstate, pos + matchLength - 1); + nfa_remove_contexts_up_to(winstate, targetCtx->matchEndRow); } else { /* SKIP TO NEXT ROW or no match: just remove the target context */ - RPRNFAContext *ctx = winstate->nfaContext; - - while (ctx != NULL) - { - if (ctx == targetCtx) - { - nfa_unlink_context(winstate, ctx); - nfa_context_free(winstate, ctx); - break; - } - ctx = ctx->next; - } + nfa_context_free(winstate, targetCtx); } } @@ -5152,22 +5093,19 @@ nfa_free_matched_state(WindowAggState *winstate, RPRNFAState *state) * * Evaluate all DEFINE variables for current row. * Returns true if the row exists, false if out of partition. - * If row exists, fills varMatched array and sets *anyMatch if any variable matched. + * If row exists, fills varMatched array. * varMatched[i] = true if variable i matched at current row. */ static bool -nfa_evaluate_row(WindowObject winobj, int64 pos, bool *varMatched, bool *anyMatchOut) +nfa_evaluate_row(WindowObject winobj, int64 pos, bool *varMatched) { WindowAggState *winstate = winobj->winstate; ExprContext *econtext = winstate->ss.ps.ps_ExprContext; int numDefineVars = list_length(winstate->defineVariableList); ListCell *lc; int varIdx = 0; - bool anyMatch = false; TupleTableSlot *slot; - *anyMatchOut = false; - /* * Set up slots for current, previous, and next rows. * We don't call get_slots() here to avoid recursion through @@ -5208,22 +5146,13 @@ nfa_evaluate_row(WindowObject winobj, int64 pos, bool *varMatched, bool *anyMatc /* Evaluate DEFINE expression */ result = ExecEvalExpr(exprState, econtext, &isnull); - if (!isnull && DatumGetBool(result)) - { - varMatched[varIdx] = true; - anyMatch = true; - } - else - { - varMatched[varIdx] = false; - } + varMatched[varIdx] = (!isnull && DatumGetBool(result)); varIdx++; if (varIdx >= numDefineVars) break; } - *anyMatchOut = anyMatch; return true; /* Row exists */ } @@ -5286,12 +5215,15 @@ nfa_unlink_context(WindowAggState *winstate, RPRNFAContext *ctx) /* * nfa_context_free * - * Return a context to free list. Also frees any states in the context. - * Note: Caller must unlink context from active list first using nfa_unlink_context. + * Unlink context from active list and return it to free list. + * Also frees any states in the context. */ static void nfa_context_free(WindowAggState *winstate, RPRNFAContext *ctx) { + /* Unlink from active list first */ + nfa_unlink_context(winstate, ctx); + if (ctx->states != NULL) nfa_state_free_list(winstate, ctx->states); if (ctx->matchedState != NULL) @@ -5307,9 +5239,9 @@ nfa_context_free(WindowAggState *winstate, RPRNFAContext *ctx) * nfa_start_context * * Start a new match context at given position. - * Adds context to winstate->nfaContext list. + * Adds context to winstate->nfaContext list and returns the new context. */ -static void +static RPRNFAContext * nfa_start_context(WindowAggState *winstate, int64 startPos) { RPRNFAContext *ctx; @@ -5318,14 +5250,16 @@ nfa_start_context(WindowAggState *winstate, int64 startPos) ctx->matchStartRow = startPos; ctx->states = nfa_state_alloc(winstate); /* initial state at elem 0 */ - /* Add to head of active context list (doubly-linked) */ - ctx->next = winstate->nfaContext; - ctx->prev = NULL; - if (winstate->nfaContext != NULL) - winstate->nfaContext->prev = ctx; + /* Add to tail of active context list (doubly-linked, oldest-first) */ + ctx->prev = winstate->nfaContextTail; + ctx->next = NULL; + if (winstate->nfaContextTail != NULL) + winstate->nfaContextTail->next = ctx; else - winstate->nfaContextTail = ctx; /* first context becomes tail */ - winstate->nfaContext = ctx; + winstate->nfaContext = ctx; /* first context becomes head */ + winstate->nfaContextTail = ctx; + + return ctx; } /* @@ -5339,10 +5273,16 @@ nfa_find_context_for_pos(WindowAggState *winstate, int64 pos) { RPRNFAContext *ctx; + /* + * List is sorted by matchStartRow ascending (oldest/smallest at head). + * Stop early if we pass the target position. + */ for (ctx = winstate->nfaContext; ctx != NULL; ctx = ctx->next) { if (ctx->matchStartRow == pos) return ctx; + if (ctx->matchStartRow > pos) + break; /* won't find it, list is sorted */ } return NULL; } @@ -5359,17 +5299,13 @@ nfa_remove_contexts_up_to(WindowAggState *winstate, int64 endPos) RPRNFAContext *ctx; RPRNFAContext *next; - ctx = winstate->nfaContext; - while (ctx != NULL) + /* Contexts are sorted by matchStartRow ascending, so we can stop early */ + for (ctx = winstate->nfaContext; ctx != NULL; ctx = next) { next = ctx->next; - if (ctx->matchStartRow <= endPos) - { - /* Remove this context */ - nfa_unlink_context(winstate, ctx); - nfa_context_free(winstate, ctx); - } - ctx = next; + if (ctx->matchStartRow > endPos) + break; + nfa_context_free(winstate, ctx); } } @@ -5384,8 +5320,8 @@ nfa_remove_contexts_up_to(WindowAggState *winstate, int64 endPos) * - For unbounded quantifiers (max=INT32_MAX): older.counts >= newer.counts * - For bounded quantifiers: older.counts == newer.counts * - * Note: List is newest-first, so we check if later nodes (older contexts) - * can absorb earlier nodes (newer contexts). + * Note: List is oldest-first (head=oldest, tail=newest), so we start from + * tail (newest) and check if older contexts (via prev) can absorb it. */ static void nfa_absorb_contexts(WindowAggState *winstate, RPRNFAContext *excludeCtx, int64 currentPos) @@ -5421,14 +5357,14 @@ nfa_absorb_contexts(WindowAggState *winstate, RPRNFAContext *excludeCtx, int64 c * for all depths d * 3. For bounded elements: counts must be exactly equal at all depths * - * List is newest-first (head = highest matchStartRow). - * We iterate from head (newest) and check if older contexts can absorb it. + * List is oldest-first (head = lowest matchStartRow, tail = highest). + * We iterate from tail (newest) via prev and check if older contexts can absorb it. */ - ctx = winstate->nfaContext; + ctx = winstate->nfaContextTail; while (ctx != NULL) { - RPRNFAContext *nextCtx = ctx->next; + RPRNFAContext *nextCtx = ctx->prev; /* traverse toward older (head) */ RPRNFAContext *older; bool absorbed = false; @@ -5461,7 +5397,7 @@ nfa_absorb_contexts(WindowAggState *winstate, RPRNFAContext *excludeCtx, int64 c } /* Check if any older completed context can absorb this one */ - for (older = ctx->next; older != NULL && !absorbed; older = older->next) + for (older = ctx->prev; older != NULL && !absorbed; older = older->prev) { /* Must have started earlier */ if (older->matchStartRow >= ctx->matchStartRow) @@ -5478,7 +5414,6 @@ nfa_absorb_contexts(WindowAggState *winstate, RPRNFAContext *excludeCtx, int64 c if (older->matchEndRow >= ctx->matchEndRow) { /* Absorb: remove ctx (newer) */ - nfa_unlink_context(winstate, ctx); nfa_context_free(winstate, ctx); absorbed = true; } @@ -5518,9 +5453,9 @@ nfa_absorb_contexts(WindowAggState *winstate, RPRNFAContext *excludeCtx, int64 c /* * Check if any older context can absorb this one. - * Older contexts have lower matchStartRow. + * Older contexts have lower matchStartRow (toward head via prev). */ - for (older = ctx->next; older != NULL && !absorbed; older = older->next) + for (older = ctx->prev; older != NULL && !absorbed; older = older->prev) { RPRNFAState *laterState; RPRNFAState *earlierState; @@ -5616,7 +5551,6 @@ nfa_absorb_contexts(WindowAggState *winstate, RPRNFAContext *excludeCtx, int64 c if (canAbsorb) { /* Absorb: remove ctx (newer) */ - nfa_unlink_context(winstate, ctx); nfa_context_free(winstate, ctx); absorbed = true; } @@ -5627,32 +5561,57 @@ nfa_absorb_contexts(WindowAggState *winstate, RPRNFAContext *excludeCtx, int64 c } /* - * nfa_finalize_boundary + * nfa_step * - * Finalize NFA states at partition/frame boundary. - * Sets all varMatched to false and processes remaining states. + * Process all states in context through NFA for one row. + * Calls nfa_step_single for each state. */ static void -nfa_finalize_boundary(WindowAggState *winstate, RPRNFAContext *ctx, int64 matchEndPos) +nfa_step(WindowAggState *winstate, RPRNFAContext *ctx, bool *varMatched, int64 pos) { RPRNFAState *states = ctx->states; RPRNFAState *state; RPRNFAState *nextState; - int numVars = list_length(winstate->defineVariableList); ctx->states = NULL; - for (int i = 0; i < numVars; i++) - winstate->nfaVarMatched[i] = false; - for (state = states; state != NULL; state = nextState) { nextState = state->next; state->next = NULL; - nfa_step_single(winstate, ctx, state, winstate->nfaVarMatched, matchEndPos); + nfa_step_single(winstate, ctx, state, varMatched, pos); } } +/* + * nfa_process_context + * + * Process one context for the current row. + * Handles frame boundary check and NFA step. + */ +static void +nfa_process_context(WindowAggState *winstate, RPRNFAContext *ctx, + int64 currentPos, bool hasLimitedFrame, int64 frameOffset) +{ + /* Skip already-completed contexts */ + if (ctx->states == NULL) + return; + + /* Check frame boundary */ + if (hasLimitedFrame) + { + int64 ctxFrameEnd = ctx->matchStartRow + frameOffset + 1; + if (currentPos >= ctxFrameEnd) + { + nfa_step(winstate, ctx, NULL, ctxFrameEnd - 1); + return; + } + } + + /* Process states for this context */ + nfa_step(winstate, ctx, winstate->nfaVarMatched, currentPos); +} + /* * nfa_step_single * @@ -5688,15 +5647,21 @@ nfa_step_single(WindowAggState *winstate, RPRNFAContext *ctx, { /* * Variable: check if it matches current row. - * With DEFINE-first ordering, varId < numDefines has DEFINE expr, + * If varMatched is NULL (boundary), all variables are forced to false. + * Otherwise, varId < numDefines uses DEFINE expr, * varId >= numDefines defaults to TRUE. */ - int numDefines = list_length(winstate->defineVariableList); - - if (elem->varId >= numDefines) - matched = true; /* Not defined in DEFINE, defaults to TRUE */ + if (varMatched == NULL) + matched = false; else - matched = varMatched[elem->varId]; + { + int numDefines = list_length(winstate->defineVariableList); + + if (elem->varId >= numDefines) + matched = true; /* Not defined in DEFINE, defaults to TRUE */ + else + matched = varMatched[elem->varId]; + } count = state->counts[depth]; -- 2.50.1 (Apple Git-155)