From 07251e04688c389938dd268c28b1a00f25bec931 Mon Sep 17 00:00:00 2001 From: Henson Choi Date: Wed, 14 Jan 2026 23:43:13 +0900 Subject: [PATCH] Row pattern recognition: Implement streaming NFA-based pattern matching --- src/backend/commands/explain.c | 143 ++ src/backend/executor/nodeWindowAgg.c | 2067 +++++++++--------- src/backend/nodes/copyfuncs.c | 39 + src/backend/nodes/equalfuncs.c | 33 + src/backend/nodes/outfuncs.c | 51 + src/backend/nodes/readfuncs.c | 79 + src/backend/optimizer/plan/Makefile | 1 + src/backend/optimizer/plan/createplan.c | 68 +- src/backend/optimizer/plan/meson.build | 1 + src/backend/optimizer/plan/rpr.c | 1024 +++++++++ src/backend/parser/Makefile | 1 + src/backend/parser/README | 1 + src/backend/parser/gram.y | 281 ++- src/backend/parser/meson.build | 1 + src/backend/parser/parse_clause.c | 262 +-- src/backend/parser/parse_rpr.c | 340 +++ src/backend/parser/scan.l | 4 +- src/backend/utils/adt/ruleutils.c | 125 +- src/include/nodes/execnodes.h | 63 +- src/include/nodes/parsenodes.h | 43 +- src/include/nodes/plannodes.h | 73 +- src/include/optimizer/rpr.h | 46 + src/include/parser/parse_clause.h | 3 + src/include/parser/parse_rpr.h | 22 + src/test/regress/expected/rpr.out | 2632 ++++++++++++++++++----- src/test/regress/sql/rpr.sql | 827 +++++++ src/tools/pgindent/typedefs.list | 7 + 27 files changed, 6297 insertions(+), 1940 deletions(-) create mode 100644 src/backend/optimizer/plan/rpr.c create mode 100644 src/backend/parser/parse_rpr.c create mode 100644 src/include/optimizer/rpr.h create mode 100644 src/include/parser/parse_rpr.h diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index b7bb111688c..969c9195864 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -29,6 +29,7 @@ #include "nodes/extensible.h" #include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" +#include "optimizer/rpr.h" #include "parser/analyze.h" #include "parser/parsetree.h" #include "rewrite/rewriteHandler.h" @@ -117,6 +118,8 @@ static void show_window_def(WindowAggState *planstate, static void show_window_keys(StringInfo buf, PlanState *planstate, int nkeys, AttrNumber *keycols, List *ancestors, ExplainState *es); +static void append_rpr_quantifier(StringInfo buf, RPRPatternElement *elem); +static char *deparse_rpr_pattern(RPRPattern *pattern); static void show_storage_info(char *maxStorageType, int64 maxSpaceUsed, ExplainState *es); static void show_tablesample(TableSampleClause *tsc, PlanState *planstate, @@ -2889,6 +2892,134 @@ show_sortorder_options(StringInfo buf, Node *sortexpr, } } +/* + * Append quantifier suffix for a pattern element. + */ +static void +append_rpr_quantifier(StringInfo buf, RPRPatternElement *elem) +{ + if (elem->min == 1 && elem->max == 1) + return; /* no quantifier */ + else if (elem->min == 0 && elem->max == RPR_QUANTITY_INF) + appendStringInfoChar(buf, '*'); + else if (elem->min == 1 && elem->max == RPR_QUANTITY_INF) + appendStringInfoChar(buf, '+'); + else if (elem->min == 0 && elem->max == 1) + appendStringInfoChar(buf, '?'); + else if (elem->max == RPR_QUANTITY_INF) + appendStringInfo(buf, "{%d,}", elem->min); + else if (elem->min == elem->max) + appendStringInfo(buf, "{%d}", elem->min); + else + appendStringInfo(buf, "{%d,%d}", elem->min, elem->max); + + if (RPRElemIsReluctant(elem)) + appendStringInfoChar(buf, '?'); +} + +/* + * Deparse a compiled RPRPattern (bytecode) back to pattern string. + * Simple approach: output parens for each depth level as-is. + */ +static char * +deparse_rpr_pattern(RPRPattern *pattern) +{ + StringInfoData buf; + int i; + RPRDepth prevDepth = 0; + bool needSpace = false; + RPRElemIdx altSepAt = RPR_ELEMIDX_INVALID; + + if (pattern == NULL || pattern->numElements == 0) + return NULL; + + initStringInfo(&buf); + + for (i = 0; i < pattern->numElements; i++) + { + RPRPatternElement *elem = &pattern->elements[i]; + + if (RPRElemIsFin(elem)) + break; + + /* Alternation separator */ + if (altSepAt == i) + { + appendStringInfoString(&buf, " | "); + needSpace = false; + altSepAt = RPR_ELEMIDX_INVALID; + } + + if (RPRElemIsAlt(elem)) + { + /* Open parens up to ALT's content depth */ + while (prevDepth <= elem->depth) + { + if (needSpace) + appendStringInfoChar(&buf, ' '); + appendStringInfoChar(&buf, '('); + prevDepth++; + needSpace = false; + } + continue; + } + + if (RPRElemIsEnd(elem)) + { + /* Close down to END's depth, output quantifier */ + while (prevDepth > elem->depth + 1) + { + appendStringInfoChar(&buf, ')'); + prevDepth--; + } + appendStringInfoChar(&buf, ')'); + append_rpr_quantifier(&buf, elem); + prevDepth = elem->depth; + needSpace = true; + continue; + } + + if (RPRElemIsVar(elem)) + { + /* Open parens for depth increase */ + while (prevDepth < elem->depth) + { + if (needSpace) + appendStringInfoChar(&buf, ' '); + appendStringInfoChar(&buf, '('); + prevDepth++; + needSpace = false; + } + + /* Close parens for depth decrease */ + while (prevDepth > elem->depth) + { + appendStringInfoChar(&buf, ')'); + prevDepth--; + } + + if (needSpace) + appendStringInfoChar(&buf, ' '); + + appendStringInfoString(&buf, pattern->varNames[elem->varId]); + append_rpr_quantifier(&buf, elem); + needSpace = true; + + if (elem->jump != RPR_ELEMIDX_INVALID) + altSepAt = elem->jump; + } + } + + /* Close remaining */ + while (prevDepth > 0) + { + appendStringInfoChar(&buf, ')'); + prevDepth--; + } + + return buf.data; +} + /* * Show the window definition for a WindowAgg node. */ @@ -2947,6 +3078,18 @@ show_window_def(WindowAggState *planstate, List *ancestors, ExplainState *es) appendStringInfoChar(&wbuf, ')'); ExplainPropertyText("Window", wbuf.data, es); pfree(wbuf.data); + + /* Show Row Pattern Recognition pattern if present */ + if (wagg->rpPattern != NULL) + { + char *patternStr = deparse_rpr_pattern(wagg->rpPattern); + + if (patternStr != NULL) + { + ExplainPropertyText("Pattern", patternStr, es); + pfree(patternStr); + } + } } /* diff --git a/src/backend/executor/nodeWindowAgg.c b/src/backend/executor/nodeWindowAgg.c index 6a945a1a94d..cf43c1f6127 100644 --- a/src/backend/executor/nodeWindowAgg.c +++ b/src/backend/executor/nodeWindowAgg.c @@ -42,8 +42,10 @@ #include "executor/nodeWindowAgg.h" #include "miscadmin.h" #include "nodes/nodeFuncs.h" +#include "nodes/plannodes.h" #include "optimizer/clauses.h" #include "optimizer/optimizer.h" +#include "optimizer/rpr.h" #include "parser/parse_agg.h" #include "parser/parse_coerce.h" #include "regex/regex.h" @@ -173,24 +175,6 @@ typedef struct WindowStatePerAggData bool restart; /* need to restart this agg in this cycle? */ } WindowStatePerAggData; -/* - * Set of StringInfo. Used in RPR. - */ -#define STRSET_FROZEN (1 << 0) /* string is frozen */ -#define STRSET_DISCARDED (1 << 1) /* string is scheduled to be discarded */ -#define STRSET_MATCHED (1 << 2) /* string is confirmed to be matched - * with pattern */ - -typedef struct StringSet -{ - StringInfo *str_set; - Size set_size; /* current array allocation size in number of - * items */ - int set_index; /* current used size */ - int *info; /* an array of information bit per StringInfo. - * see above */ -} StringSet; - /* * Structure used by check_rpr_navigation() and rpr_navigation_walker(). */ @@ -269,44 +253,29 @@ static void register_reduced_frame_map(WindowAggState *winstate, int64 pos, static void clear_reduced_frame_map(WindowAggState *winstate); static void update_reduced_frame(WindowObject winobj, int64 pos); -static int64 evaluate_pattern(WindowObject winobj, int64 current_pos, - char *vname, StringInfo encoded_str, bool *result); - -static bool get_slots(WindowObject winobj, int64 current_pos); - -static int search_str_set(WindowAggState *winstate, - StringSet *input_str_set); -static StringSet *generate_patterns(WindowAggState *winstate, StringSet *input_str_set, - char *pattern, VariablePos *variable_pos, - char tail_pattern_initial); -static int add_pattern(WindowAggState *winstate, StringInfo old, int old_info, - StringSet *new_str_set, - char c, char *pattern, char tail_pattern_initial, - int resultlen); -static int freeze_pattern(WindowAggState *winstate, StringInfo old, int old_info, - StringSet *new_str_set, - char *pattern, int resultlen); -static char pattern_initial(WindowAggState *winstate, char *vname); -static int do_pattern_match(WindowAggState *winstate, char *pattern, char *encoded_str, int len); -static void pattern_regfree(WindowAggState *winstate); - -static StringSet *string_set_init(void); -static void string_set_add(StringSet *string_set, StringInfo str, int info); -static StringInfo string_set_get(StringSet *string_set, int index, int *flag); -static int string_set_get_size(StringSet *string_set); -static void string_set_discard(StringSet *string_set); -static VariablePos *variable_pos_init(void); -static void variable_pos_register(VariablePos *variable_pos, char initial, - int pos); -static bool variable_pos_compare(VariablePos *variable_pos, - char initial1, char initial2); -static int variable_pos_fetch(VariablePos *variable_pos, char initial, - int index); -static VariablePos *variable_pos_build(WindowAggState *winstate); - static void check_rpr_navigation(Node *node, bool is_prev); static bool rpr_navigation_walker(Node *node, void *context); +/* NFA-based pattern matching functions */ +static RPRNFAState *nfa_state_alloc(WindowAggState *winstate); +static void nfa_state_free(WindowAggState *winstate, RPRNFAState *state); +static void nfa_state_free_list(WindowAggState *winstate, RPRNFAState *list); +static RPRNFAState *nfa_state_clone(WindowAggState *winstate, int16 elemIdx, + int16 altPriority, int16 *counts, + RPRNFAState *list); +static bool nfa_evaluate_row(WindowObject winobj, int64 pos, bool *varMatched, bool *anyMatch); +static RPRNFAContext *nfa_context_alloc(WindowAggState *winstate); +static void nfa_unlink_context(WindowAggState *winstate, RPRNFAContext *ctx); +static void nfa_context_free(WindowAggState *winstate, RPRNFAContext *ctx); +static void nfa_start_context(WindowAggState *winstate, int64 startPos); +static void nfa_step_single(WindowAggState *winstate, RPRNFAContext *ctx, + RPRNFAState *state, bool *varMatched, int64 currentPos); +static void nfa_finalize_boundary(WindowAggState *winstate, RPRNFAContext *ctx, + int64 matchEndPos); +static RPRNFAContext *nfa_find_context_for_pos(WindowAggState *winstate, int64 pos); +static void nfa_remove_contexts_up_to(WindowAggState *winstate, int64 endPos); +static void nfa_absorb_contexts(WindowAggState *winstate, RPRNFAContext *excludeCtx, int64 currentPos); + /* * Not null info bit array consists of 2-bit items */ @@ -1623,6 +1592,13 @@ release_partition(WindowAggState *winstate) tuplestore_clear(winstate->buffer); winstate->partition_spooled = false; winstate->next_partition = true; + + /* Reset NFA state for new partition */ + winstate->nfaContext = NULL; + winstate->nfaContextTail = NULL; + winstate->nfaContextFree = NULL; + winstate->nfaStateFree = NULL; + winstate->nfaLastProcessedRow = -1; } /* @@ -2985,9 +2961,15 @@ ExecInitWindowAgg(WindowAgg *node, EState *estate, int eflags) /* Set up SKIP TO type */ winstate->rpSkipTo = node->rpSkipTo; - /* Set up row pattern recognition PATTERN clause */ - winstate->patternVariableList = node->patternVariable; - winstate->patternRegexpList = node->patternRegexp; + /* Set up row pattern recognition PATTERN clause (compiled NFA) */ + winstate->rpPattern = node->rpPattern; + + /* Calculate NFA state size for allocation */ + if (node->rpPattern != NULL) + { + winstate->nfaStateSize = offsetof(RPRNFAState, counts) + + sizeof(int16) * node->rpPattern->maxDepth; + } /* Set up row pattern recognition DEFINE clause */ winstate->defineInitial = node->defineInitial; @@ -3019,16 +3001,24 @@ ExecInitWindowAgg(WindowAgg *node, EState *estate, int eflags) lappend(winstate->defineClauseList, exps); } } - /* set up regular expression compiled result cache for RPR */ - winstate->patbuf = NULL; - memset(&winstate->preg, 0, sizeof(winstate->preg)); + + /* Initialize NFA free lists for row pattern matching */ + winstate->nfaContext = NULL; + winstate->nfaContextTail = NULL; + winstate->nfaContextFree = NULL; + winstate->nfaStateFree = NULL; + winstate->nfaLastProcessedRow = -1; /* - * Build variable_pos + * Allocate varMatched array for NFA evaluation. + * With the new varNames ordering (DEFINE order first), varId == defineIdx + * for all defined variables, so no mapping is needed. */ - if (winstate->defineInitial) - winstate->variable_pos = variable_pos_build(winstate); - + if (list_length(winstate->defineVariableList) > 0) + winstate->nfaVarMatched = palloc0(sizeof(bool) * + list_length(winstate->defineVariableList)); + else + winstate->nfaVarMatched = NULL; winstate->all_first = true; winstate->partition_spooled = false; winstate->more_partitions = false; @@ -3173,8 +3163,6 @@ ExecEndWindowAgg(WindowAggState *node) pfree(node->perfunc); pfree(node->peragg); - pattern_regfree(node); - outerPlan = outerPlanState(node); ExecEndNode(outerPlan); } @@ -4540,7 +4528,7 @@ WinGetFuncArgCurrent(WindowObject winobj, int argno, bool *isnull) static bool rpr_is_defined(WindowAggState *winstate) { - return winstate->patternVariableList != NIL; + return winstate->rpPattern != NULL; } /* @@ -4614,7 +4602,7 @@ row_is_in_reduced_frame(WindowObject winobj, int64 pos) break; default: - elog(ERROR, "unrecognized state: %d at: " INT64_FORMAT, + elog(ERROR, "Unrecognized state: %d at: " INT64_FORMAT, state, pos); break; } @@ -4708,1229 +4696,1196 @@ register_reduced_frame_map(WindowAggState *winstate, int64 pos, int val) /* * update_reduced_frame - * Update reduced frame info. + * Update reduced frame info using multi-context NFA pattern matching. + * + * Maintains multiple NFA contexts simultaneously, one for each potential + * match start position. This allows sharing row evaluations across contexts, + * avoiding redundant DEFINE clause evaluations when rewinding for SKIP TO + * NEXT ROW mode. + * + * Key optimizations: + * - Row evaluations (expensive DEFINE clauses) happen only once per row + * - All active contexts share the same evaluation results + * - Contexts persist across calls, enabling O(n) DEFINE evaluations */ static void update_reduced_frame(WindowObject winobj, int64 pos) { WindowAggState *winstate = winobj->winstate; - ListCell *lc1, - *lc2; - bool expression_result; - int num_matched_rows; - int64 original_pos; - bool anymatch; - StringInfo encoded_str; - StringSet *str_set; - bool greedy = false; - int64 result_pos, - i; - int init_size; + RPRNFAContext *targetCtx; + RPRNFAContext *firstCtx; + int64 matchLength = 0; + int64 currentPos; + int64 startPos; + int frameOptions = winstate->frameOptions; + bool hasLimitedFrame; + int64 frameOffset = 0; /* - * Set of pattern variables evaluated to true. Each character corresponds - * to pattern variable. Example: str_set[0] = "AB"; str_set[1] = "AC"; In - * this case at row 0 A and B are true, and A and C are true in row 1. + * Check if we have a limited frame (ROWS ... N FOLLOWING). + * Each context needs its own frame end based on matchStartRow + offset. */ - - /* initialize pattern variables set */ - str_set = string_set_init(); - - /* save original pos */ - original_pos = pos; + hasLimitedFrame = (frameOptions & FRAMEOPTION_ROWS) && + !(frameOptions & FRAMEOPTION_END_UNBOUNDED_FOLLOWING); + if (hasLimitedFrame && winstate->endOffsetValue != 0) + frameOffset = DatumGetInt64(winstate->endOffsetValue); /* - * Calculate initial memory allocation size from the number of pattern - * variables, + * Case 1: pos is before any existing context's start position. + * This means the position was already processed and determined unmatched. + * Note: contexts are added at head with increasing positions, so we need + * to find the minimum matchStartRow (the oldest context). */ - init_size = sizeof(char) * list_length(winstate->patternVariableList) + 1; + { + int64 minStartRow = INT64_MAX; + for (firstCtx = winstate->nfaContext; firstCtx != NULL; firstCtx = firstCtx->next) + { + if (firstCtx->matchStartRow < minStartRow) + minStartRow = firstCtx->matchStartRow; + } + if (minStartRow != INT64_MAX && pos < minStartRow) + { + register_reduced_frame_map(winstate, pos, RF_UNMATCHED); + return; + } + } /* - * Check if the pattern does not include any greedy quantifier. If it does - * not, we can just apply the pattern to each row. If it succeeds, we are - * done. + * Case 2: Find existing context for this pos, or create new one. */ - foreach(lc1, winstate->patternRegexpList) + targetCtx = nfa_find_context_for_pos(winstate, pos); + if (targetCtx == NULL) { - char *quantifier = strVal(lfirst(lc1)); - - if (*quantifier == '+' || *quantifier == '*' || *quantifier == '?') - { - greedy = true; - break; - } + /* No context exists - create one and start fresh */ + nfa_start_context(winstate, pos); + targetCtx = winstate->nfaContext; } /* - * Non greedy case + * Determine where to start processing. + * If we've already evaluated rows beyond pos, continue from there. + */ + startPos = Max(pos, winstate->nfaLastProcessedRow + 1); + + /* + * Process rows until target context completes or we hit boundaries. + * Each row evaluation is shared across all active contexts. */ - if (!greedy) + for (currentPos = startPos; targetCtx->states != NULL; currentPos++) { - num_matched_rows = 0; - encoded_str = makeStringInfoExt(init_size); + bool rowExists; + bool anyMatch; + RPRNFAContext *ctx; - foreach(lc1, winstate->patternVariableList) - { - char *vname = strVal(lfirst(lc1)); -#ifdef RPR_DEBUG - printf("pos:" INT64_FORMAT " pattern vname: %s\n", - pos, vname); -#endif - expression_result = false; + /* Evaluate variables for this row - done only once, shared by all contexts */ + rowExists = nfa_evaluate_row(winobj, currentPos, winstate->nfaVarMatched, &anyMatch); - /* evaluate row pattern against current row */ - result_pos = evaluate_pattern(winobj, pos, vname, - encoded_str, &expression_result); - if (!expression_result || result_pos < 0) + /* No more rows in partition? Finalize all contexts */ + if (!rowExists) + { + for (ctx = winstate->nfaContext; ctx != NULL; ctx = ctx->next) { -#ifdef RPR_DEBUG - printf("expression result is false or out of frame\n"); -#endif - register_reduced_frame_map(winstate, original_pos, - RF_UNMATCHED); - destroyStringInfo(encoded_str); - return; + if (ctx->states != NULL) + nfa_finalize_boundary(winstate, ctx, currentPos - 1); } - /* move to next row */ - pos++; - num_matched_rows++; - } - destroyStringInfo(encoded_str); -#ifdef RPR_DEBUG - printf("pattern matched\n"); -#endif - register_reduced_frame_map(winstate, original_pos, RF_FRAME_HEAD); - - for (i = original_pos + 1; i < original_pos + num_matched_rows; i++) - { - register_reduced_frame_map(winstate, i, RF_SKIPPED); + break; } - return; - } - /* - * Greedy quantifiers included. Loop over until none of pattern matches or - * encounters end of frame. - */ - for (;;) - { - result_pos = -1; + /* Update last processed row */ + winstate->nfaLastProcessedRow = currentPos; /* - * Loop over each PATTERN variable. + * Process each active context with this row's evaluation results. + * Each context has its own frame boundary based on matchStartRow. */ - anymatch = false; - - encoded_str = makeStringInfoExt(init_size); - - forboth(lc1, winstate->patternVariableList, lc2, - winstate->patternRegexpList) + for (ctx = winstate->nfaContext; ctx != NULL; ctx = ctx->next) { - char *vname = strVal(lfirst(lc1)); -#ifdef RPR_DEBUG - char *quantifier = strVal(lfirst(lc2)); - - printf("pos: " INT64_FORMAT " pattern vname: %s quantifier: %s\n", - pos, vname, quantifier); -#endif - expression_result = false; + int64 ctxFrameEnd; - /* evaluate row pattern against current row */ - result_pos = evaluate_pattern(winobj, pos, vname, - encoded_str, &expression_result); - if (expression_result) - { -#ifdef RPR_DEBUG - printf("expression result is true\n"); -#endif - anymatch = true; - } + /* Skip already-completed contexts */ + if (ctx->states == NULL) + continue; /* - * If out of frame, we are done. + * Calculate per-context frame end. + * For "ROWS BETWEEN CURRENT ROW AND N FOLLOWING", each context's + * frame end is matchStartRow + offset + 1 (exclusive). */ - if (result_pos < 0) - break; - } + if (hasLimitedFrame) + { + ctxFrameEnd = ctx->matchStartRow + frameOffset + 1; + if (currentPos >= ctxFrameEnd) + { + nfa_finalize_boundary(winstate, ctx, ctxFrameEnd - 1); + continue; + } + } - if (!anymatch) - { - /* none of patterns matched. */ - break; - } + /* First row of this context must match at least one variable */ + if (currentPos == ctx->matchStartRow && !anyMatch) + { + /* Clear states to mark as unmatched */ + nfa_state_free_list(winstate, ctx->states); + ctx->states = NULL; + continue; + } - string_set_add(str_set, encoded_str, 0); + /* Skip if this row is before context's start */ + if (currentPos < ctx->matchStartRow) + continue; -#ifdef RPR_DEBUG - printf("pos: " INT64_FORMAT " encoded_str: %s\n", - pos, encoded_str->data); -#endif + /* Process states for this context */ + { + RPRNFAState *states = ctx->states; + RPRNFAState *state; + RPRNFAState *nextState; - /* move to next row */ - pos++; + ctx->states = NULL; - if (result_pos < 0) - { - /* out of frame */ - break; + for (state = states; state != NULL; state = nextState) + { + nextState = state->next; + state->next = NULL; + nfa_step_single(winstate, ctx, state, winstate->nfaVarMatched, currentPos); + } + } } - } - if (string_set_get_size(str_set) == 0) - { - /* no match found in the first row */ - register_reduced_frame_map(winstate, original_pos, RF_UNMATCHED); - destroyStringInfo(encoded_str); - return; - } + /* + * Create a new context for the next potential start position. + * This enables overlapping match detection for SKIP TO NEXT ROW. + */ + if (anyMatch) + nfa_start_context(winstate, currentPos + 1); -#ifdef RPR_DEBUG - printf("pos: " INT64_FORMAT " encoded_str: %s\n", - pos, encoded_str->data); -#endif + /* + * Absorb redundant contexts. + * At the same elementIndex, if newer context's count <= older context's count, + * the newer context can be absorbed (for unbounded quantifiers). + * Note: Never absorb targetCtx - it's the context we're trying to complete. + */ + nfa_absorb_contexts(winstate, targetCtx, currentPos); - /* look for matching pattern variable sequence */ -#ifdef RPR_DEBUG - printf("search_str_set started\n"); -#endif - num_matched_rows = search_str_set(winstate, str_set); + /* Check if target context is now complete */ + if (targetCtx->states == NULL) + break; + } -#ifdef RPR_DEBUG - printf("search_str_set returns: %d\n", num_matched_rows); -#endif - string_set_discard(str_set); + /* + * Get match result from target context. + */ + if (targetCtx->matchEndRow >= pos) + matchLength = targetCtx->matchEndRow - pos + 1; /* - * We are at the first row in the reduced frame. Save the number of - * matched rows as the number of rows in the reduced frame. + * Register reduced frame map based on match result. */ - if (num_matched_rows <= 0) + if (matchLength <= 0) { - /* no match */ - register_reduced_frame_map(winstate, original_pos, RF_UNMATCHED); + register_reduced_frame_map(winstate, pos, RF_UNMATCHED); } else { - register_reduced_frame_map(winstate, original_pos, RF_FRAME_HEAD); - - for (i = original_pos + 1; i < original_pos + num_matched_rows; i++) + register_reduced_frame_map(winstate, pos, RF_FRAME_HEAD); + for (int64 i = pos + 1; i < pos + matchLength; i++) { register_reduced_frame_map(winstate, i, RF_SKIPPED); } } - return; -} - -/* - * search_str_set - * - * Perform pattern matching using "pattern" against input_str_set. pattern is - * a regular expression string derived from PATTERN clause. Note that the - * regular expression string is prefixed by '^' and followed by initials - * represented in a same way as str_set. str_set is a set of StringInfo. Each - * StringInfo has a string comprising initials of pattern variable strings - * being true in a row. The initials are one of [a-z], parallel to the order - * of variable names in DEFINE clause. Suppose DEFINE has variables START, UP - * and DOWN. If PATTERN has START, UP+ and DOWN, then the initials in PATTERN - * will be 'a', 'b' and 'c'. The "pattern" will be "^ab+c". - * - * variable_pos is an array representing the order of pattern variable string - * initials in PATTERN clause. For example initial 'a' potion is in - * variable_pos[0].pos[0] = 0. Note that if the pattern is "START UP DOWN UP" - * (UP appears twice), then "UP" (initial is 'b') has two position 1 and - * 3. Thus variable_pos for b is variable_pos[1].pos[0] = 1 and - * variable_pos[1].pos[1] = 3. - * - * Returns the longest number of the matching rows (greedy matching) if - * quatifier '+' or '*' is included in "pattern". - */ -static int -search_str_set(WindowAggState *winstate, StringSet *input_str_set) -{ - char *pattern; /* search regexp pattern */ - VariablePos *variable_pos; - int set_size; /* number of rows in the set */ - int resultlen; - int index; - StringSet *new_str_set; - int new_str_size; - int len; - int info; - char tail_pattern_initial; - /* - * Set last initial char to tail_pattern_initial if we can apply "tail - * pattern initial optimization". If the last regexp component in pattern - * is with '+' quatifier, set the initial to tail_pattern_initial. For - * example if pattern = "ab+", tail_pattern_initial will be 'b'. - * Otherwise, tail_pattern_initial is '\0'. + * Cleanup contexts based on SKIP mode. */ - pattern = winstate->pattern_str->data; - if (pattern[strlen(pattern) - 1] == '+') - tail_pattern_initial = pattern[strlen(pattern) - 2]; + if (matchLength > 0 && winstate->rpSkipTo == ST_PAST_LAST_ROW) + { + /* Remove all contexts with start <= matchEnd */ + nfa_remove_contexts_up_to(winstate, pos + matchLength - 1); + } else - tail_pattern_initial = '\0'; - - /* - * Generate all possible pattern variable name initials as a set of - * StringInfo named "new_str_set". For example, if we have two rows - * having "ab" (row 0) and "ac" (row 1) in the input str_set, new_str_set - * will have set of StringInfo "aa", "ac", "ba" and "bc" in the end. - */ - variable_pos = winstate->variable_pos; - new_str_set = generate_patterns(winstate, input_str_set, pattern, variable_pos, - tail_pattern_initial); - - /* - * Perform pattern matching to find out the longest match. - */ - new_str_size = string_set_get_size(new_str_set); - len = 0; - resultlen = 0; - set_size = string_set_get_size(input_str_set); - - for (index = 0; index < new_str_size; index++) { - StringInfo s; - - s = string_set_get(new_str_set, index, &info); - if (s == NULL) - continue; /* no data */ - - /* - * If the string is scheduled to be discarded, we just disregard it. - */ - if (info & STRSET_DISCARDED) - continue; + /* SKIP TO NEXT ROW or no match: just remove the target context */ + RPRNFAContext *ctx = winstate->nfaContext; - len = do_pattern_match(winstate, pattern, s->data, s->len); - if (len > resultlen) + while (ctx != NULL) { - /* remember the longest match */ - resultlen = len; - - /* - * If the size of result set is equal to the number of rows in the - * set, we are done because it's not possible that the number of - * matching rows exceeds the number of rows in the set. - */ - if (resultlen >= set_size) + if (ctx == targetCtx) + { + nfa_unlink_context(winstate, ctx); + nfa_context_free(winstate, ctx); break; + } + ctx = ctx->next; } } - - /* we no longer need new string set */ - string_set_discard(new_str_set); - - return resultlen; } /* - * generate_patterns + * NFA-based pattern matching implementation * - * Generate all possible pattern variable name initials in 'input_str_set' as - * a set of StringInfo and return it. For example, if we have two rows having - * "ab" (row 0) and "ac" (row 1) in 'input str_set', returned StringSet will - * have set of StringInfo "aa", "ac", "ba" and "bc" in the end. - * 'variable_pos' and 'tail_pattern_initial' are used for pruning - * optimization. + * These functions implement direct NFA execution using the compiled + * RPRPattern structure, avoiding regex compilation overhead. */ -static StringSet * -generate_patterns(WindowAggState *winstate, StringSet *input_str_set, - char *pattern, VariablePos *variable_pos, - char tail_pattern_initial) -{ - StringSet *old_str_set, - *new_str_set; - int index; - int set_size; - int old_set_size; - int info; - int resultlen; - StringInfo str; - int i; - char *p; - - new_str_set = string_set_init(); - set_size = string_set_get_size(input_str_set); - if (set_size == 0) /* if there's no row in input, return empty - * set */ - return new_str_set; - - resultlen = 0; - /* - * Generate initial new_string_set for input row 0. - */ - str = string_set_get(input_str_set, 0, &info); - p = str->data; +/* + * nfa_state_alloc + * + * Allocate an NFA state, reusing from freeList if available. + * freeList is stored in WindowAggState for reuse across match attempts. + * Uses flexible array member for counts[]. + */ +static RPRNFAState * +nfa_state_alloc(WindowAggState *winstate) +{ + RPRNFAState *state; + int maxDepth = winstate->rpPattern->maxDepth; - /* - * Loop over each new pattern variable char. - */ - while (*p) + /* Try to reuse from free list first */ + if (winstate->nfaStateFree != NULL) { - StringInfo new = makeStringInfo(); - - /* add pattern variable char */ - appendStringInfoChar(new, *p); - /* add new one to string set */ - string_set_add(new_str_set, new, 0); - p++; /* next pattern variable */ + state = winstate->nfaStateFree; + winstate->nfaStateFree = state->next; } - - /* - * Generate new_string_set for each input row. - */ - for (index = 1; index < set_size; index++) + else { - /* previous new str set now becomes old str set */ - old_str_set = new_str_set; - new_str_set = string_set_init(); /* create new string set */ - /* pick up input string */ - str = string_set_get(input_str_set, index, &info); - old_set_size = string_set_get_size(old_str_set); - - /* - * Loop over each row in the previous result set. - */ - for (i = 0; i < old_set_size; i++) - { - char last_old_char; - int old_str_len; - int old_info; - StringInfo old; - - old = string_set_get(old_str_set, i, &old_info); - p = old->data; - old_str_len = old->len; - if (old_str_len > 0) - last_old_char = p[old_str_len - 1]; - else - last_old_char = '\0'; - - /* Can this old set be discarded? */ - if (old_info & STRSET_DISCARDED) - continue; /* discard the old string */ + /* Allocate in partition context for proper lifetime */ + MemoryContext oldContext = MemoryContextSwitchTo(winstate->partcontext); + state = palloc(winstate->nfaStateSize); + MemoryContextSwitchTo(oldContext); + } - /* Is this old set frozen? */ - else if (old_info & STRSET_FROZEN) - { - /* if shorter match. we can discard it */ - if (old_str_len < resultlen) - continue; /* discard the shorter string */ + /* initialize state - clear all depth counts */ + state->next = NULL; + state->elemIdx = 0; + state->altPriority = 0; + /* Initialize all depth counts to 0 using memset */ + if (maxDepth > 0) + memset(state->counts, 0, sizeof(int16) * maxDepth); - /* move the old set to new_str_set */ - string_set_add(new_str_set, old, old_info); - old_str_set->str_set[i] = NULL; - continue; - } + return state; +} - /* - * loop over each pattern variable initial char in the input set. - */ - for (p = str->data; *p; p++) - { - /* - * Optimization. Check if the row's pattern variable initial - * character position is greater than or equal to the old - * set's last pattern variable initial character position. For - * example, if the old set's last pattern variable initials - * are "ab", then the new pattern variable initial can be "b" - * or "c" but can not be "a", if the initials in PATTERN is - * something like "a b c" or "a b+ c+" etc. This optimization - * is possible when we only allow "+" quantifier. - */ - if (variable_pos_compare(variable_pos, last_old_char, *p)) +/* + * nfa_state_free + * + * Return a state to the free list for later reuse. + */ +static void +nfa_state_free(WindowAggState *winstate, RPRNFAState *state) +{ + state->next = winstate->nfaStateFree; + winstate->nfaStateFree = state; +} - /* - * Satisfied the condition. Add new pattern char to - * new_str_set if it looks good. - */ - resultlen = add_pattern(winstate, old, old_info, new_str_set, *p, - pattern, tail_pattern_initial, resultlen); - else +/* + * nfa_state_free_list + * + * Free all states in a list using pfree. + */ +static void +nfa_state_free_list(WindowAggState *winstate, RPRNFAState *list) +{ + RPRNFAState *state; - /* - * The old_str did not satisfy the condition and it cannot - * be extended further. "Freeze" it. - */ - resultlen = freeze_pattern(winstate, old, old_info, - new_str_set, pattern, resultlen); - } - } - /* we no longer need old string set */ - string_set_discard(old_str_set); - } - return new_str_set; + while(list != NULL) + { + state = list; + list = list->next; + nfa_state_free(winstate, state); + } } /* - * add_pattern + * nfa_states_equal * - * Make a copy of 'old' (along with 'old_info' flag) and add new pattern char - * 'c' to it. Then add it to 'new_str_set'. 'pattern' and - * 'tail_pattern_initial' is checked to determine whether the copy is worth to - * add to new_str_set or not. The match length (possibly longer than - * 'resultlen') is returned. + * Check if two states are equivalent (same elemIdx and counts). */ -static int -add_pattern(WindowAggState *winstate, StringInfo old, int old_info, - StringSet *new_str_set, char c, char *pattern, char tail_pattern_initial, - int resultlen) +static bool +nfa_states_equal(WindowAggState *winstate, RPRNFAState *s1, RPRNFAState *s2) { - StringInfo new; - int info; - int len; + int maxDepth = winstate->rpPattern->maxDepth; - /* - * New char in the input row satisfies the condition above. - */ - new = makeStringInfoExt(old->len + 1); /* copy source string */ - appendStringInfoString(new, old->data); + if (s1->elemIdx != s2->elemIdx) + return false; - /* add pattern variable char */ - appendStringInfoChar(new, c); + if (maxDepth > 0 && + memcmp(s1->counts, s2->counts, sizeof(int16) * maxDepth) != 0) + return false; - /* - * Adhoc optimization. If the first letter in the input string is in the - * head and second position and there's no associated quatifier '+', then - * we can dicard the input because there's no chance to expand the string - * further. - * - * For example, pattern "abc" cannot match "aa". - */ - if (pattern[1] == new->data[0] && - pattern[1] == new->data[1] && - pattern[2] != '+' && - pattern[1] != pattern[2]) - { - destroyStringInfo(new); - return resultlen; - } + return true; +} - info = old_info; +/* + * nfa_add_state_unique + * + * Add a state to ctx->states at the END, only if no duplicate exists. + * Returns true if state was added, false if duplicate found (state is freed). + */ +static bool +nfa_add_state_unique(WindowAggState *winstate, RPRNFAContext *ctx, RPRNFAState *state) +{ + RPRNFAState *s; + RPRNFAState *prev = NULL; + RPRNFAState *tail = NULL; - /* - * Check if we can apply "tail pattern initial optimization". If the last - * regexp component in pattern has '+' quantifier, the component is set to - * the last pattern initial. For example if pattern is "ab+", - * tail_pattern_initial will become 'b'. Otherwise, tail_pattern_initial - * is '\0'. If the tail pattern initial optimization is possible, we do - * not need to apply regular expression match again. Suppose we have the - * previous string ended with "b" and the it was confirmed the regular - * expression match, then char 'b' can be added to the string without - * applying the regular expression match again. - */ - if (c == tail_pattern_initial) /* tail pattern initial optimization - * possible? */ + /* Check for duplicate and find tail */ + for (s = ctx->states; s != NULL; s = s->next) { - /* - * Is already confirmed to be matched with pattern? - */ - if ((info & STRSET_MATCHED) == 0) + if (nfa_states_equal(winstate, s, state)) { - /* not confirmed yet */ - len = do_pattern_match(winstate, pattern, new->data, new->len); - if (len > 0) - info = STRSET_MATCHED; /* set already confirmed flag */ - } - else - /* - * already confirmed. Use the string length as the matching length + * Duplicate found - keep lower altPriority for lexical order. + * Lower altPriority means earlier alternative in pattern. */ - len = new->len; - - /* update the longest match length if needed */ - if (len > resultlen) - resultlen = len; + if (state->altPriority < s->altPriority) + { + /* New state has better priority, replace existing */ + state->next = s->next; + if (prev == NULL) + ctx->states = state; + else + prev->next = state; + nfa_state_free(winstate, s); + return true; + } + /* Existing state has better/equal priority, discard new */ + nfa_state_free(winstate, state); + return false; + } + prev = s; + tail = s; } - /* add new StringInfo to the string set */ - string_set_add(new_str_set, new, info); + /* No duplicate, add at end */ + state->next = NULL; + if (tail == NULL) + ctx->states = state; + else + tail->next = state; - return resultlen; + return true; } /* - * freeze_pattern + * nfa_state_clone * - * "Freeze" 'old' (along with 'old_info' flag) and add it to - * 'new_str_set'. Frozen string is known to not be expanded further. The frozen - * string is check if it satisfies 'pattern'. If it does not, "discarded" - * mark is added. The discarded mark is also added if the match length is - * shorter than the current longest match length. The match length (possibly - * longer than 'resultlen') is returned. + * Clone a state with given elemIdx, altPriority and counts. + * Only copies counts up to elem->depth (not entire maxDepth). + * Prepends to the provided list and returns the new list head. */ -static int -freeze_pattern(WindowAggState *winstate, StringInfo old, int old_info, - StringSet *new_str_set, - char *pattern, int resultlen) +static RPRNFAState * +nfa_state_clone(WindowAggState *winstate, int16 elemIdx, int16 altPriority, + int16 *counts, RPRNFAState *list) { - int len; - StringInfo new; - int new_str_size; - int new_index; + RPRPattern *pattern = winstate->rpPattern; + int maxDepth = pattern->maxDepth; + RPRNFAState *state = nfa_state_alloc(winstate); + + state->elemIdx = elemIdx; + state->altPriority = altPriority; + /* nfa_state_alloc already zeroed all counts, now copy all depth levels */ + if (counts != NULL && maxDepth > 0) + memcpy(state->counts, counts, sizeof(int16) * maxDepth); + state->next = list; + + return state; +} - /* - * We are freezing this pattern string. If the pattern string length is - * shorter than the current longest string length, we don't need to keep - * it. - */ - if (old->len < resultlen) - return resultlen; +/* + * nfa_add_matched_state + * + * Record a matched state following SQL standard lexical order preference. + * Priority: lower altPriority wins (lexical order), then longer match. + */ +static void +nfa_add_matched_state(WindowAggState *winstate, RPRNFAContext *ctx, + RPRNFAState *state, int64 matchEndRow) +{ + bool shouldUpdate = false; - if (old_info & STRSET_MATCHED) - /* we don't need to apply pattern match again */ - len = old->len; - else + if (ctx->matchedState == NULL) { - /* apply pattern match */ - len = do_pattern_match(winstate, pattern, old->data, old->len); - if (len <= 0) - { - /* no match. we can discard it */ - return resultlen; - } + /* No previous match, always save */ + shouldUpdate = true; } - if (len < resultlen) + else if (state->altPriority < ctx->matchedState->altPriority) { - /* shorter match. we can discard it */ - return resultlen; + /* New state has better lexical order priority */ + shouldUpdate = true; } - - /* - * Match length is the longest so far - */ - resultlen = len; /* remember the longest match */ - - /* freeze the pattern string */ - new = makeStringInfo(); - enlargeStringInfo(new, old->len + 1); - appendStringInfoString(new, old->data); - /* set frozen mark */ - string_set_add(new_str_set, new, STRSET_FROZEN); - - /* - * Search new_str_set to find out frozen entries that have shorter match - * length. Mark them as "discard" so that they are discarded in the next - * round. - */ - new_str_size = - string_set_get_size(new_str_set) - 1; - - /* loop over new_str_set */ - for (new_index = 0; new_index < new_str_size; new_index++) + else if (state->altPriority == ctx->matchedState->altPriority && + matchEndRow > ctx->matchEndRow) { - int info; - - new = string_set_get(new_str_set, new_index, &info); + /* Same priority, but longer match */ + shouldUpdate = true; + } - /* - * If this is frozen and is not longer than the current longest match - * length, we don't need to keep this. - */ - if (info & STRSET_FROZEN && new->len < resultlen) - { - /* - * mark this set to discard in the next round - */ - info |= STRSET_DISCARDED; - new_str_set->info[new_index] = info; - } + if (shouldUpdate) + { + /* Reuse existing matchedState or allocate from free list */ + if (ctx->matchedState == NULL) + ctx->matchedState = nfa_state_alloc(winstate); + + /* Copy state data */ + memcpy(ctx->matchedState, state, winstate->nfaStateSize); + ctx->matchedState->next = NULL; + ctx->matchEndRow = matchEndRow; } - return resultlen; } /* - * do_pattern_match + * nfa_free_matched_state * - * Perform pattern match using 'pattern' against 'encoded_str' whose length is - * 'len' bytes (without null terminate). Returns matching number of rows if - * matching is succeeded. Otherwise returns 0. + * Return matchedState to free list for reuse. */ -static int -do_pattern_match(WindowAggState *winstate, char *pattern, char *encoded_str, int len) +static void +nfa_free_matched_state(WindowAggState *winstate, RPRNFAState *state) { - int plen; - int cflags = REG_EXTENDED; - size_t nmatch = 1; - int eflags = 0; - regmatch_t pmatch[1]; - int sts; - pg_wchar *data; - int data_len; - - /* - * Compile regexp if cache does not exist or existing cache is not same as - * "pattern". - */ - if (winstate->patbuf == NULL || strcmp(winstate->patbuf, pattern)) - { - MemoryContext oldContext = MemoryContextSwitchTo - (winstate->ss.ps.ps_ExprContext->ecxt_per_query_memory); - - pattern_regfree(winstate); - - /* we need to convert to char to pg_wchar */ - plen = strlen(pattern); - data = (pg_wchar *) palloc((plen + 1) * sizeof(pg_wchar)); - data_len = pg_mb2wchar_with_len(pattern, data, plen); - /* compile re */ - sts = pg_regcomp(&winstate->preg, /* compiled re */ - data, /* target pattern */ - data_len, /* length of pattern */ - cflags, /* compile option */ - C_COLLATION_OID /* collation */ - ); - pfree(data); - - if (sts != REG_OKAY) - { - /* re didn't compile (no need for pg_regfree, if so) */ - ereport(ERROR, - (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION), - errmsg("invalid regular expression: %s", pattern))); - } - - /* save pattern */ - winstate->patbuf = pstrdup(pattern); - MemoryContextSwitchTo(oldContext); - } - - data = (pg_wchar *) palloc((len + 1) * sizeof(pg_wchar)); - data_len = pg_mb2wchar_with_len(encoded_str, data, len); - - /* execute the regular expression match */ - sts = pg_regexec( - &winstate->preg, /* compiled re */ - data, /* target string */ - data_len, /* length of encoded_str */ - 0, /* search start */ - NULL, /* rm details */ - nmatch, /* number of match sub re */ - pmatch, /* match result details */ - eflags); - - pfree(data); - - if (sts != REG_OKAY) - { - if (sts != REG_NOMATCH) - { - char errMsg[100]; - - pg_regerror(sts, &winstate->preg, errMsg, sizeof(errMsg)); - ereport(ERROR, - (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION), - errmsg("regular expression failed: %s", errMsg))); - } - return 0; /* does not match */ - } - - len = pmatch[0].rm_eo; /* return match length */ - return len; -} + if (state != NULL) + nfa_state_free(winstate, state); +} /* - * pattern_regfree + * nfa_evaluate_row * - * Free the regcomp result for PARTTERN string. + * Evaluate all DEFINE variables for current row. + * Returns true if the row exists, false if out of partition. + * If row exists, fills varMatched array and sets *anyMatch if any variable matched. + * varMatched[i] = true if variable i matched at current row. */ -static void -pattern_regfree(WindowAggState *winstate) -{ - if (winstate->patbuf != NULL) - { - pg_regfree(&winstate->preg); /* free previous re */ - pfree(winstate->patbuf); - winstate->patbuf = NULL; - } -} - -/* - * evaluate_pattern - * - * Evaluate expression associated with PATTERN variable vname. current_pos is - * relative row position in a frame (starting from 0). If vname is evaluated - * to true, initial letters associated with vname is appended to - * encode_str. result is out paramater representing the expression evaluation - * result is true of false. - *--------- - * Return values are: - * >=0: the last match absolute row position - * otherwise out of frame. - *--------- - */ -static int64 -evaluate_pattern(WindowObject winobj, int64 current_pos, - char *vname, StringInfo encoded_str, bool *result) +static bool +nfa_evaluate_row(WindowObject winobj, int64 pos, bool *varMatched, bool *anyMatchOut) { WindowAggState *winstate = winobj->winstate; ExprContext *econtext = winstate->ss.ps.ps_ExprContext; - ListCell *lc1, - *lc2, - *lc3; - ExprState *pat; - Datum eval_result; - bool out_of_frame = false; - bool isnull; + int numDefineVars = list_length(winstate->defineVariableList); + ListCell *lc; + int varIdx = 0; + bool anyMatch = false; TupleTableSlot *slot; - forthree(lc1, winstate->defineVariableList, - lc2, winstate->defineClauseList, - lc3, winstate->defineInitial) - { - char initial; /* initial letter associated with vname */ - char *name = strVal(lfirst(lc1)); - - if (strcmp(vname, name)) - continue; - - initial = *(strVal(lfirst(lc3))); - - /* set expression to evaluate */ - pat = lfirst(lc2); - - /* get current, previous and next tuples */ - if (!get_slots(winobj, current_pos)) - { - out_of_frame = true; - } - else - { - /* evaluate the expression */ - eval_result = ExecEvalExpr(pat, econtext, &isnull); - if (isnull) - { - /* expression is NULL */ -#ifdef RPR_DEBUG - printf("expression for %s is NULL at row: " INT64_FORMAT "\n", - vname, current_pos); -#endif - *result = false; - } - else - { - if (!DatumGetBool(eval_result)) - { - /* expression is false */ -#ifdef RPR_DEBUG - printf("expression for %s is false at row: " INT64_FORMAT "\n", - vname, current_pos); -#endif - *result = false; - } - else - { - /* expression is true */ -#ifdef RPR_DEBUG - printf("expression for %s is true at row: " INT64_FORMAT "\n", - vname, current_pos); -#endif - appendStringInfoChar(encoded_str, initial); - *result = true; - } - } - - slot = winstate->temp_slot_1; - if (slot != winstate->null_slot) - ExecClearTuple(slot); - slot = winstate->prev_slot; - if (slot != winstate->null_slot) - ExecClearTuple(slot); - slot = winstate->next_slot; - if (slot != winstate->null_slot) - ExecClearTuple(slot); - - break; - } - - if (out_of_frame) - { - *result = false; - return -1; - } - } - return current_pos; -} + *anyMatchOut = false; -/* - * get_slots - * - * Get current, previous and next tuples. - * Returns false if current row is out of partition/full frame. - */ -static bool -get_slots(WindowObject winobj, int64 current_pos) -{ - WindowAggState *winstate = winobj->winstate; - TupleTableSlot *slot; - int ret; - ExprContext *econtext; - - econtext = winstate->ss.ps.ps_ExprContext; + /* + * Set up slots for current, previous, and next rows. + * We don't call get_slots() here to avoid recursion through + * row_is_in_frame -> update_reduced_frame -> nfa_match_pattern. + */ - /* set up current row tuple slot */ + /* Current row -> ecxt_outertuple */ slot = winstate->temp_slot_1; - if (!window_gettupleslot(winobj, current_pos, slot)) - { -#ifdef RPR_DEBUG - printf("current row is out of partition at:" INT64_FORMAT "\n", - current_pos); -#endif - return false; - } - ret = row_is_in_frame(winobj, current_pos, slot, false); - if (ret <= 0) - { -#ifdef RPR_DEBUG - printf("current row is out of frame at: " INT64_FORMAT "\n", - current_pos); -#endif - ExecClearTuple(slot); - return false; - } + if (!window_gettupleslot(winobj, pos, slot)) + return false; /* No row exists */ econtext->ecxt_outertuple = slot; - /* for PREV */ - if (current_pos > 0) + /* Previous row -> ecxt_scantuple (for PREV) */ + if (pos > 0) { slot = winstate->prev_slot; - if (!window_gettupleslot(winobj, current_pos - 1, slot)) - { -#ifdef RPR_DEBUG - elog(DEBUG1, "previous row is out of partition at: " INT64_FORMAT, - current_pos - 1); -#endif + if (!window_gettupleslot(winobj, pos - 1, slot)) econtext->ecxt_scantuple = winstate->null_slot; - } else - { - ret = row_is_in_frame(winobj, current_pos - 1, slot, false); - if (ret <= 0) - { -#ifdef RPR_DEBUG - elog(DEBUG1, "previous row is out of frame at: " INT64_FORMAT, - current_pos - 1); -#endif - ExecClearTuple(slot); - econtext->ecxt_scantuple = winstate->null_slot; - } - else - { - econtext->ecxt_scantuple = slot; - } - } + econtext->ecxt_scantuple = slot; } else econtext->ecxt_scantuple = winstate->null_slot; - /* for NEXT */ + /* Next row -> ecxt_innertuple (for NEXT) */ slot = winstate->next_slot; - if (!window_gettupleslot(winobj, current_pos + 1, slot)) - { -#ifdef RPR_DEBUG - printf("next row is out of partiton at: " INT64_FORMAT "\n", - current_pos + 1); -#endif + if (!window_gettupleslot(winobj, pos + 1, slot)) econtext->ecxt_innertuple = winstate->null_slot; - } else + econtext->ecxt_innertuple = slot; + + foreach(lc, winstate->defineClauseList) { - ret = row_is_in_frame(winobj, current_pos + 1, slot, false); - if (ret <= 0) + ExprState *exprState = (ExprState *) lfirst(lc); + Datum result; + bool isnull; + + /* Evaluate DEFINE expression */ + result = ExecEvalExpr(exprState, econtext, &isnull); + + if (!isnull && DatumGetBool(result)) { -#ifdef RPR_DEBUG - printf("next row is out of frame at: " INT64_FORMAT "\n", - current_pos + 1); -#endif - ExecClearTuple(slot); - econtext->ecxt_innertuple = winstate->null_slot; + varMatched[varIdx] = true; + anyMatch = true; } else - econtext->ecxt_innertuple = slot; + { + varMatched[varIdx] = false; + } + + varIdx++; + if (varIdx >= numDefineVars) + break; } - return true; + + *anyMatchOut = anyMatch; + return true; /* Row exists */ } /* - * pattern_initial + * nfa_context_alloc * - * Return pattern variable initial character - * matching with pattern variable name vname. - * If not found, return 0. + * Allocate an NFA context from free list or palloc. */ -static char -pattern_initial(WindowAggState *winstate, char *vname) +static RPRNFAContext * +nfa_context_alloc(WindowAggState *winstate) { - char initial; - char *name; - ListCell *lc1, - *lc2; + RPRNFAContext *ctx; - forboth(lc1, winstate->defineVariableList, - lc2, winstate->defineInitial) + if (winstate->nfaContextFree != NULL) { - name = strVal(lfirst(lc1)); /* DEFINE variable name */ - initial = *(strVal(lfirst(lc2))); /* DEFINE variable initial */ + ctx = winstate->nfaContextFree; + winstate->nfaContextFree = ctx->next; + } + else + { + /* Allocate in partition context for proper lifetime */ + MemoryContext oldContext = MemoryContextSwitchTo(winstate->partcontext); + ctx = palloc(sizeof(RPRNFAContext)); + MemoryContextSwitchTo(oldContext); + } + ctx->next = NULL; + ctx->prev = NULL; + ctx->states = NULL; + ctx->matchStartRow = -1; + ctx->matchEndRow = -1; + ctx->matchedState = NULL; - if (!strcmp(name, vname)) - return initial; /* found */ - } - return 0; + return ctx; } /* - * string_set_init + * nfa_unlink_context * - * Create dynamic set of StringInfo. + * Remove a context from the doubly-linked active context list. + * Updates head (nfaContext) and tail (nfaContextTail) as needed. */ -static StringSet * -string_set_init(void) +static void +nfa_unlink_context(WindowAggState *winstate, RPRNFAContext *ctx) { -/* Initial allocation size of str_set */ -#define STRING_SET_ALLOC_SIZE 1024 - - StringSet *string_set; - Size set_size; + if (ctx->prev != NULL) + ctx->prev->next = ctx->next; + else + winstate->nfaContext = ctx->next; /* was head */ - string_set = palloc0(sizeof(StringSet)); - string_set->set_index = 0; - set_size = STRING_SET_ALLOC_SIZE; - string_set->str_set = palloc(set_size * sizeof(StringInfo)); - string_set->info = palloc0(set_size * sizeof(int)); - string_set->set_size = set_size; + if (ctx->next != NULL) + ctx->next->prev = ctx->prev; + else + winstate->nfaContextTail = ctx->prev; /* was tail */ - return string_set; + ctx->next = NULL; + ctx->prev = NULL; } /* - * string_set_add + * nfa_context_free * - * Add StringInfo str to StringSet string_set. + * Return a context to free list. Also frees any states in the context. + * Note: Caller must unlink context from active list first using nfa_unlink_context. */ static void -string_set_add(StringSet *string_set, StringInfo str, int info) +nfa_context_free(WindowAggState *winstate, RPRNFAContext *ctx) { - Size set_size; - Size old_set_size; - - set_size = string_set->set_size; - if (string_set->set_index >= set_size) - { - old_set_size = set_size; - set_size *= 2; - string_set->str_set = repalloc(string_set->str_set, - set_size * sizeof(StringInfo)); - string_set->info = repalloc0(string_set->info, - old_set_size * sizeof(int), - set_size * sizeof(int)); - string_set->set_size = set_size; - } - - string_set->info[string_set->set_index] = info; - string_set->str_set[string_set->set_index++] = str; - - return; + if (ctx->states != NULL) + nfa_state_free_list(winstate, ctx->states); + if (ctx->matchedState != NULL) + nfa_free_matched_state(winstate, ctx->matchedState); + + ctx->states = NULL; + ctx->matchedState = NULL; + ctx->next = winstate->nfaContextFree; + winstate->nfaContextFree = ctx; } /* - * string_set_get + * nfa_start_context * - * Returns StringInfo specified by index. - * If there's no data yet, returns NULL. + * Start a new match context at given position. + * Adds context to winstate->nfaContext list. */ -static StringInfo -string_set_get(StringSet *string_set, int index, int *info) +static void +nfa_start_context(WindowAggState *winstate, int64 startPos) { - *info = 0; - - /* no data? */ - if (index == 0 && string_set->set_index == 0) - return NULL; - - if (index < 0 || index >= string_set->set_index) - elog(ERROR, "invalid index: %d", index); + RPRNFAContext *ctx; - *info = string_set->info[index]; + ctx = nfa_context_alloc(winstate); + ctx->matchStartRow = startPos; + ctx->states = nfa_state_alloc(winstate); /* initial state at elem 0 */ - return string_set->str_set[index]; + /* Add to head of active context list (doubly-linked) */ + ctx->next = winstate->nfaContext; + ctx->prev = NULL; + if (winstate->nfaContext != NULL) + winstate->nfaContext->prev = ctx; + else + winstate->nfaContextTail = ctx; /* first context becomes tail */ + winstate->nfaContext = ctx; } /* - * string_set_get_size + * nfa_find_context_for_pos * - * Returns the size of StringSet. - */ -static int -string_set_get_size(StringSet *string_set) -{ - return string_set->set_index; -} - -/* - * string_set_discard - * Discard StringSet. - * All memory including StringSet itself is freed. + * Find a context with the given start position. + * Returns NULL if not found. */ -static void -string_set_discard(StringSet *string_set) +static RPRNFAContext * +nfa_find_context_for_pos(WindowAggState *winstate, int64 pos) { - int i; + RPRNFAContext *ctx; - for (i = 0; i < string_set->set_index; i++) + for (ctx = winstate->nfaContext; ctx != NULL; ctx = ctx->next) { - StringInfo str = string_set->str_set[i]; - - if (str) - destroyStringInfo(str); + if (ctx->matchStartRow == pos) + return ctx; } - pfree(string_set->info); - pfree(string_set->str_set); - pfree(string_set); + return NULL; } /* - * variable_pos_init + * nfa_remove_contexts_up_to * - * Create and initialize variable postion structure + * Remove all contexts with matchStartRow <= endPos. + * Used by SKIP PAST LAST ROW to discard contexts within matched frame. */ -static VariablePos * -variable_pos_init(void) +static void +nfa_remove_contexts_up_to(WindowAggState *winstate, int64 endPos) { - VariablePos *variable_pos; + RPRNFAContext *ctx; + RPRNFAContext *next; - variable_pos = palloc(sizeof(VariablePos) * NUM_ALPHABETS); - MemSet(variable_pos, -1, sizeof(VariablePos) * NUM_ALPHABETS); - return variable_pos; + ctx = winstate->nfaContext; + while (ctx != NULL) + { + next = ctx->next; + if (ctx->matchStartRow <= endPos) + { + /* Remove this context */ + nfa_unlink_context(winstate, ctx); + nfa_context_free(winstate, ctx); + } + ctx = next; + } } /* - * variable_pos_register + * nfa_absorb_contexts + * + * Absorb newer contexts into older ones when states are fully covered. + * For pattern like A+, if older context (started earlier) has count >= newer + * context's count at the same element, the newer context produces subset matches. + * + * Absorption condition: + * - For unbounded quantifiers (max=INT32_MAX): older.counts >= newer.counts + * - For bounded quantifiers: older.counts == newer.counts * - * Register pattern variable whose initial is initial into postion index. - * pos is position of initial. - * If pos is already registered, register it at next empty slot. + * Note: List is newest-first, so we check if later nodes (older contexts) + * can absorb earlier nodes (newer contexts). */ static void -variable_pos_register(VariablePos *variable_pos, char initial, int pos) +nfa_absorb_contexts(WindowAggState *winstate, RPRNFAContext *excludeCtx, int64 currentPos) { - int index = initial - 'a'; - int slot; - int i; + RPRPattern *pattern = winstate->rpPattern; + RPRNFAContext *ctx; + int maxDepth; + + /* Need at least 2 contexts for absorption */ + if (winstate->nfaContext == NULL || winstate->nfaContext->next == NULL) + return; - if (pos < 0 || pos >= NUM_ALPHABETS) - elog(ERROR, "initial is not valid char: %c", initial); + if (pattern == NULL) + return; + + /* + * Only absorb for patterns marked as absorbable during planning. + * See computeAbsorbability() in rpr.c for criteria. + */ + if (!pattern->isAbsorbable) + return; + + maxDepth = pattern->maxDepth; + + /* + * Context absorption: A later context (started at higher row) can be + * absorbed by an earlier context if ALL states in the later context + * are "covered" by states in the earlier context. + * + * A later state is covered by an earlier state if: + * 1. They are at the same element + * 2. For unbounded elements (max == INT32_MAX): earlier.counts[d] >= later.counts[d] + * for all depths d + * 3. For bounded elements: counts must be exactly equal at all depths + * + * List is newest-first (head = highest matchStartRow). + * We iterate from head (newest) and check if older contexts can absorb it. + */ + ctx = winstate->nfaContext; - for (i = 0; i < NUM_ALPHABETS; i++) + while (ctx != NULL) { - slot = variable_pos[index].pos[i]; - if (slot < 0) + RPRNFAContext *nextCtx = ctx->next; + RPRNFAContext *older; + bool absorbed = false; + + /* Never absorb the excluded context (it's the target being completed) */ + if (ctx == excludeCtx) { - /* empty slot found */ - variable_pos[index].pos[i] = pos; - return; + ctx = nextCtx; + continue; } - } - elog(ERROR, "no empty slot for initial: %c", initial); -} -/* - * variable_pos_compare - * - * Returns true if initial1 can be followed by initial2 - */ -static bool -variable_pos_compare(VariablePos *variable_pos, char initial1, char initial2) -{ - int index1, - index2; - int pos1, - pos2; + /* Skip contexts that haven't started processing yet (just created for future row) */ + if (ctx->matchStartRow > currentPos) + { + ctx = nextCtx; + continue; + } - for (index1 = 0;; index1++) - { - pos1 = variable_pos_fetch(variable_pos, initial1, index1); - if (pos1 < 0) - break; + /* + * Handle completed contexts (states == NULL) separately. + * A completed context can be absorbed by an older completed context + * if the older one has the same or later matchEndRow. + */ + if (ctx->states == NULL) + { + /* Only completed contexts with valid matchEndRow can be absorbed */ + if (ctx->matchEndRow < 0) + { + ctx = nextCtx; + continue; + } + + /* Check if any older completed context can absorb this one */ + for (older = ctx->next; older != NULL && !absorbed; older = older->next) + { + /* Must have started earlier */ + if (older->matchStartRow >= ctx->matchStartRow) + continue; + + /* Older must also be completed with valid matchEndRow */ + if (older->states != NULL || older->matchEndRow < 0) + continue; - for (index2 = 0;; index2++) + /* + * Older context absorbs newer if it has the same or later + * matchEndRow, meaning all matches from newer are subsets. + */ + if (older->matchEndRow >= ctx->matchEndRow) + { + /* Absorb: remove ctx (newer) */ + nfa_unlink_context(winstate, ctx); + nfa_context_free(winstate, ctx); + absorbed = true; + } + } + + ctx = nextCtx; + continue; + } + + /* + * Check if all states in ctx are on absorbable branches. + * If any state is on a non-absorbable branch, skip this context. + */ { - pos2 = variable_pos_fetch(variable_pos, initial2, index2); - if (pos2 < 0) - break; - if (pos1 <= pos2) - return true; + RPRNFAState *s; + bool allAbsorbable = true; + + for (s = ctx->states; s != NULL && allAbsorbable; s = s->next) + { + RPRPatternElement *branchFirst; + + /* altPriority is the branch's first element index */ + if (s->altPriority < 0 || s->altPriority >= pattern->numElements) + continue; + + branchFirst = &pattern->elements[s->altPriority]; + if (!(branchFirst->flags & RPR_ELEM_ABSORBABLE)) + allAbsorbable = false; + } + + if (!allAbsorbable) + { + ctx = nextCtx; + continue; + } } + + /* + * Check if any older context can absorb this one. + * Older contexts have lower matchStartRow. + */ + for (older = ctx->next; older != NULL && !absorbed; older = older->next) + { + RPRNFAState *laterState; + RPRNFAState *earlierState; + bool canAbsorb; + int laterCount = 0; + int earlierCount = 0; + + /* Skip if not started earlier */ + if (older->matchStartRow >= ctx->matchStartRow) + continue; + + /* Skip contexts that haven't started processing yet */ + if (older->matchStartRow > currentPos) + continue; + + /* + * For in-progress ctx, older must also be in-progress. + * (Completed older contexts are handled above for completed ctx.) + */ + if (older->states == NULL) + continue; + + /* Count states in both contexts */ + for (laterState = ctx->states; laterState != NULL; laterState = laterState->next) + laterCount++; + for (earlierState = older->states; earlierState != NULL; earlierState = earlierState->next) + earlierCount++; + + /* Must have same number of states (same structure) */ + if (laterCount != earlierCount) + continue; + + /* + * Check if ALL states in ctx (later) are covered by states in older. + * Both must have the same set of element indices. + */ + canAbsorb = true; + for (laterState = ctx->states; laterState != NULL && canAbsorb; laterState = laterState->next) + { + bool found = false; + + for (earlierState = older->states; earlierState != NULL && !found; earlierState = earlierState->next) + { + RPRPatternElement *elem; + bool countsMatch; + int d; + + /* Must be at same element */ + if (earlierState->elemIdx != laterState->elemIdx) + continue; + + /* Must be on same branch (same altPriority) */ + if (earlierState->altPriority != laterState->altPriority) + continue; + + /* Handle invalid element index (terminal state) */ + if (laterState->elemIdx < 0 || laterState->elemIdx >= pattern->numElements) + { + found = true; + break; + } + + elem = &pattern->elements[laterState->elemIdx]; + countsMatch = true; + + if (elem->max == INT32_MAX) + { + /* Unbounded: earlier.count >= later.count at all depths */ + for (d = 0; d <= maxDepth && countsMatch; d++) + { + if (earlierState->counts[d] < laterState->counts[d]) + countsMatch = false; + } + } + else + { + /* Bounded: counts must be exactly equal at all depths */ + for (d = 0; d <= maxDepth && countsMatch; d++) + { + if (earlierState->counts[d] != laterState->counts[d]) + countsMatch = false; + } + } + + if (countsMatch) + found = true; + } + + if (!found) + canAbsorb = false; + } + + if (canAbsorb) + { + /* Absorb: remove ctx (newer) */ + nfa_unlink_context(winstate, ctx); + nfa_context_free(winstate, ctx); + absorbed = true; + } + } + + ctx = nextCtx; } - return false; } /* - * variable_pos_fetch + * nfa_finalize_boundary * - * Fetch position of pattern variable whose initial is initial, and whose index - * is index. If no postion was registered by initial, index, returns -1. + * Finalize NFA states at partition/frame boundary. + * Sets all varMatched to false and processes remaining states. */ -static int -variable_pos_fetch(VariablePos *variable_pos, char initial, int index) +static void +nfa_finalize_boundary(WindowAggState *winstate, RPRNFAContext *ctx, int64 matchEndPos) { - int pos = initial - 'a'; + RPRNFAState *states = ctx->states; + RPRNFAState *state; + RPRNFAState *nextState; + int numVars = list_length(winstate->defineVariableList); - if (pos < 0 || pos >= NUM_ALPHABETS) - elog(ERROR, "initial is not valid char: %c", initial); + ctx->states = NULL; - if (index < 0 || index >= NUM_ALPHABETS) - elog(ERROR, "index is not valid: %d", index); + for (int i = 0; i < numVars; i++) + winstate->nfaVarMatched[i] = false; - return variable_pos[pos].pos[index]; + for (state = states; state != NULL; state = nextState) + { + nextState = state->next; + state->next = NULL; + nfa_step_single(winstate, ctx, state, winstate->nfaVarMatched, matchEndPos); + } } /* - * variable_pos_build + * nfa_step_single * - * Build VariablePos structure and return it. + * Process one state through NFA for one row. + * New states are added to ctx->states. + * Match (FIN) is recorded in ctx->matchedState. + * When FIN is reached by matching (not skipping), matchEndRow is updated. */ -static VariablePos * -variable_pos_build(WindowAggState *winstate) +static void +nfa_step_single(WindowAggState *winstate, RPRNFAContext *ctx, + RPRNFAState *state, bool *varMatched, int64 currentPos) { - VariablePos *variable_pos; - StringInfo pattern_str; - int initial_index = 0; - ListCell *lc1, - *lc2; - - variable_pos = winstate->variable_pos = variable_pos_init(); - pattern_str = winstate->pattern_str = makeStringInfo(); - appendStringInfoChar(pattern_str, '^'); - - forboth(lc1, winstate->patternVariableList, - lc2, winstate->patternRegexpList) + RPRPattern *pattern = winstate->rpPattern; + RPRPatternElement *elements = pattern->elements; + RPRNFAState *pending = state; /* states to process in current row */ + + while (pending != NULL) { - char *vname = strVal(lfirst(lc1)); - char *quantifier = strVal(lfirst(lc2)); - char initial; + RPRPatternElement *elem; + bool matched; + int16 count; + int depth; - initial = pattern_initial(winstate, vname); - Assert(initial != 0); - appendStringInfoChar(pattern_str, initial); - if (quantifier[0]) - appendStringInfoChar(pattern_str, quantifier[0]); + state = pending; + pending = pending->next; + state->next = NULL; - /* - * Register the initial at initial_index. If the initial appears more - * than once, all of it's initial_index will be recorded. This could - * happen if a pattern variable appears in the PATTERN clause more - * than once like "UP DOWN UP" "UP UP UP". - */ - variable_pos_register(variable_pos, initial, initial_index); + Assert(state->elemIdx >= 0 && state->elemIdx < pattern->numElements); + elem = &elements[state->elemIdx]; + depth = elem->depth; - initial_index++; - } + if (RPRElemIsVar(elem)) + { + /* + * Variable: check if it matches current row. + * With DEFINE-first ordering, varId < numDefines has DEFINE expr, + * varId >= numDefines defaults to TRUE. + */ + int numDefines = list_length(winstate->defineVariableList); + + if (elem->varId >= numDefines) + matched = true; /* Not defined in DEFINE, defaults to TRUE */ + else + matched = varMatched[elem->varId]; + + count = state->counts[depth]; + + if (matched) + { + count++; + + if (elem->max != RPR_QUANTITY_INF && count > elem->max) + { + nfa_state_free(winstate, state); + continue; + } + + state->counts[depth] = count; + + /* Can repeat more? Clone for staying */ + if (elem->max == RPR_QUANTITY_INF || count < elem->max) + { + RPRNFAState *clone = nfa_state_clone(winstate, state->elemIdx, + state->altPriority, + state->counts, NULL); + nfa_add_state_unique(winstate, ctx, clone); + } + + /* Satisfied? Advance to next element */ + if (count >= elem->min) + { + RPRPatternElement *nextElem; + + state->counts[depth] = 0; + state->elemIdx = elem->next; + nextElem = &elements[state->elemIdx]; - return variable_pos; + if (RPRElemIsFin(nextElem)) + { + /* Match ends at current row since we matched */ + nfa_add_matched_state(winstate, ctx, state, currentPos); + nfa_state_free(winstate, state); + } + else if (RPRElemIsEnd(nextElem)) + { + /* + * END is epsilon transition - process immediately on same row. + * This ensures match end position is recorded at the row where + * the last VAR matched, not the next row. + */ + state->next = pending; + pending = state; + } + else + { + /* + * VAR, ALT - wait for next row. ALT dispatches to VARs that + * need input, so it must wait for the next row after VAR + * consumed the current row. + */ + nfa_add_state_unique(winstate, ctx, state); + } + } + else + { + nfa_state_free(winstate, state); + } + } + else + { + /* Not matched: can we skip? */ + if (count >= elem->min) + { + RPRPatternElement *nextElem; + + state->counts[depth] = 0; + state->elemIdx = elem->next; + nextElem = &elements[state->elemIdx]; + + if (RPRElemIsFin(nextElem)) + { + /* Match ends at previous row since current didn't match */ + nfa_add_matched_state(winstate, ctx, state, currentPos - 1); + nfa_state_free(winstate, state); + } + else if (RPRElemIsVar(nextElem)) + { + /* + * Current row was NOT consumed (skip case), so next VAR + * must be tried on the SAME row via pending list + */ + state->next = pending; + pending = state; + } + else + { + state->next = pending; + pending = state; + } + } + else + { + nfa_state_free(winstate, state); + } + } + } + else if (RPRElemIsFin(elem)) + { + /* Already at FIN - match ends at current row */ + nfa_add_matched_state(winstate, ctx, state, currentPos); + nfa_state_free(winstate, state); + } + else if (RPRElemIsAlt(elem)) + { + RPRElemIdx altIdx = elem->next; + bool first = true; + + /* + * ALT doesn't consume a row - it's just a dispatch point. + * All branches should be evaluated on the CURRENT row. + * Set altPriority to branch's elemIdx for lexical order tracking. + */ + while (altIdx >= 0 && altIdx < pattern->numElements) + { + RPRPatternElement *altElem = &elements[altIdx]; + + if (first) + { + state->elemIdx = altIdx; + state->altPriority = altIdx; /* lexical order */ + state->next = pending; + pending = state; + first = false; + } + else + { + pending = nfa_state_clone(winstate, altIdx, altIdx, + state->counts, pending); + } + + altIdx = altElem->jump; + } + + if (first) + nfa_state_free(winstate, state); + } + else if (RPRElemIsEnd(elem)) + { + count = state->counts[depth] + 1; + + if (count < elem->min) + { + /* + * Haven't reached minimum yet - must loop back. + * Add to ctx->states (next row) not pending (same row). + */ + state->counts[depth] = count; + for (int d = depth + 1; d < pattern->maxDepth; d++) + state->counts[d] = 0; + state->elemIdx = elem->jump; + nfa_add_state_unique(winstate, ctx, state); + } + else if (elem->max != RPR_QUANTITY_INF && count >= elem->max) + { + /* Reached maximum - must exit, continue processing */ + state->counts[depth] = 0; + state->elemIdx = elem->next; + state->next = pending; + pending = state; + } + else + { + /* + * Between min and max - can exit or continue. + * Exit state continues processing (pending). + * Loop state waits for next row (ctx->states). + */ + RPRNFAState *exitState = nfa_state_clone(winstate, elem->next, + state->altPriority, + state->counts, pending); + exitState->counts[depth] = 0; + pending = exitState; + + state->counts[depth] = count; + for (int d = depth + 1; d < pattern->maxDepth; d++) + state->counts[d] = 0; + state->elemIdx = elem->jump; + nfa_add_state_unique(winstate, ctx, state); + } + } + else + { + state->elemIdx = elem->next; + state->next = pending; + pending = state; + } + } } + diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index ff22a04abe5..5d69805fc24 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -16,6 +16,7 @@ #include "postgres.h" #include "miscadmin.h" +#include "nodes/plannodes.h" #include "utils/datum.h" @@ -166,6 +167,44 @@ _copyBitmapset(const Bitmapset *from) return bms_copy(from); } +static RPRPattern * +_copyRPRPattern(const RPRPattern *from) +{ + RPRPattern *newnode = makeNode(RPRPattern); + + COPY_SCALAR_FIELD(numVars); + COPY_SCALAR_FIELD(maxDepth); + COPY_SCALAR_FIELD(numElements); + + /* Deep copy the varNames array */ + if (from->numVars > 0) + { + newnode->varNames = palloc0(from->numVars * sizeof(char *)); + for (int i = 0; i < from->numVars; i++) + newnode->varNames[i] = pstrdup(from->varNames[i]); + } + else + { + newnode->varNames = NULL; + } + + /* Deep copy the elements array */ + if (from->numElements > 0) + { + newnode->elements = palloc(from->numElements * sizeof(RPRPatternElement)); + memcpy(newnode->elements, from->elements, + from->numElements * sizeof(RPRPatternElement)); + } + else + { + newnode->elements = NULL; + } + + COPY_SCALAR_FIELD(isAbsorbable); + + return newnode; +} + /* * copyObjectImpl -- implementation of copyObject(); see nodes/nodes.h diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c index 3d1a1adf86e..9410790e3d3 100644 --- a/src/backend/nodes/equalfuncs.c +++ b/src/backend/nodes/equalfuncs.c @@ -20,6 +20,7 @@ #include "postgres.h" #include "miscadmin.h" +#include "nodes/plannodes.h" #include "utils/datum.h" @@ -149,6 +150,38 @@ _equalBitmapset(const Bitmapset *a, const Bitmapset *b) return bms_equal(a, b); } +static bool +_equalRPRPattern(const RPRPattern *a, const RPRPattern *b) +{ + COMPARE_SCALAR_FIELD(numVars); + COMPARE_SCALAR_FIELD(maxDepth); + COMPARE_SCALAR_FIELD(numElements); + + /* Compare varNames array */ + if (a->numVars > 0) + { + if (a->varNames == NULL || b->varNames == NULL) + return false; + for (int i = 0; i < a->numVars; i++) + { + if (strcmp(a->varNames[i], b->varNames[i]) != 0) + return false; + } + } + + /* Compare elements array */ + if (a->numElements > 0) + { + if (a->elements == NULL || b->elements == NULL) + return false; + if (memcmp(a->elements, b->elements, + a->numElements * sizeof(RPRPatternElement)) != 0) + return false; + } + + return true; +} + /* * Lists are handled specially */ diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index c8eef2c75d2..1a70b016770 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -23,6 +23,7 @@ #include "nodes/bitmapset.h" #include "nodes/nodes.h" #include "nodes/pg_list.h" +#include "nodes/plannodes.h" #include "utils/datum.h" /* State flag that determines how nodeToStringInternal() should treat location fields */ @@ -718,6 +719,56 @@ _outA_Const(StringInfo str, const A_Const *node) WRITE_LOCATION_FIELD(location); } +static void +_outRPRPattern(StringInfo str, const RPRPattern *node) +{ + WRITE_NODE_TYPE("RPRPATTERN"); + + WRITE_INT_FIELD(numVars); + WRITE_INT_FIELD(maxDepth); + WRITE_INT_FIELD(numElements); + + /* Write varNames array as list of strings */ + appendStringInfoString(str, " :varNames"); + if (node->numVars > 0 && node->varNames != NULL) + { + appendStringInfoString(str, " ("); + for (int i = 0; i < node->numVars; i++) + { + if (i > 0) + appendStringInfoChar(str, ' '); + outToken(str, node->varNames[i]); + } + appendStringInfoChar(str, ')'); + } + else + appendStringInfoString(str, " <>"); + + /* Write elements array */ + appendStringInfoString(str, " :elements"); + if (node->numElements > 0 && node->elements != NULL) + { + appendStringInfoChar(str, ' '); + for (int i = 0; i < node->numElements; i++) + { + const RPRPatternElement *elem = &node->elements[i]; + + appendStringInfo(str, "(%d %u %d %d %d %d %d)", + (int) elem->varId, + (unsigned) elem->flags, + (int) elem->depth, + (int) elem->min, + (int) elem->max, + (int) elem->next, + (int) elem->jump); + } + } + else + appendStringInfoString(str, " <>"); + + WRITE_BOOL_FIELD(isAbsorbable); +} + /* * outNode - diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c index c11728c0f17..3ed31ba539d 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -30,6 +30,7 @@ #include "miscadmin.h" #include "nodes/bitmapset.h" +#include "nodes/plannodes.h" #include "nodes/readfuncs.h" @@ -560,6 +561,84 @@ _readExtensibleNode(void) READ_DONE(); } +static RPRPattern * +_readRPRPattern(void) +{ + READ_LOCALS(RPRPattern); + + READ_INT_FIELD(numVars); + READ_INT_FIELD(maxDepth); + READ_INT_FIELD(numElements); + + /* Read varNames array */ + token = pg_strtok(&length); /* skip :varNames */ + token = pg_strtok(&length); /* get '(' or '<>' */ + if (local_node->numVars > 0 && token[0] == '(') + { + local_node->varNames = palloc(local_node->numVars * sizeof(char *)); + for (int i = 0; i < local_node->numVars; i++) + { + token = pg_strtok(&length); + local_node->varNames[i] = debackslash(token, length); + } + token = pg_strtok(&length); /* skip ')' */ + } + else + { + local_node->varNames = NULL; + } + + /* Read elements array */ + token = pg_strtok(&length); /* skip :elements */ + token = pg_strtok(&length); /* get '(' or '<>' */ + if (local_node->numElements > 0 && token[0] == '(') + { + local_node->elements = palloc0(local_node->numElements * sizeof(RPRPatternElement)); + for (int i = 0; i < local_node->numElements; i++) + { + RPRPatternElement *elem = &local_node->elements[i]; + int varId, flags, depth, min, max, next, jump; + + /* Parse "(varId flags depth min max next jump)" */ + token = pg_strtok(&length); + varId = atoi(token); + token = pg_strtok(&length); + flags = atoi(token); + token = pg_strtok(&length); + depth = atoi(token); + token = pg_strtok(&length); + min = atoi(token); + token = pg_strtok(&length); + max = atoi(token); + token = pg_strtok(&length); + next = atoi(token); + token = pg_strtok(&length); + jump = atoi(token); + token = pg_strtok(&length); /* skip ')' */ + + elem->varId = (RPRVarId) varId; + elem->flags = (RPRElemFlags) flags; + elem->depth = (RPRDepth) depth; + elem->min = (RPRQuantity) min; + elem->max = (RPRQuantity) max; + elem->next = (RPRElemIdx) next; + elem->jump = (RPRElemIdx) jump; + + /* Read next element's '(' or end */ + if (i < local_node->numElements - 1) + token = pg_strtok(&length); /* get '(' */ + } + } + else + { + local_node->elements = NULL; + } + + READ_BOOL_FIELD(isAbsorbable); + + READ_DONE(); +} + /* * parseNodeString diff --git a/src/backend/optimizer/plan/Makefile b/src/backend/optimizer/plan/Makefile index 80ef162e484..7e0167789d8 100644 --- a/src/backend/optimizer/plan/Makefile +++ b/src/backend/optimizer/plan/Makefile @@ -19,6 +19,7 @@ OBJS = \ planagg.o \ planmain.o \ planner.o \ + rpr.o \ setrefs.o \ subselect.o diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index c287d2d05fd..51334f80a5c 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -37,6 +37,7 @@ #include "optimizer/prep.h" #include "optimizer/restrictinfo.h" #include "optimizer/subselect.h" +#include "optimizer/rpr.h" #include "optimizer/tlist.h" #include "parser/parse_clause.h" #include "parser/parsetree.h" @@ -289,8 +290,9 @@ static Memoize *make_memoize(Plan *lefttree, Oid *hashoperators, static WindowAgg *make_windowagg(List *tlist, WindowClause *wc, int partNumCols, AttrNumber *partColIdx, Oid *partOperators, Oid *partCollations, int ordNumCols, AttrNumber *ordColIdx, Oid *ordOperators, Oid *ordCollations, - List *runCondition, RPSkipTo rpSkipTo, List *patternVariable, - List *patternRegexp, List *defineClause, List *defineInitial, + List *runCondition, RPSkipTo rpSkipTo, + RPRPattern *compiledPattern, + List *defineClause, List *defineInitial, List *qual, bool topWindow, Plan *lefttree); static Group *make_group(List *tlist, List *qual, int numGroupCols, @@ -2532,26 +2534,37 @@ create_windowagg_plan(PlannerInfo *root, WindowAggPath *best_path) ordNumCols++; } - /* And finally we can make the WindowAgg node */ - plan = make_windowagg(tlist, - wc, - partNumCols, - partColIdx, - partOperators, - partCollations, - ordNumCols, - ordColIdx, - ordOperators, - ordCollations, - best_path->runCondition, - wc->rpSkipTo, - wc->patternVariable, - wc->patternRegexp, - wc->defineClause, - wc->defineInitial, - best_path->qual, - best_path->topwindow, - subplan); + /* Build defineVariableList from defineClause for pattern compilation */ + { + List *defineVariableList = NIL; + + foreach(lc, wc->defineClause) + { + TargetEntry *te = (TargetEntry *) lfirst(lc); + defineVariableList = lappend(defineVariableList, + makeString(pstrdup(te->resname))); + } + + /* And finally we can make the WindowAgg node */ + plan = make_windowagg(tlist, + wc, + partNumCols, + partColIdx, + partOperators, + partCollations, + ordNumCols, + ordColIdx, + ordOperators, + ordCollations, + best_path->runCondition, + wc->rpSkipTo, + buildRPRPattern(wc->rpPattern, defineVariableList), + wc->defineClause, + wc->defineInitial, + best_path->qual, + best_path->topwindow, + subplan); + } copy_generic_path_info(&plan->plan, (Path *) best_path); @@ -6617,8 +6630,9 @@ static WindowAgg * make_windowagg(List *tlist, WindowClause *wc, int partNumCols, AttrNumber *partColIdx, Oid *partOperators, Oid *partCollations, int ordNumCols, AttrNumber *ordColIdx, Oid *ordOperators, Oid *ordCollations, - List *runCondition, RPSkipTo rpSkipTo, List *patternVariable, - List *patternRegexp, List *defineClause, List *defineInitial, + List *runCondition, RPSkipTo rpSkipTo, + RPRPattern *compiledPattern, + List *defineClause, List *defineInitial, List *qual, bool topWindow, Plan *lefttree) { WindowAgg *node = makeNode(WindowAgg); @@ -6647,8 +6661,10 @@ make_windowagg(List *tlist, WindowClause *wc, node->inRangeNullsFirst = wc->inRangeNullsFirst; node->topWindow = topWindow; node->rpSkipTo = rpSkipTo; - node->patternVariable = patternVariable; - node->patternRegexp = patternRegexp; + + /* Store compiled pattern for NFA execution */ + node->rpPattern = compiledPattern; + node->defineClause = defineClause; node->defineInitial = defineInitial; diff --git a/src/backend/optimizer/plan/meson.build b/src/backend/optimizer/plan/meson.build index c565b2adbcc..5b2381cd774 100644 --- a/src/backend/optimizer/plan/meson.build +++ b/src/backend/optimizer/plan/meson.build @@ -7,6 +7,7 @@ backend_sources += files( 'planagg.c', 'planmain.c', 'planner.c', + 'rpr.c', 'setrefs.c', 'subselect.c', ) diff --git a/src/backend/optimizer/plan/rpr.c b/src/backend/optimizer/plan/rpr.c new file mode 100644 index 00000000000..0eed6b865ae --- /dev/null +++ b/src/backend/optimizer/plan/rpr.c @@ -0,0 +1,1024 @@ +/*------------------------------------------------------------------------- + * + * rpr.c + * Row Pattern Recognition pattern compilation for planner + * + * This file contains functions for optimizing RPR pattern AST and + * compiling it to bytecode for execution by WindowAgg. + * + * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/optimizer/plan/rpr.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "nodes/makefuncs.h" +#include "optimizer/rpr.h" + +/* Forward declarations */ +static bool rprPatternEqual(RPRPatternNode *a, RPRPatternNode *b); +static RPRPatternNode *optimizeRPRPattern(RPRPatternNode *pattern); +static void scanRPRPattern(RPRPatternNode *node, char **varNames, int *numVars, + int *numElements, RPRDepth depth, RPRDepth *maxDepth); +static void fillRPRPattern(RPRPatternNode *node, RPRPattern *pat, + int *idx, RPRDepth depth); +static RPRVarId getVarIdFromPattern(RPRPattern *pat, const char *varName); +static void computeAbsorbability(RPRPattern *pattern); + +/* + * rprPatternChildrenEqual + * Compare children of two GROUP nodes for equality. + * + * Returns true if the children lists are structurally identical. + * Used for GROUP merge optimization where we ignore outer quantifiers. + */ +static bool +rprPatternChildrenEqual(List *a, List *b) +{ + ListCell *lca, + *lcb; + + if (list_length(a) != list_length(b)) + return false; + + forboth(lca, a, lcb, b) + { + if (!rprPatternEqual((RPRPatternNode *) lfirst(lca), + (RPRPatternNode *) lfirst(lcb))) + return false; + } + + return true; +} + +/* + * rprPatternEqual + * Compare two RPRPatternNode trees for equality. + * + * Returns true if the trees are structurally identical. + */ +static bool +rprPatternEqual(RPRPatternNode *a, RPRPatternNode *b) +{ + ListCell *lca, + *lcb; + + if (a == NULL && b == NULL) + return true; + if (a == NULL || b == NULL) + return false; + + /* Must have same node type and quantifiers */ + if (a->nodeType != b->nodeType) + return false; + if (a->min != b->min || a->max != b->max) + return false; + if (a->reluctant != b->reluctant) + return false; + + switch (a->nodeType) + { + case RPR_PATTERN_VAR: + return strcmp(a->varName, b->varName) == 0; + + case RPR_PATTERN_SEQ: + case RPR_PATTERN_ALT: + case RPR_PATTERN_GROUP: + /* Must have same number of children */ + if (list_length(a->children) != list_length(b->children)) + return false; + + /* Compare each child */ + forboth(lca, a->children, lcb, b->children) + { + if (!rprPatternEqual((RPRPatternNode *) lfirst(lca), + (RPRPatternNode *) lfirst(lcb))) + return false; + } + return true; + } + + return false; /* keep compiler quiet */ +} + +/* + * optimizeRPRPattern + * Optimize RPRPatternNode tree in a single pass. + * + * Optimizations applied (bottom-up, in order per node): + * 1. Flatten nested SEQ: (A (B C)) -> (A B C) + * 2. Flatten nested ALT: (A | (B | C)) -> (A | B | C) + * 3. Unwrap GROUP{1,1}: ((A)) -> A, (A B){1,1} -> A B + * 4. Quantifier multiplication: (A{2}){3} -> A{6} + * 5. Remove duplicate alternatives: (A | B | A) -> (A | B) + * 6. Merge consecutive vars: A A A -> A{3,3} + * 7. Remove single-item SEQ/ALT wrappers + */ +static RPRPatternNode * +optimizeRPRPattern(RPRPatternNode *pattern) +{ + ListCell *lc; + List *newChildren; + + if (pattern == NULL) + return NULL; + + switch (pattern->nodeType) + { + case RPR_PATTERN_VAR: + /* Leaf node - nothing to optimize */ + return pattern; + + case RPR_PATTERN_SEQ: + { + RPRPatternNode *prev = NULL; + + /* Recursively optimize children, flatten SEQ/GROUP{1,1} */ + newChildren = NIL; + foreach(lc, pattern->children) + { + RPRPatternNode *child = (RPRPatternNode *) lfirst(lc); + RPRPatternNode *opt = optimizeRPRPattern(child); + + /* Flatten GROUP{1,1} or nested SEQ */ + if ((opt->nodeType == RPR_PATTERN_GROUP && + opt->min == 1 && opt->max == 1 && !opt->reluctant) || + opt->nodeType == RPR_PATTERN_SEQ) + { + newChildren = list_concat(newChildren, + list_copy(opt->children)); + } + else + { + newChildren = lappend(newChildren, opt); + } + } + + /* + * Merge consecutive identical VAR nodes with any quantifier. + * A{m1,M1} A{m2,M2} -> A{m1+m2, M1+M2} + * where INF + x = INF + */ + { + List *mergedChildren = NIL; + + foreach(lc, newChildren) + { + RPRPatternNode *child = (RPRPatternNode *) lfirst(lc); + + if (child->nodeType == RPR_PATTERN_VAR && !child->reluctant) + { + if (prev != NULL && + prev->nodeType == RPR_PATTERN_VAR && + strcmp(prev->varName, child->varName) == 0 && + !prev->reluctant) + { + /* + * Merge: accumulate min/max into prev. + * INF + anything = INF + */ + prev->min += child->min; + if (prev->max == RPR_QUANTITY_INF || + child->max == RPR_QUANTITY_INF) + prev->max = RPR_QUANTITY_INF; + else + prev->max += child->max; + } + else + { + /* Flush previous and start new */ + if (prev != NULL) + mergedChildren = lappend(mergedChildren, prev); + prev = child; + } + } + else + { + /* Non-mergeable - flush previous */ + if (prev != NULL) + mergedChildren = lappend(mergedChildren, prev); + mergedChildren = lappend(mergedChildren, child); + prev = NULL; + } + } + + /* Flush remaining */ + if (prev != NULL) + mergedChildren = lappend(mergedChildren, prev); + + newChildren = mergedChildren; + } + + /* + * Merge sequence prefix/suffix into GROUP with matching children. + * A B (A B)+ -> (A B){2,} + * (A B)+ A B -> (A B){2,} + * A B (A B)+ A B -> (A B){3,} + */ + { + List *groupMergedChildren = NIL; + int numChildren = list_length(newChildren); + int i; + bool merged = false; + int skipUntil = -1; /* skip suffix elements already absorbed */ + + for (i = 0; i < numChildren; i++) + { + RPRPatternNode *child = (RPRPatternNode *) list_nth(newChildren, i); + + /* Skip elements that were absorbed as suffix */ + if (i < skipUntil) + continue; + + /* + * If this is a GROUP, see if preceding/following elements + * match its children. + * GROUP's content may be wrapped in a SEQ - unwrap for comparison. + */ + if (child->nodeType == RPR_PATTERN_GROUP && !child->reluctant) + { + List *groupContent = child->children; + int groupChildCount; + int prefixLen = list_length(groupMergedChildren); + + /* + * If GROUP has single SEQ child, compare with SEQ's children. + * e.g., (A B)+ is GROUP[SEQ[A,B]], we want to compare [A,B]. + */ + if (list_length(groupContent) == 1) + { + RPRPatternNode *inner = (RPRPatternNode *) linitial(groupContent); + + if (inner->nodeType == RPR_PATTERN_SEQ) + groupContent = inner->children; + } + + groupChildCount = list_length(groupContent); + + /* + * PREFIX MERGE: Check if preceding elements match. + * Keep absorbing as long as we have matching prefixes. + */ + while (prefixLen >= groupChildCount && groupChildCount > 0) + { + List *prefixElements = NIL; + int j; + + /* Extract last groupChildCount elements from prefix */ + for (j = prefixLen - groupChildCount; j < prefixLen; j++) + { + prefixElements = lappend(prefixElements, + list_nth(groupMergedChildren, j)); + } + + /* Compare with GROUP's (possibly unwrapped) children */ + if (rprPatternChildrenEqual(prefixElements, groupContent)) + { + /* + * Match! Merge by incrementing GROUP's min. + * Remove the prefix elements from output. + */ + child->min += 1; + + /* Rebuild groupMergedChildren without matched prefix */ + { + List *trimmed = NIL; + + for (j = 0; j < prefixLen - groupChildCount; j++) + { + trimmed = lappend(trimmed, + list_nth(groupMergedChildren, j)); + } + groupMergedChildren = trimmed; + prefixLen = list_length(groupMergedChildren); + } + merged = true; + } + else + { + list_free(prefixElements); + break; + } + + list_free(prefixElements); + } + + /* + * SUFFIX MERGE: Check if following elements match. + * Keep absorbing as long as we have matching suffixes. + */ + while (i + groupChildCount < numChildren && groupChildCount > 0) + { + List *suffixElements = NIL; + int j; + int suffixStart = i + 1; + + /* Adjust for already absorbed elements */ + if (skipUntil > suffixStart) + break; + + /* Extract next groupChildCount elements as suffix */ + for (j = 0; j < groupChildCount; j++) + { + int idx = suffixStart + j; + + if (idx >= numChildren) + break; + suffixElements = lappend(suffixElements, + list_nth(newChildren, idx)); + } + + /* Compare with GROUP's children */ + if (list_length(suffixElements) == groupChildCount && + rprPatternChildrenEqual(suffixElements, groupContent)) + { + /* + * Match! Absorb suffix by incrementing min and skipping. + */ + child->min += 1; + skipUntil = suffixStart + groupChildCount; + merged = true; + /* Update i to continue suffix check after absorbed elements */ + i = skipUntil - 1; + } + else + { + list_free(suffixElements); + break; + } + + list_free(suffixElements); + } + } + + groupMergedChildren = lappend(groupMergedChildren, child); + } + + if (merged) + newChildren = groupMergedChildren; + } + + pattern->children = newChildren; + + /* Unwrap single-item SEQ */ + if (list_length(pattern->children) == 1) + return (RPRPatternNode *) linitial(pattern->children); + + return pattern; + } + + case RPR_PATTERN_ALT: + { + ListCell *lc2; + + /* Recursively optimize children, flatten nested ALT */ + newChildren = NIL; + foreach(lc, pattern->children) + { + RPRPatternNode *child = (RPRPatternNode *) lfirst(lc); + RPRPatternNode *opt = optimizeRPRPattern(child); + + if (opt->nodeType == RPR_PATTERN_ALT) + { + newChildren = list_concat(newChildren, + list_copy(opt->children)); + } + else + { + newChildren = lappend(newChildren, opt); + } + } + + /* Remove duplicate alternatives */ + { + List *uniqueChildren = NIL; + + foreach(lc, newChildren) + { + RPRPatternNode *child = (RPRPatternNode *) lfirst(lc); + bool isDuplicate = false; + + foreach(lc2, uniqueChildren) + { + if (rprPatternEqual((RPRPatternNode *) lfirst(lc2), child)) + { + isDuplicate = true; + break; + } + } + + if (!isDuplicate) + uniqueChildren = lappend(uniqueChildren, child); + } + + pattern->children = uniqueChildren; + } + + /* Unwrap single-item ALT */ + if (list_length(pattern->children) == 1) + return (RPRPatternNode *) linitial(pattern->children); + + return pattern; + } + + case RPR_PATTERN_GROUP: + /* Recursively optimize children */ + newChildren = NIL; + foreach(lc, pattern->children) + { + RPRPatternNode *child = (RPRPatternNode *) lfirst(lc); + + newChildren = lappend(newChildren, optimizeRPRPattern(child)); + } + pattern->children = newChildren; + + /* Quantifier multiplication: (A{m}){n} -> A{m*n} */ + if (list_length(pattern->children) == 1 && !pattern->reluctant) + { + RPRPatternNode *child = (RPRPatternNode *) linitial(pattern->children); + + if (child->nodeType == RPR_PATTERN_VAR && !child->reluctant) + { + if (pattern->max != RPR_QUANTITY_INF && child->max != RPR_QUANTITY_INF) + { + int64 new_min_64 = (int64) pattern->min * child->min; + int64 new_max_64 = (int64) pattern->max * child->max; + + if (new_min_64 < RPR_QUANTITY_INF && new_max_64 < RPR_QUANTITY_INF) + { + child->min = (int) new_min_64; + child->max = (int) new_max_64; + return child; + } + } + } + } + + /* Unwrap GROUP{1,1} */ + if (pattern->min == 1 && pattern->max == 1 && !pattern->reluctant) + { + if (list_length(pattern->children) == 1) + return (RPRPatternNode *) linitial(pattern->children); + + /* Multiple children: convert to SEQ */ + pattern->nodeType = RPR_PATTERN_SEQ; + } + + return pattern; + } + + return pattern; /* keep compiler quiet */ +} + +/* + * scanRPRPattern + * Single-pass scan: collect variable names and count elements. + * + * Collects unique variable names in pattern encounter order (max RPR_VARID_MAX). + * Also counts elements and tracks maximum nesting depth. + */ +static void +scanRPRPattern(RPRPatternNode *node, char **varNames, int *numVars, + int *numElements, RPRDepth depth, RPRDepth *maxDepth) +{ + ListCell *lc; + int i; + + if (node == NULL) + return; + + /* Track maximum depth */ + if (depth > *maxDepth) + *maxDepth = depth; + + switch (node->nodeType) + { + case RPR_PATTERN_VAR: + /* Count element */ + (*numElements)++; + + /* Collect variable name if not already present */ + for (i = 0; i < *numVars; i++) + { + if (strcmp(varNames[i], node->varName) == 0) + return; /* Already have this variable */ + } + + /* New variable */ + if (*numVars >= RPR_VARID_MAX) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("too many pattern variables"), + errdetail("Maximum is %d.", RPR_VARID_MAX))); + + varNames[*numVars] = node->varName; + (*numVars)++; + break; + + case RPR_PATTERN_SEQ: + /* Sequence: just recurse into children */ + foreach(lc, node->children) + { + scanRPRPattern((RPRPatternNode *) lfirst(lc), varNames, numVars, + numElements, depth, maxDepth); + } + break; + + case RPR_PATTERN_GROUP: + /* Recurse into children at increased depth */ + foreach(lc, node->children) + { + scanRPRPattern((RPRPatternNode *) lfirst(lc), varNames, numVars, + numElements, depth + 1, maxDepth); + } + + /* Add END element if group has non-trivial quantifier */ + if (node->min != 1 || node->max != 1) + (*numElements)++; + break; + + case RPR_PATTERN_ALT: + /* Count ALT start element */ + (*numElements)++; + + /* Recurse into children at increased depth */ + foreach(lc, node->children) + { + scanRPRPattern((RPRPatternNode *) lfirst(lc), varNames, numVars, + numElements, depth + 1, maxDepth); + } + break; + } +} + +/* + * getVarIdFromPattern + * Get variable ID for a variable name from RPRPattern. + * + * Returns the index of the variable in the varNames array. + */ +static RPRVarId +getVarIdFromPattern(RPRPattern *pat, const char *varName) +{ + for (int i = 0; i < pat->numVars; i++) + { + if (strcmp(pat->varNames[i], varName) == 0) + return (RPRVarId) i; + } + + /* Should not happen - variable should already be collected */ + elog(ERROR, "pattern variable \"%s\" not found", varName); + pg_unreachable(); +} + +/* + * fillRPRPattern + * Fill the pattern elements array (second pass). + * + * This traverses the AST and fills in the pre-allocated elements array. + * The idx pointer tracks the current position in the array. + */ +static void +fillRPRPattern(RPRPatternNode *node, RPRPattern *pat, int *idx, RPRDepth depth) +{ + ListCell *lc; + RPRPatternElement *elem; + int groupStartIdx; + int altStartIdx; + List *altBranchStarts; + List *altEndPositions; + + if (node == NULL) + return; + + switch (node->nodeType) + { + case RPR_PATTERN_SEQ: + /* Sequence: fill each child in order */ + foreach(lc, node->children) + { + fillRPRPattern((RPRPatternNode *) lfirst(lc), pat, idx, depth); + } + break; + + case RPR_PATTERN_VAR: + /* Variable: create element with varId */ + elem = &pat->elements[*idx]; + memset(elem, 0, sizeof(RPRPatternElement)); + elem->varId = getVarIdFromPattern(pat, node->varName); + elem->depth = depth; + elem->min = node->min; + elem->max = (node->max == INT_MAX) ? RPR_QUANTITY_INF : node->max; + elem->next = RPR_ELEMIDX_INVALID; + elem->jump = RPR_ELEMIDX_INVALID; + if (node->reluctant) + elem->flags |= RPR_ELEM_RELUCTANT; + (*idx)++; + break; + + case RPR_PATTERN_GROUP: + groupStartIdx = *idx; + + /* Fill group content at increased depth */ + foreach(lc, node->children) + { + fillRPRPattern((RPRPatternNode *) lfirst(lc), pat, idx, depth + 1); + } + + /* Add group end marker if group has non-trivial quantifier */ + if (node->min != 1 || node->max != 1) + { + elem = &pat->elements[*idx]; + memset(elem, 0, sizeof(RPRPatternElement)); + elem->varId = RPR_VARID_END; + elem->depth = depth; /* parent depth for iteration count */ + elem->min = node->min; + elem->max = (node->max == INT_MAX) ? RPR_QUANTITY_INF : node->max; + elem->next = RPR_ELEMIDX_INVALID; + elem->jump = groupStartIdx; /* jump back to group start */ + if (node->reluctant) + elem->flags |= RPR_ELEM_RELUCTANT; + (*idx)++; + } + break; + + case RPR_PATTERN_ALT: + /* Add alternation start marker */ + altStartIdx = *idx; + elem = &pat->elements[*idx]; + memset(elem, 0, sizeof(RPRPatternElement)); + elem->varId = RPR_VARID_ALT; + elem->depth = depth; + elem->min = 1; + elem->max = 1; + elem->next = RPR_ELEMIDX_INVALID; + elem->jump = RPR_ELEMIDX_INVALID; + (*idx)++; + + altBranchStarts = NIL; + altEndPositions = NIL; + + /* Fill each alternative */ + foreach(lc, node->children) + { + RPRPatternNode *alt = (RPRPatternNode *) lfirst(lc); + int branchStart = *idx; + + altBranchStarts = lappend_int(altBranchStarts, branchStart); + + fillRPRPattern(alt, pat, idx, depth + 1); + + /* Record end position if any elements were added */ + if (*idx > branchStart) + altEndPositions = lappend_int(altEndPositions, *idx - 1); + } + + /* Set ALT_START.next to first alternative */ + if (list_length(altBranchStarts) > 0) + pat->elements[altStartIdx].next = linitial_int(altBranchStarts); + + /* Set jump on first element of each alternative to next alternative */ + foreach(lc, altBranchStarts) + { + int firstElemIdx = lfirst_int(lc); + + if (lnext(altBranchStarts, lc) != NULL) + { + int nextAltStart = lfirst_int(lnext(altBranchStarts, lc)); + + pat->elements[firstElemIdx].jump = nextAltStart; + } + /* Last alternative's jump stays as -1 (default) */ + } + + /* Set next on last element of each alternative to after the alternation */ + { + int afterAltIdx = *idx; + + foreach(lc, altEndPositions) + { + int endPos = lfirst_int(lc); + + if (pat->elements[endPos].next == RPR_ELEMIDX_INVALID) + pat->elements[endPos].next = afterAltIdx; + } + } + + list_free(altBranchStarts); + list_free(altEndPositions); + break; + } +} + +/* + * buildRPRPattern + * Build flat pattern element array from AST. + * + * Optimizes the pattern tree, then uses 2-pass: count elements, allocate and fill. + * Returns NULL if pattern is NULL. + * + * This function is called from createplan.c during plan creation. + */ +RPRPattern * +buildRPRPattern(RPRPatternNode *pattern, List *defineVariableList) +{ + RPRPattern *result; + RPRPatternNode *optimized; + char *varNamesStack[RPR_VARID_MAX + 1]; /* stack array for collection */ + int numVars = 0; + int numElements = 0; + RPRDepth maxDepth = 0; + int idx; + int finIdx; + int i; + RPRPatternElement *finElem; + ListCell *lc; + + if (pattern == NULL) + return NULL; + + /* Optimize the pattern tree (planner phase) */ + optimized = optimizeRPRPattern(pattern); + + /* + * First, populate varNames from defineVariableList in order. + * This ensures varId == defineIdx for all defined variables, + * eliminating the need for varIdToDefineIdx mapping. + */ + foreach(lc, defineVariableList) + { + char *varName = strVal(lfirst(lc)); + + if (numVars >= RPR_VARID_MAX) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("too many pattern variables"), + errdetail("Maximum is %d.", RPR_VARID_MAX))); + + varNamesStack[numVars] = varName; + numVars++; + } + + /* + * Pass 1: Single scan to collect variable names and count elements. + * Variables already in varNamesStack (from DEFINE) are skipped. + * Also counts elements and tracks max depth in one traversal. + */ + scanRPRPattern(optimized, varNamesStack, &numVars, + &numElements, 0, &maxDepth); + numElements++; /* +1 for FIN marker */ + + /* Check element count limit */ + if (numElements > RPR_ELEMIDX_MAX) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("pattern too complex"), + errdetail("Pattern has %d elements, maximum is %d.", + numElements, RPR_ELEMIDX_MAX))); + + /* + * Allocate result structure with makeNode for proper NodeTag. + */ + result = makeNode(RPRPattern); + result->numVars = numVars; + result->maxDepth = maxDepth + 1; /* +1: depth is 0-based, need counts[0..maxDepth] */ + result->numElements = numElements; + + /* Build varNames as char** array */ + if (numVars > 0) + { + result->varNames = palloc(numVars * sizeof(char *)); + for (i = 0; i < numVars; i++) + result->varNames[i] = pstrdup(varNamesStack[i]); + } + else + { + result->varNames = NULL; + } + + /* Allocate elements array separately (zero-init for reserved field) */ + result->elements = palloc0(numElements * sizeof(RPRPatternElement)); + + /* + * Pass 2: Fill elements directly into pre-allocated array. + */ + idx = 0; + fillRPRPattern(optimized, result, &idx, 0); + + /* Set up next pointers for elements that don't have one yet */ + finIdx = numElements - 1; /* FIN marker is at the last position */ + for (i = 0; i < finIdx; i++) + { + if (result->elements[i].next == RPR_ELEMIDX_INVALID) + { + /* Point to next element, or to FIN if this is the last */ + result->elements[i].next = (i < finIdx - 1) ? i + 1 : finIdx; + } + } + + /* Add FIN marker at the end */ + finElem = &result->elements[finIdx]; + memset(finElem, 0, sizeof(RPRPatternElement)); + finElem->varId = RPR_VARID_FIN; + finElem->depth = 0; + finElem->min = 1; + finElem->max = 1; + finElem->next = RPR_ELEMIDX_INVALID; + finElem->jump = RPR_ELEMIDX_INVALID; + + /* Compute context absorption eligibility */ + computeAbsorbability(result); + + return result; +} + +/* + * computeAbsorbability + * Determine if pattern supports context absorption optimization. + * + * Context absorption is safe when later matches are guaranteed to be + * suffixes of earlier matches (both row positions AND content). + * + * Sets RPR_ELEM_ABSORBABLE flag on the first element of each absorbable + * branch. A branch is absorbable if: + * - It starts with an unbounded element (greedy-first) + * - It has exactly one unbounded element + * - It's not inside an unbounded GROUP (GREEDY(ALT) case) + * + * pattern->isAbsorbable is set true if ANY branch is absorbable. + */ +static void +computeAbsorbability(RPRPattern *pattern) +{ + int i; + int maxAltDepth = -1; + RPRDepth minUnboundedGroupDepth = UINT8_MAX; + bool hasTopLevelAlt = false; + + pattern->isAbsorbable = false; + + if (pattern->numElements < 2) /* At minimum: one element + FIN */ + return; + + /* + * Scan elements to find ALT structure and unbounded GROUP depth. + */ + for (i = 0; i < pattern->numElements - 1; i++) + { + RPRPatternElement *elem = &pattern->elements[i]; + + if (elem->varId == RPR_VARID_ALT) + { + if (elem->depth > maxAltDepth) + maxAltDepth = elem->depth; + if (i == 0) + hasTopLevelAlt = true; + } + else if (elem->varId == RPR_VARID_END) + { + if (elem->max == INT32_MAX && elem->depth < minUnboundedGroupDepth) + minUnboundedGroupDepth = elem->depth; + } + } + + /* + * Check for GREEDY(ALT) pattern: ALT inside unbounded GROUP. + * In this case, no branch can be absorbable. + */ + if (maxAltDepth >= 0 && maxAltDepth > minUnboundedGroupDepth) + return; + + /* + * For top-level ALT patterns, check each branch separately. + * Set ABSORBABLE flag on branches that qualify. + */ + if (hasTopLevelAlt) + { + int branchStart = 1; /* after ALT marker */ + int patternEnd = pattern->numElements - 1; + + while (branchStart < patternEnd) + { + RPRPatternElement *branchFirst = &pattern->elements[branchStart]; + int branchEnd; + int branchUnbounded = 0; + int j; + bool branchAbsorbable = true; + + /* Find branch end */ + if (branchFirst->jump != RPR_ELEMIDX_INVALID) + branchEnd = branchFirst->jump; + else + branchEnd = patternEnd; + + /* First element of branch must be unbounded */ + if (branchFirst->max != INT32_MAX) + branchAbsorbable = false; + + /* Count unbounded in this branch - must be exactly 1 */ + if (branchAbsorbable) + { + for (j = branchStart; j < branchEnd; j++) + { + RPRPatternElement *elem = &pattern->elements[j]; + + if (elem->varId == RPR_VARID_END) + { + if (elem->max == INT32_MAX) + branchUnbounded++; + } + else if (elem->varId != RPR_VARID_ALT) + { + if (elem->max == INT32_MAX) + branchUnbounded++; + } + } + + if (branchUnbounded != 1) + branchAbsorbable = false; + } + + /* Set flag on branch's first element if absorbable */ + if (branchAbsorbable) + { + branchFirst->flags |= RPR_ELEM_ABSORBABLE; + pattern->isAbsorbable = true; + } + + branchStart = branchEnd; + } + return; + } + + /* + * Non-ALT pattern: check for absorbability. + * + * Case 1: First element is unbounded (A+ B, etc.) + * Case 2: Top-level unbounded GROUP (A B){2,} - GROUP END at depth 0 + * + * In both cases, must have exactly one unbounded element/group. + */ + { + RPRPatternElement *first = &pattern->elements[0]; + int numUnbounded = 0; + bool hasTopLevelUnboundedGroup = false; + RPRElemIdx topLevelGroupEnd = RPR_ELEMIDX_INVALID; + + /* Count unbounded elements and find top-level unbounded GROUP */ + for (i = 0; i < pattern->numElements - 1; i++) + { + RPRPatternElement *elem = &pattern->elements[i]; + + if (elem->varId == RPR_VARID_END) + { + if (elem->max == INT32_MAX) + { + numUnbounded++; + /* Check if this is a top-level GROUP (depth 0) */ + if (elem->depth == 0) + { + hasTopLevelUnboundedGroup = true; + topLevelGroupEnd = i; + } + } + } + else if (elem->varId != RPR_VARID_ALT) + { + if (elem->max == INT32_MAX) + numUnbounded++; + } + } + + /* Must have exactly one unbounded element/group */ + if (numUnbounded != 1) + return; + + /* + * Case 1: First element is unbounded (e.g., A+ B) + */ + if (first->max == INT32_MAX) + { + first->flags |= RPR_ELEM_ABSORBABLE; + pattern->isAbsorbable = true; + return; + } + + /* + * Case 2: Top-level unbounded GROUP that spans entire pattern. + * e.g., (A B){2,} where GROUP END is at the last position before FIN. + * The entire pattern content is inside this unbounded group. + */ + if (hasTopLevelUnboundedGroup && + topLevelGroupEnd == pattern->numElements - 2) + { + first->flags |= RPR_ELEM_ABSORBABLE; + pattern->isAbsorbable = true; + } + } +} diff --git a/src/backend/parser/Makefile b/src/backend/parser/Makefile index 8c0fe28d63f..f4c7d605fe3 100644 --- a/src/backend/parser/Makefile +++ b/src/backend/parser/Makefile @@ -29,6 +29,7 @@ OBJS = \ parse_oper.o \ parse_param.o \ parse_relation.o \ + parse_rpr.o \ parse_target.o \ parse_type.o \ parse_utilcmd.o \ diff --git a/src/backend/parser/README b/src/backend/parser/README index e26eb437a9f..2baffa9517e 100644 --- a/src/backend/parser/README +++ b/src/backend/parser/README @@ -26,6 +26,7 @@ parse_node.c create nodes for various structures parse_oper.c handle operators in expressions parse_param.c handle Params (for the cases used in the core backend) parse_relation.c support routines for tables and column handling +parse_rpr.c handle Row Pattern Recognition parse_target.c handle the result list of the query parse_type.c support routines for data type handling parse_utilcmd.c parse analysis for utility commands (done at execution time) diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 16443e932ab..ffb950ac61d 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -209,6 +209,7 @@ static void preprocess_pub_all_objtype_list(List *all_objects_list, static void preprocess_pubobj_list(List *pubobjspec_list, core_yyscan_t yyscanner); static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); +static RPRPatternNode *makeRPRQuantifier(int min, int max, bool reluctant); %} @@ -684,9 +685,10 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); %type row_pattern_definition %type opt_row_pattern_common_syntax - row_pattern_term + row_pattern row_pattern_alt row_pattern_seq + row_pattern_term row_pattern_primary + row_pattern_quantifier_opt %type row_pattern_definition_list - row_pattern %type opt_row_pattern_skip_to %type opt_row_pattern_initial_or_seek @@ -900,7 +902,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); %nonassoc IDENT PARTITION RANGE ROWS GROUPS PRECEDING FOLLOWING CUBE ROLLUP SET KEYS OBJECT_P SCALAR VALUE_P WITH WITHOUT PATH AFTER INITIAL SEEK PATTERN_P -%left Op OPERATOR /* multi-character ops and user-defined operators */ +%left Op OPERATOR '|' /* multi-character ops and user-defined operators */ %left '+' '-' %left '*' '/' '%' %left '^' @@ -16838,7 +16840,7 @@ opt_row_pattern_skip_to opt_row_pattern_initial_or_seek RPCommonSyntax *n = makeNode(RPCommonSyntax); n->rpSkipTo = $1; n->initial = $2; - n->rpPatterns = $5; + n->rpPattern = (RPRPatternNode *) $5; n->rpDefs = $8; $$ = (Node *) n; } @@ -16874,28 +16876,253 @@ opt_row_pattern_initial_or_seek: ; row_pattern: - row_pattern_term { $$ = list_make1($1); } - | row_pattern row_pattern_term { $$ = lappend($1, $2); } + row_pattern_alt { $$ = $1; } + ; + +row_pattern_alt: + row_pattern_seq { $$ = $1; } + | row_pattern_alt '|' row_pattern_seq + { + RPRPatternNode *n; + /* If left side is already ALT, append to it */ + if (IsA($1, RPRPatternNode) && + ((RPRPatternNode *) $1)->nodeType == RPR_PATTERN_ALT) + { + n = (RPRPatternNode *) $1; + n->children = lappend(n->children, $3); + $$ = (Node *) n; + } + else + { + n = makeNode(RPRPatternNode); + n->nodeType = RPR_PATTERN_ALT; + n->children = list_make2($1, $3); + n->min = 1; + n->max = 1; + n->location = @1; + $$ = (Node *) n; + } + } + ; + +row_pattern_seq: + row_pattern_term { $$ = $1; } + | row_pattern_seq row_pattern_term + { + RPRPatternNode *n; + /* If left side is already SEQ, append to it */ + if (IsA($1, RPRPatternNode) && + ((RPRPatternNode *) $1)->nodeType == RPR_PATTERN_SEQ) + { + n = (RPRPatternNode *) $1; + n->children = lappend(n->children, $2); + $$ = (Node *) n; + } + else + { + n = makeNode(RPRPatternNode); + n->nodeType = RPR_PATTERN_SEQ; + n->children = list_make2($1, $2); + n->min = 1; + n->max = 1; + n->location = @1; + $$ = (Node *) n; + } + } ; row_pattern_term: - ColId { $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "", (Node *)makeString($1), NULL, @1); } - | ColId '*' { $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "*", (Node *)makeString($1), NULL, @1); } - | ColId '+' { $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "+", (Node *)makeString($1), NULL, @1); } -/* - * '?' quantifier. We cannot write this directly "ColId '?'" because '?' is - * not a "self" token. So we let '?' matches Op first then check if it's '?' - * or not. - */ - | ColId Op + row_pattern_primary row_pattern_quantifier_opt + { + RPRPatternNode *n = (RPRPatternNode *) $1; + RPRPatternNode *q = (RPRPatternNode *) $2; + + n->min = q->min; + n->max = q->max; + n->reluctant = q->reluctant; + $$ = (Node *) n; + } + ; + +row_pattern_primary: + ColId { - if (strcmp("?", $2)) + RPRPatternNode *n = makeNode(RPRPatternNode); + n->nodeType = RPR_PATTERN_VAR; + n->varName = $1; + n->min = 1; + n->max = 1; + n->reluctant = false; + n->children = NIL; + n->location = @1; + $$ = (Node *) n; + } + | '(' row_pattern ')' + { + RPRPatternNode *inner = (RPRPatternNode *) $2; + RPRPatternNode *n = makeNode(RPRPatternNode); + n->nodeType = RPR_PATTERN_GROUP; + n->children = list_make1(inner); + n->min = 1; + n->max = 1; + n->reluctant = false; + n->location = @1; + $$ = (Node *) n; + } + ; + +row_pattern_quantifier_opt: + /* EMPTY */ { $$ = (Node *) makeRPRQuantifier(1, 1, false); } + | '*' { $$ = (Node *) makeRPRQuantifier(0, INT_MAX, false); } + | '+' { $$ = (Node *) makeRPRQuantifier(1, INT_MAX, false); } + | Op + { + /* Handle single Op: ? or reluctant quantifiers *?, +?, ?? */ + if (strcmp($1, "?") == 0) + $$ = (Node *) makeRPRQuantifier(0, 1, false); + else if (strcmp($1, "*?") == 0) + $$ = (Node *) makeRPRQuantifier(0, INT_MAX, true); + else if (strcmp($1, "+?") == 0) + $$ = (Node *) makeRPRQuantifier(1, INT_MAX, true); + else if (strcmp($1, "??") == 0) + $$ = (Node *) makeRPRQuantifier(0, 1, true); + else + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("unsupported quantifier \"%s\"", $1), + parser_errposition(@1)); + } + /* RELUCTANT quantifiers (when lexer separates tokens) */ + | '*' Op + { + if (strcmp($2, "?") != 0) ereport(ERROR, errcode(ERRCODE_SYNTAX_ERROR), errmsg("unsupported quantifier"), parser_errposition(@2)); - - $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "?", (Node *)makeString($1), NULL, @1); + $$ = (Node *) makeRPRQuantifier(0, INT_MAX, true); + } + | '+' Op + { + if (strcmp($2, "?") != 0) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("unsupported quantifier"), + parser_errposition(@2)); + $$ = (Node *) makeRPRQuantifier(1, INT_MAX, true); + } + | Op Op + { + if (strcmp($1, "?") != 0 || strcmp($2, "?") != 0) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("unsupported quantifier"), + parser_errposition(@1)); + $$ = (Node *) makeRPRQuantifier(0, 1, true); + } + /* {n}, {n,}, {,m}, {n,m} quantifiers */ + | '{' Iconst '}' + { + if ($2 < 0 || $2 >= INT_MAX) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("quantifier bound must be between 0 and %d", INT_MAX - 1), + parser_errposition(@2)); + $$ = (Node *) makeRPRQuantifier($2, $2, false); + } + | '{' Iconst ',' '}' + { + if ($2 < 0 || $2 >= INT_MAX) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("quantifier bound must be between 0 and %d", INT_MAX - 1), + parser_errposition(@2)); + $$ = (Node *) makeRPRQuantifier($2, INT_MAX, false); + } + | '{' ',' Iconst '}' + { + if ($3 < 0 || $3 >= INT_MAX) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("quantifier bound must be between 0 and %d", INT_MAX - 1), + parser_errposition(@3)); + $$ = (Node *) makeRPRQuantifier(0, $3, false); + } + | '{' Iconst ',' Iconst '}' + { + if ($2 < 0 || $4 < 0 || $2 >= INT_MAX || $4 >= INT_MAX) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("quantifier bound must be between 0 and %d", INT_MAX - 1), + parser_errposition(@2)); + if ($2 > $4) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("quantifier minimum bound must not exceed maximum"), + parser_errposition(@2)); + $$ = (Node *) makeRPRQuantifier($2, $4, false); + } + /* Reluctant versions: {n}?, {n,}?, {,m}?, {n,m}? */ + | '{' Iconst '}' Op + { + if (strcmp($4, "?") != 0) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("unsupported quantifier"), + parser_errposition(@4)); + if ($2 < 0 || $2 >= INT_MAX) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("quantifier bound must be between 0 and %d", INT_MAX - 1), + parser_errposition(@2)); + $$ = (Node *) makeRPRQuantifier($2, $2, true); + } + | '{' Iconst ',' '}' Op + { + if (strcmp($5, "?") != 0) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("unsupported quantifier"), + parser_errposition(@5)); + if ($2 < 0 || $2 >= INT_MAX) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("quantifier bound must be between 0 and %d", INT_MAX - 1), + parser_errposition(@2)); + $$ = (Node *) makeRPRQuantifier($2, INT_MAX, true); + } + | '{' ',' Iconst '}' Op + { + if (strcmp($5, "?") != 0) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("unsupported quantifier"), + parser_errposition(@5)); + if ($3 < 0 || $3 >= INT_MAX) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("quantifier bound must be between 0 and %d", INT_MAX - 1), + parser_errposition(@3)); + $$ = (Node *) makeRPRQuantifier(0, $3, true); + } + | '{' Iconst ',' Iconst '}' Op + { + if (strcmp($6, "?") != 0) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("unsupported quantifier"), + parser_errposition(@6)); + if ($2 < 0 || $4 < 0 || $2 >= INT_MAX || $4 >= INT_MAX) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("quantifier bound must be between 0 and %d", INT_MAX - 1), + parser_errposition(@2)); + if ($2 > $4) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("quantifier minimum bound must not exceed maximum"), + parser_errposition(@2)); + $$ = (Node *) makeRPRQuantifier($2, $4, true); } ; @@ -16958,12 +17185,15 @@ MathOp: '+' { $$ = "+"; } | LESS_EQUALS { $$ = "<="; } | GREATER_EQUALS { $$ = ">="; } | NOT_EQUALS { $$ = "<>"; } + | '|' { $$ = "|"; } ; qual_Op: Op { $$ = list_make1(makeString($1)); } | OPERATOR '(' any_operator ')' { $$ = $3; } + | '|' + { $$ = list_make1(makeString("|")); } ; qual_all_Op: @@ -20085,6 +20315,21 @@ makeRecursiveViewSelect(char *relname, List *aliases, Node *query) return (Node *) s; } +/* + * makeRPRQuantifier + * Create an RPRPatternNode with specified quantifier bounds. + */ +static RPRPatternNode * +makeRPRQuantifier(int min, int max, bool reluctant) +{ + RPRPatternNode *n = makeNode(RPRPatternNode); + + n->min = min; + n->max = max; + n->reluctant = reluctant; + return n; +} + /* parser_init() * Initialize to parse one query string */ diff --git a/src/backend/parser/meson.build b/src/backend/parser/meson.build index 924ee87a453..a07102b37a0 100644 --- a/src/backend/parser/meson.build +++ b/src/backend/parser/meson.build @@ -16,6 +16,7 @@ backend_sources += files( 'parse_oper.c', 'parse_param.c', 'parse_relation.c', + 'parse_rpr.c', 'parse_target.c', 'parse_type.c', 'parse_utilcmd.c', diff --git a/src/backend/parser/parse_clause.c b/src/backend/parser/parse_clause.c index f450cb3310c..f98ac7cfcb5 100644 --- a/src/backend/parser/parse_clause.c +++ b/src/backend/parser/parse_clause.c @@ -37,6 +37,7 @@ #include "parser/parse_func.h" #include "parser/parse_oper.h" #include "parser/parse_relation.h" +#include "parser/parse_rpr.h" #include "parser/parse_target.h" #include "parser/parse_type.h" #include "parser/parser.h" @@ -84,8 +85,6 @@ static void checkExprIsVarFree(ParseState *pstate, Node *n, const char *constructName); static TargetEntry *findTargetlistEntrySQL92(ParseState *pstate, Node *node, List **tlist, ParseExprKind exprKind); -static TargetEntry *findTargetlistEntrySQL99(ParseState *pstate, Node *node, - List **tlist, ParseExprKind exprKind); static int get_matching_location(int sortgroupref, List *sortgrouprefs, List *exprs); static List *resolve_unique_index_expr(ParseState *pstate, InferClause *infer, @@ -96,12 +95,6 @@ static WindowClause *findWindowClause(List *wclist, const char *name); static Node *transformFrameOffset(ParseState *pstate, int frameOptions, Oid rangeopfamily, Oid rangeopcintype, Oid *inRangeFunc, Node *clause); -static void transformRPR(ParseState *pstate, WindowClause *wc, WindowDef *windef, - List **targetlist); -static List *transformDefineClause(ParseState *pstate, WindowClause *wc, WindowDef *windef, - List **targetlist); -static void transformPatternClause(ParseState *pstate, WindowClause *wc, - WindowDef *windef); /* * transformFromClause - @@ -2173,7 +2166,7 @@ findTargetlistEntrySQL92(ParseState *pstate, Node *node, List **tlist, * tlist the target list (passed by reference so we can append to it) * exprKind identifies clause type being processed */ -static TargetEntry * +TargetEntry * findTargetlistEntrySQL99(ParseState *pstate, Node *node, List **tlist, ParseExprKind exprKind) { @@ -3900,254 +3893,3 @@ transformFrameOffset(ParseState *pstate, int frameOptions, return node; } - -/* - * transformRPR - * Process Row Pattern Recognition related clauses - */ -static void -transformRPR(ParseState *pstate, WindowClause *wc, WindowDef *windef, - List **targetlist) -{ - /* - * Window definition exists? - */ - if (windef == NULL) - return; - - /* - * Row Pattern Common Syntax clause exists? - */ - if (windef->rpCommonSyntax == NULL) - return; - - /* Check Frame option. Frame must start at current row */ - if ((wc->frameOptions & FRAMEOPTION_START_CURRENT_ROW) == 0) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("FRAME must start at current row when row pattern recognition is used"))); - - /* Transform AFTER MACH SKIP TO clause */ - wc->rpSkipTo = windef->rpCommonSyntax->rpSkipTo; - - /* Transform SEEK or INITIAL clause */ - wc->initial = windef->rpCommonSyntax->initial; - - /* Transform DEFINE clause into list of TargetEntry's */ - wc->defineClause = transformDefineClause(pstate, wc, windef, targetlist); - - /* Check PATTERN clause and copy to patternClause */ - transformPatternClause(pstate, wc, windef); -} - -/* - * transformDefineClause - * Process DEFINE clause and transform ResTarget into list of - * TargetEntry. - * - * XXX we only support column reference in row pattern definition search - * condition, e.g. "price". . is not supported, e.g. "A.price". - */ -static List * -transformDefineClause(ParseState *pstate, WindowClause *wc, WindowDef *windef, - List **targetlist) -{ - /* DEFINE variable name initials */ - static const char *defineVariableInitials = "abcdefghijklmnopqrstuvwxyz"; - - ListCell *lc, - *l; - ResTarget *restarget, - *r; - List *restargets; - List *defineClause; - char *name; - int initialLen; - int numinitials; - - /* - * If Row Definition Common Syntax exists, DEFINE clause must exist. (the - * raw parser should have already checked it.) - */ - Assert(windef->rpCommonSyntax->rpDefs != NULL); - - /* - * Check and add "A AS A IS TRUE" if pattern variable is missing in DEFINE - * per the SQL standard. - */ - restargets = NIL; - foreach(lc, windef->rpCommonSyntax->rpPatterns) - { - A_Expr *a; - bool found = false; - - if (!IsA(lfirst(lc), A_Expr)) - ereport(ERROR, - errmsg("node type is not A_Expr")); - - a = (A_Expr *) lfirst(lc); - name = strVal(a->lexpr); - - foreach(l, windef->rpCommonSyntax->rpDefs) - { - restarget = (ResTarget *) lfirst(l); - - if (!strcmp(restarget->name, name)) - { - found = true; - break; - } - } - - if (!found) - { - /* - * "name" is missing. So create "name AS name IS TRUE" ResTarget - * node and add it to the temporary list. - */ - A_Const *n; - - restarget = makeNode(ResTarget); - n = makeNode(A_Const); - n->val.boolval.type = T_Boolean; - n->val.boolval.boolval = true; - n->location = -1; - restarget->name = pstrdup(name); - restarget->indirection = NIL; - restarget->val = (Node *) n; - restarget->location = -1; - restargets = lappend((List *) restargets, restarget); - } - } - - if (list_length(restargets) >= 1) - { - /* add missing DEFINEs */ - windef->rpCommonSyntax->rpDefs = - list_concat(windef->rpCommonSyntax->rpDefs, restargets); - list_free(restargets); - } - - /* - * Check for duplicate row pattern definition variables. The standard - * requires that no two row pattern definition variable names shall be - * equivalent. - */ - restargets = NIL; - numinitials = 0; - initialLen = strlen(defineVariableInitials); - foreach(lc, windef->rpCommonSyntax->rpDefs) - { - char initial[2]; - - restarget = (ResTarget *) lfirst(lc); - name = restarget->name; - - /* - * Add DEFINE expression (Restarget->val) to the targetlist as a - * TargetEntry if it does not exist yet. Planner will add the column - * ref var node to the outer plan's target list later on. This makes - * DEFINE expression could access the outer tuple while evaluating - * PATTERN. - * - * XXX: adding whole expressions of DEFINE to the plan.targetlist is - * not so good, because it's not necessary to evalute the expression - * in the target list while running the plan. We should extract the - * var nodes only then add them to the plan.targetlist. - */ - findTargetlistEntrySQL99(pstate, (Node *) restarget->val, - targetlist, EXPR_KIND_RPR_DEFINE); - - /* - * Make sure that the row pattern definition search condition is a - * boolean expression. - */ - transformWhereClause(pstate, restarget->val, - EXPR_KIND_RPR_DEFINE, "DEFINE"); - - foreach(l, restargets) - { - char *n; - - r = (ResTarget *) lfirst(l); - n = r->name; - - if (!strcmp(n, name)) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("row pattern definition variable name \"%s\" appears more than once in DEFINE clause", - name), - parser_errposition(pstate, exprLocation((Node *) r)))); - } - - /* - * Create list of row pattern DEFINE variable name's initial. We - * assign [a-z] to them (up to 26 variable names are allowed). - */ - if (numinitials >= initialLen) - { - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("number of row pattern definition variable names exceeds %d", - initialLen), - parser_errposition(pstate, - exprLocation((Node *) restarget)))); - } - initial[0] = defineVariableInitials[numinitials++]; - initial[1] = '\0'; - wc->defineInitial = lappend(wc->defineInitial, - makeString(pstrdup(initial))); - - restargets = lappend(restargets, restarget); - } - list_free(restargets); - - /* turns a list of ResTarget's into a list of TargetEntry's */ - defineClause = transformTargetList(pstate, windef->rpCommonSyntax->rpDefs, - EXPR_KIND_RPR_DEFINE); - - /* mark column origins */ - markTargetListOrigins(pstate, defineClause); - - /* mark all nodes in the DEFINE clause tree with collation information */ - assign_expr_collations(pstate, (Node *) defineClause); - - return defineClause; -} - -/* - * transformPatternClause - * Process PATTERN clause and return PATTERN clause in the raw parse tree - * - * windef->rpCommonSyntax must exist. - */ -static void -transformPatternClause(ParseState *pstate, WindowClause *wc, - WindowDef *windef) -{ - ListCell *lc; - - Assert(windef->rpCommonSyntax != NULL); - - wc->patternVariable = NIL; - wc->patternRegexp = NIL; - foreach(lc, windef->rpCommonSyntax->rpPatterns) - { - A_Expr *a; - char *name; - char *regexp; - - if (!IsA(lfirst(lc), A_Expr)) - ereport(ERROR, - errmsg("node type is not A_Expr")); - - a = (A_Expr *) lfirst(lc); - name = strVal(a->lexpr); - - wc->patternVariable = lappend(wc->patternVariable, makeString(pstrdup(name))); - regexp = strVal(lfirst(list_head(a->name))); - - wc->patternRegexp = lappend(wc->patternRegexp, makeString(pstrdup(regexp))); - } -} diff --git a/src/backend/parser/parse_rpr.c b/src/backend/parser/parse_rpr.c new file mode 100644 index 00000000000..de648293b27 --- /dev/null +++ b/src/backend/parser/parse_rpr.c @@ -0,0 +1,340 @@ +/*------------------------------------------------------------------------- + * + * parse_rpr.c + * handle Row Pattern Recognition in parser + * + * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/parser/parse_rpr.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "nodes/makefuncs.h" +#include "nodes/nodeFuncs.h" +#include "parser/parse_clause.h" +#include "parser/parse_collate.h" +#include "parser/parse_expr.h" +#include "parser/parse_rpr.h" +#include "parser/parse_target.h" + +/* DEFINE variable name initials */ +static const char *defineVariableInitials = "abcdefghijklmnopqrstuvwxyz"; + +/* Forward declarations */ +static void collectRPRPatternVarNames(RPRPatternNode *node, List **varNames); +static List *transformDefineClause(ParseState *pstate, WindowClause *wc, + WindowDef *windef, List **targetlist); +static void transformPatternClause(ParseState *pstate, WindowClause *wc, + WindowDef *windef); +static RPRPatternNode *copyRPRPatternNode(RPRPatternNode *node); + +/* + * transformRPR + * Process Row Pattern Recognition related clauses + */ +void +transformRPR(ParseState *pstate, WindowClause *wc, WindowDef *windef, + List **targetlist) +{ + /* + * Window definition exists? + */ + if (windef == NULL) + return; + + /* + * Row Pattern Common Syntax clause exists? + */ + if (windef->rpCommonSyntax == NULL) + return; + + /* Check Frame option. Frame must start at current row */ + if ((wc->frameOptions & FRAMEOPTION_START_CURRENT_ROW) == 0) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("FRAME must start at current row when row pattern recognition is used"))); + + /* Transform AFTER MATCH SKIP TO clause */ + wc->rpSkipTo = windef->rpCommonSyntax->rpSkipTo; + + /* Transform SEEK or INITIAL clause */ + wc->initial = windef->rpCommonSyntax->initial; + + /* Transform DEFINE clause into list of TargetEntry's */ + wc->defineClause = transformDefineClause(pstate, wc, windef, targetlist); + + /* Check PATTERN clause and copy to patternClause */ + transformPatternClause(pstate, wc, windef); +} + +/* + * collectRPRPatternVarNames + * Collect all variable names from RPRPatternNode tree. + */ +static void +collectRPRPatternVarNames(RPRPatternNode *node, List **varNames) +{ + ListCell *lc; + + if (node == NULL) + return; + + switch (node->nodeType) + { + case RPR_PATTERN_VAR: + /* Add variable name if not already in list */ + { + bool found = false; + + foreach(lc, *varNames) + { + if (strcmp(strVal(lfirst(lc)), node->varName) == 0) + { + found = true; + break; + } + } + if (!found) + *varNames = lappend(*varNames, makeString(pstrdup(node->varName))); + } + break; + + case RPR_PATTERN_SEQ: + case RPR_PATTERN_ALT: + case RPR_PATTERN_GROUP: + /* Recurse into children */ + foreach(lc, node->children) + { + collectRPRPatternVarNames((RPRPatternNode *) lfirst(lc), varNames); + } + break; + } +} + +/* + * transformDefineClause + * Process DEFINE clause and transform ResTarget into list of + * TargetEntry. + * + * XXX we only support column reference in row pattern definition search + * condition, e.g. "price". . is not supported, e.g. "A.price". + */ +static List * +transformDefineClause(ParseState *pstate, WindowClause *wc, WindowDef *windef, + List **targetlist) +{ + ListCell *lc, + *l; + ResTarget *restarget, + *r; + List *restargets; + List *defineClause; + char *name; + int initialLen; + int numinitials; + List *patternVarNames = NIL; + + /* + * If Row Definition Common Syntax exists, DEFINE clause must exist. (the + * raw parser should have already checked it.) + */ + Assert(windef->rpCommonSyntax->rpDefs != NULL); + + /* + * Collect all pattern variable names from the pattern tree. + */ + collectRPRPatternVarNames(windef->rpCommonSyntax->rpPattern, &patternVarNames); + + /* + * Check and add "A AS A IS TRUE" if pattern variable is missing in DEFINE + * per the SQL standard. + */ + restargets = NIL; + foreach(lc, patternVarNames) + { + bool found = false; + + name = strVal(lfirst(lc)); + + foreach(l, windef->rpCommonSyntax->rpDefs) + { + restarget = (ResTarget *) lfirst(l); + + if (!strcmp(restarget->name, name)) + { + found = true; + break; + } + } + + if (!found) + { + /* + * "name" is missing. So create "name AS name IS TRUE" ResTarget + * node and add it to the temporary list. + */ + A_Const *n; + + restarget = makeNode(ResTarget); + n = makeNode(A_Const); + n->val.boolval.type = T_Boolean; + n->val.boolval.boolval = true; + n->location = -1; + restarget->name = pstrdup(name); + restarget->indirection = NIL; + restarget->val = (Node *) n; + restarget->location = -1; + restargets = lappend((List *) restargets, restarget); + } + } + + if (list_length(restargets) >= 1) + { + /* add missing DEFINEs */ + windef->rpCommonSyntax->rpDefs = + list_concat(windef->rpCommonSyntax->rpDefs, restargets); + list_free(restargets); + } + + /* + * Check for duplicate row pattern definition variables. The standard + * requires that no two row pattern definition variable names shall be + * equivalent. + */ + restargets = NIL; + numinitials = 0; + initialLen = strlen(defineVariableInitials); + foreach(lc, windef->rpCommonSyntax->rpDefs) + { + char initial[2]; + + restarget = (ResTarget *) lfirst(lc); + name = restarget->name; + + /* + * Add DEFINE expression (Restarget->val) to the targetlist as a + * TargetEntry if it does not exist yet. Planner will add the column + * ref var node to the outer plan's target list later on. This makes + * DEFINE expression could access the outer tuple while evaluating + * PATTERN. + * + * XXX: adding whole expressions of DEFINE to the plan.targetlist is + * not so good, because it's not necessary to evalute the expression + * in the target list while running the plan. We should extract the + * var nodes only then add them to the plan.targetlist. + */ + findTargetlistEntrySQL99(pstate, (Node *) restarget->val, + targetlist, EXPR_KIND_RPR_DEFINE); + + /* + * Make sure that the row pattern definition search condition is a + * boolean expression. + */ + transformWhereClause(pstate, restarget->val, + EXPR_KIND_RPR_DEFINE, "DEFINE"); + + foreach(l, restargets) + { + char *n; + + r = (ResTarget *) lfirst(l); + n = r->name; + + if (!strcmp(n, name)) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("row pattern definition variable name \"%s\" appears more than once in DEFINE clause", + name), + parser_errposition(pstate, exprLocation((Node *) r)))); + } + + /* + * Create list of row pattern DEFINE variable name's initial. We + * assign [a-z] to them (up to 26 variable names are allowed). + */ + if (numinitials >= initialLen) + { + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("number of row pattern definition variable names exceeds %d", + initialLen), + parser_errposition(pstate, + exprLocation((Node *) restarget)))); + } + initial[0] = defineVariableInitials[numinitials++]; + initial[1] = '\0'; + wc->defineInitial = lappend(wc->defineInitial, + makeString(pstrdup(initial))); + + restargets = lappend(restargets, restarget); + } + list_free(restargets); + + /* turns a list of ResTarget's into a list of TargetEntry's */ + defineClause = transformTargetList(pstate, windef->rpCommonSyntax->rpDefs, + EXPR_KIND_RPR_DEFINE); + + /* mark column origins */ + markTargetListOrigins(pstate, defineClause); + + /* mark all nodes in the DEFINE clause tree with collation information */ + assign_expr_collations(pstate, (Node *) defineClause); + + return defineClause; +} + +/* + * transformPatternClause + * Process PATTERN clause and return PATTERN clause in the raw parse tree + * + * windef->rpCommonSyntax must exist. + * + * The original (unoptimized) pattern is stored for deparsing (pg_get_viewdef). + * Optimization happens later in the planner phase (buildRPRPattern). + */ +static void +transformPatternClause(ParseState *pstate, WindowClause *wc, + WindowDef *windef) +{ + Assert(windef->rpCommonSyntax != NULL); + + /* Store original AST for deparsing (no optimization here) */ + wc->rpPattern = copyRPRPatternNode(windef->rpCommonSyntax->rpPattern); +} + +/* + * copyRPRPatternNode + * Deep copy an RPRPatternNode tree. + */ +static RPRPatternNode * +copyRPRPatternNode(RPRPatternNode *node) +{ + RPRPatternNode *copy; + ListCell *lc; + + if (node == NULL) + return NULL; + + copy = makeNode(RPRPatternNode); + copy->nodeType = node->nodeType; + copy->varName = node->varName ? pstrdup(node->varName) : NULL; + copy->min = node->min; + copy->max = node->max; + copy->reluctant = node->reluctant; + copy->children = NIL; + + foreach(lc, node->children) + { + copy->children = lappend(copy->children, + copyRPRPatternNode(lfirst(lc))); + } + + return copy; +} diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l index 731d584106d..f6ca2406146 100644 --- a/src/backend/parser/scan.l +++ b/src/backend/parser/scan.l @@ -363,7 +363,7 @@ not_equals "!=" * If you change either set, adjust the character lists appearing in the * rule for "operator"! */ -self [,()\[\].;\:\+\-\*\/\%\^\<\>\=] +self [,()\[\].;\:\+\-\*\/\%\^\<\>\=\|] op_chars [\~\!\@\#\^\&\|\`\?\+\-\*\/\%\<\>\=] operator {op_chars}+ @@ -955,7 +955,7 @@ other . * that the "self" rule would have. */ if (nchars == 1 && - strchr(",()[].;:+-*/%^<>=", yytext[0])) + strchr(",()[].;:+-*/%^<>=|", yytext[0])) return yytext[0]; /* * Likewise, if what we have left is two chars, and diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c index 5ce0781edf4..b8a3ee2915b 100644 --- a/src/backend/utils/adt/ruleutils.c +++ b/src/backend/utils/adt/ruleutils.c @@ -438,10 +438,12 @@ static void get_rule_groupingset(GroupingSet *gset, List *targetlist, bool omit_parens, deparse_context *context); static void get_rule_orderby(List *orderList, List *targetList, bool force_colno, deparse_context *context); -static void get_rule_pattern(List *patternVariable, List *patternRegexp, - bool force_colno, deparse_context *context); -static void get_rule_define(List *defineClause, List *patternVariables, - bool force_colno, deparse_context *context); +static void append_pattern_quantifier(StringInfo buf, RPRPatternNode *node); +static void get_rule_pattern_node(RPRPatternNode *node, deparse_context *context); +static void get_rule_pattern(RPRPatternNode *rpPattern, bool force_colno, + deparse_context *context); +static void get_rule_define(List *defineClause, bool force_colno, + deparse_context *context); static void get_rule_windowclause(Query *query, deparse_context *context); static void get_rule_windowspec(WindowClause *wc, List *targetList, deparse_context *context); @@ -6749,37 +6751,101 @@ get_rule_orderby(List *orderList, List *targetList, } /* - * Display a PATTERN clause. + * Helper function to append quantifier string for pattern node */ static void -get_rule_pattern(List *patternVariable, List *patternRegexp, - bool force_colno, deparse_context *context) +append_pattern_quantifier(StringInfo buf, RPRPatternNode *node) +{ + bool has_quantifier = true; + + if (node->min == 1 && node->max == 1) + { + /* {1,1} = no quantifier */ + has_quantifier = false; + } + else if (node->min == 0 && node->max == INT_MAX) + appendStringInfoChar(buf, '*'); + else if (node->min == 1 && node->max == INT_MAX) + appendStringInfoChar(buf, '+'); + else if (node->min == 0 && node->max == 1) + appendStringInfoChar(buf, '?'); + else if (node->max == INT_MAX) + appendStringInfo(buf, "{%d,}", node->min); + else if (node->min == node->max) + appendStringInfo(buf, "{%d}", node->min); + else + appendStringInfo(buf, "{%d,%d}", node->min, node->max); + + if (node->reluctant && has_quantifier) + appendStringInfoChar(buf, '?'); +} + +/* + * Recursive helper to display RPRPatternNode tree + */ +static void +get_rule_pattern_node(RPRPatternNode *node, deparse_context *context) { StringInfo buf = context->buf; + ListCell *lc; const char *sep; - ListCell *lc_var, - *lc_reg = list_head(patternRegexp); - sep = ""; - appendStringInfoChar(buf, '('); - foreach(lc_var, patternVariable) - { - char *variable = strVal((String *) lfirst(lc_var)); - char *regexp = NULL; + if (node == NULL) + return; - if (lc_reg != NULL) - { - regexp = strVal((String *) lfirst(lc_reg)); + switch (node->nodeType) + { + case RPR_PATTERN_VAR: + appendStringInfoString(buf, node->varName); + append_pattern_quantifier(buf, node); + break; - lc_reg = lnext(patternRegexp, lc_reg); - } + case RPR_PATTERN_SEQ: + sep = ""; + foreach(lc, node->children) + { + appendStringInfoString(buf, sep); + get_rule_pattern_node((RPRPatternNode *) lfirst(lc), context); + sep = " "; + } + break; - appendStringInfo(buf, "%s%s", sep, variable); - if (regexp != NULL) - appendStringInfoString(buf, regexp); + case RPR_PATTERN_ALT: + sep = ""; + foreach(lc, node->children) + { + appendStringInfoString(buf, sep); + get_rule_pattern_node((RPRPatternNode *) lfirst(lc), context); + sep = " | "; + } + break; - sep = " "; + case RPR_PATTERN_GROUP: + appendStringInfoChar(buf, '('); + sep = ""; + foreach(lc, node->children) + { + appendStringInfoString(buf, sep); + get_rule_pattern_node((RPRPatternNode *) lfirst(lc), context); + sep = " "; + } + appendStringInfoChar(buf, ')'); + append_pattern_quantifier(buf, node); + break; } +} + +/* + * Display a PATTERN clause. + */ +static void +get_rule_pattern(RPRPatternNode *rpPattern, bool force_colno, + deparse_context *context) +{ + StringInfo buf = context->buf; + + appendStringInfoChar(buf, '('); + get_rule_pattern_node(rpPattern, context); appendStringInfoChar(buf, ')'); } @@ -6787,8 +6853,7 @@ get_rule_pattern(List *patternVariable, List *patternRegexp, * Display a DEFINE clause. */ static void -get_rule_define(List *defineClause, List *patternVariables, - bool force_colno, deparse_context *context) +get_rule_define(List *defineClause, bool force_colno, deparse_context *context) { StringInfo buf = context->buf; const char *sep; @@ -6922,13 +6987,12 @@ get_rule_windowspec(WindowClause *wc, List *targetList, appendStringInfoString(buf, "\n INITIAL"); needspace = true; } - if (wc->patternVariable) + if (wc->rpPattern) { if (needspace) appendStringInfoChar(buf, ' '); appendStringInfoString(buf, "\n PATTERN "); - get_rule_pattern(wc->patternVariable, wc->patternRegexp, - false, context); + get_rule_pattern(wc->rpPattern, false, context); needspace = true; } @@ -6937,8 +7001,7 @@ get_rule_windowspec(WindowClause *wc, List *targetList, if (needspace) appendStringInfoChar(buf, ' '); appendStringInfoString(buf, "\n DEFINE\n"); - get_rule_define(wc->defineClause, wc->patternVariable, - false, context); + get_rule_define(wc->defineClause, false, context); appendStringInfoChar(buf, ' '); } diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index b87077b47cb..714c0b62700 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -2634,30 +2634,32 @@ typedef enum WindowAggStatus #define RF_UNMATCHED 3 /* - * Allowed PATTERN variables positions. - * Used in RPR. - * - * pos represents the pattern variable defined order in DEFINE caluase. For - * example. "DEFINE START..., UP..., DOWN ..." and "PATTERN START UP DOWN UP" - * will create: - * VariablePos[0].pos[0] = 0; START - * VariablePos[1].pos[0] = 1; UP - * VariablePos[1].pos[1] = 3; UP - * VariablePos[2].pos[0] = 2; DOWN - * - * Note that UP has two pos because UP appears in PATTERN twice. - * - * By using this strucrture, we can know which pattern variable can be followed - * by which pattern variable(s). For example, START can be followed by UP and - * DOWN since START's pos is 0, and UP's pos is 1 or 3, DOWN's pos is 2. - * DOWN can be followed by UP since UP's pos is either 1 or 3. + * RPRNFAState - single NFA state for pattern matching * + * counts[] tracks repetition counts at each nesting depth. + * altPriority tracks lexical order for alternation (lower = earlier alternative). */ -#define NUM_ALPHABETS 26 /* we allow [a-z] variable initials */ -typedef struct VariablePos +typedef struct RPRNFAState { - int pos[NUM_ALPHABETS]; /* postions in PATTERN */ -} VariablePos; + struct RPRNFAState *next; /* next state in linked list */ + int16 elemIdx; /* current pattern element index */ + int16 altPriority; /* lexical order priority (lower = preferred) */ + int16 counts[FLEXIBLE_ARRAY_MEMBER]; /* repetition counts by depth */ +} RPRNFAState; + +/* + * RPRNFAContext - context for NFA pattern matching execution + */ +typedef struct RPRNFAContext +{ + struct RPRNFAContext *next; /* next context in linked list */ + struct RPRNFAContext *prev; /* previous context (for reverse traversal) */ + RPRNFAState *states; /* active states (linked list) */ + + int64 matchStartRow; /* row where match started */ + int64 matchEndRow; /* row where match ended (-1 = no match) */ + RPRNFAState *matchedState; /* FIN state for greedy fallback (cloned) */ +} RPRNFAContext; typedef struct WindowAggState { @@ -2720,18 +2722,21 @@ typedef struct WindowAggState /* these fields are used in Row pattern recognition: */ RPSkipTo rpSkipTo; /* Row Pattern Skip To type */ - List *patternVariableList; /* list of row pattern variables names - * (list of String) */ - List *patternRegexpList; /* list of row pattern regular expressions - * ('+' or ''. list of String) */ + struct RPRPattern *rpPattern; /* compiled pattern for NFA execution */ List *defineVariableList; /* list of row pattern definition * variables (list of String) */ List *defineClauseList; /* expression for row pattern definition * search conditions ExprState list */ List *defineInitial; /* list of row pattern definition variable * initials (list of String) */ - VariablePos *variable_pos; /* list of pattern variable positions */ - StringInfo pattern_str; /* PATTERN initials */ + RPRNFAContext *nfaContext; /* active matching contexts (head) */ + RPRNFAContext *nfaContextTail; /* tail of active contexts (for reverse traversal) */ + RPRNFAContext *nfaContextPending; /* matched but awaiting earlier starts */ + RPRNFAContext *nfaContextFree; /* recycled NFA context nodes */ + RPRNFAState *nfaStateFree; /* recycled NFA state nodes */ + Size nfaStateSize; /* pre-calculated RPRNFAState size */ + bool *nfaVarMatched; /* per-row cache: varMatched[varId] for varId < numDefines */ + int64 nfaLastProcessedRow; /* last row processed by NFA (-1 = none) */ MemoryContext partcontext; /* context for partition-lifespan data */ MemoryContext aggcontext; /* shared context for aggregate working data */ @@ -2772,10 +2777,6 @@ typedef struct WindowAggState */ char *reduced_frame_map; int64 alloc_sz; /* size of the map */ - - /* regular expression compiled result cache. Used for RPR. */ - char *patbuf; /* pattern to be compiled */ - regex_t preg; /* compiled re pattern */ } WindowAggState; /* ---------------- diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index d6da01d8620..094f35248fd 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -588,9 +588,34 @@ typedef enum RPSkipTo ST_PAST_LAST_ROW, /* SKIP TO PAST LAST ROW */ } RPSkipTo; +/* + * RPRPatternNodeType - Row Pattern Recognition pattern node types + */ +typedef enum RPRPatternNodeType +{ + RPR_PATTERN_VAR, /* variable reference */ + RPR_PATTERN_SEQ, /* sequence (concatenation) */ + RPR_PATTERN_ALT, /* alternation (|) */ + RPR_PATTERN_GROUP /* group (parentheses) */ +} RPRPatternNodeType; + +/* + * RPRPatternNode - Row Pattern Recognition pattern AST node + */ +typedef struct RPRPatternNode +{ + NodeTag type; /* T_RPRPatternNode */ + RPRPatternNodeType nodeType; /* VAR, SEQ, ALT, GROUP */ + int min; /* minimum repetitions (0 for *, ?) */ + int max; /* maximum repetitions (INT_MAX for *, +) */ + bool reluctant; /* true for *?, +?, ?? */ + ParseLoc location; /* token location, or -1 */ + char *varName; /* VAR: variable name */ + List *children; /* SEQ, ALT, GROUP: child nodes */ +} RPRPatternNode; + /* * RowPatternCommonSyntax - raw representation of row pattern common syntax - * */ typedef struct RPCommonSyntax { @@ -598,7 +623,7 @@ typedef struct RPCommonSyntax RPSkipTo rpSkipTo; /* Row Pattern AFTER MATCH SKIP type */ bool initial; /* true if is * initial */ - List *rpPatterns; /* PATTERN variables (list of A_Expr) */ + RPRPatternNode *rpPattern; /* PATTERN clause AST */ List *rpDefs; /* row pattern definitions clause (list of * ResTarget) */ } RPCommonSyntax; @@ -1616,8 +1641,8 @@ typedef struct GroupingSet * * "defineClause" is Row Pattern Recognition DEFINE clause (list of * TargetEntry). TargetEntry.resname represents row pattern definition - * variable name. "patternVariable" and "patternRegexp" represents PATTERN - * clause. + * variable name. "rpPattern" represents PATTERN clause as an AST tree + * (RPRPatternNode). * * The information relevant for the query jumbling is the partition clause * type and its bounds. @@ -1656,14 +1681,8 @@ typedef struct WindowClause List *defineClause; /* Row Pattern DEFINE variable initial names (list of String) */ List *defineInitial; - /* Row Pattern PATTERN variable name (list of String) */ - List *patternVariable; - - /* - * Row Pattern PATTERN regular expression quantifier ('+' or ''. list of - * String) - */ - List *patternRegexp; + /* Row Pattern PATTERN clause AST */ + RPRPatternNode *rpPattern; } WindowClause; /* diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index 8edc7ead7e3..8d9097a8f4e 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -1224,6 +1224,69 @@ typedef struct Agg List *chain; } Agg; +/* ---------------- + * Row Pattern Recognition compiled pattern types + * ---------------- + */ + +/* Type definitions for RPR pattern elements */ +typedef uint8 RPRVarId; /* pattern variable ID */ +typedef uint8 RPRElemFlags; /* element flags */ +typedef uint8 RPRDepth; /* group nesting depth */ +typedef int32 RPRQuantity; /* quantifier min/max */ +typedef int16 RPRElemIdx; /* element array index */ + +/* + * RPRPatternElement - flat element for NFA pattern matching (16 bytes) + * + * Layout optimized for alignment (no padding holes): + * varId(1) + depth(1) + flags(1) + reserved(1) + min(4) + max(4) + next(2) + jump(2) + */ +typedef struct RPRPatternElement +{ + RPRVarId varId; /* variable ID, or special value for control */ + RPRDepth depth; /* group nesting depth */ + RPRElemFlags flags; /* flags (reluctant, etc.) */ + uint8 reserved; /* reserved padding byte */ + RPRQuantity min; /* quantifier minimum */ + RPRQuantity max; /* quantifier maximum */ + RPRElemIdx next; /* next element index */ + RPRElemIdx jump; /* jump target (for ALT/GROUP) */ +} RPRPatternElement; + +/* + * RPRPattern - compiled pattern for NFA execution + * + * Requires custom copy/out/read functions due to elements array. + */ +typedef struct RPRPattern +{ + pg_node_attr(custom_copy_equal, custom_read_write) + + NodeTag type; /* T_RPRPattern */ + int numVars; /* number of pattern variables */ + char **varNames; /* array of variable names (DEFINE order first) */ + RPRDepth maxDepth; /* maximum group nesting depth */ + int numElements; /* number of elements */ + RPRPatternElement *elements; /* array of pattern elements */ + + /* + * Context absorption optimization. + * + * Absorption is only safe when later matches are guaranteed to be + * suffixes of earlier matches. This requires simple pattern structure: + * + * Case 1: No ALT, single unbounded element (A+, (A B)+) + * Case 2: Top-level ALT with each branch being single unbounded (A+ | B+) + * + * Complex patterns like A B (A B)+ could theoretically be transformed to + * (A B){2,} for absorption, but this changes lexical order and is not + * implemented. Similarly, (A|B)+ cannot be absorbed because different + * start positions produce different match contents (not suffix relation). + */ + bool isAbsorbable; /* true if pattern supports context absorption */ +} RPRPattern; + /* ---------------- * window aggregate node * ---------------- @@ -1297,14 +1360,8 @@ typedef struct WindowAgg /* Row Pattern Recognition AFTER MATCH SKIP clause */ RPSkipTo rpSkipTo; /* Row Pattern Skip To type */ - /* Row Pattern PATTERN variable name (list of String) */ - List *patternVariable; - - /* - * Row Pattern RPATTERN regular expression quantifier ('+' or ''. list of - * String) - */ - List *patternRegexp; + /* Compiled Row Pattern for NFA execution */ + struct RPRPattern *rpPattern; /* Row Pattern DEFINE clause (list of TargetEntry) */ List *defineClause; diff --git a/src/include/optimizer/rpr.h b/src/include/optimizer/rpr.h new file mode 100644 index 00000000000..95324e7a343 --- /dev/null +++ b/src/include/optimizer/rpr.h @@ -0,0 +1,46 @@ +/*------------------------------------------------------------------------- + * + * rpr.h + * Row Pattern Recognition pattern compilation for planner + * + * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/optimizer/rpr.h + * + *------------------------------------------------------------------------- + */ +#ifndef OPTIMIZER_RPR_H +#define OPTIMIZER_RPR_H + +#include "nodes/parsenodes.h" +#include "nodes/plannodes.h" + +/* Limits and special values */ +#define RPR_VARID_MAX 252 /* max pattern variables: 252 */ +#define RPR_DEPTH_MAX UINT8_MAX /* max nesting depth: 255 */ +#define RPR_QUANTITY_INF INT32_MAX /* unbounded quantifier */ +#define RPR_ELEMIDX_MAX INT16_MAX /* max pattern elements */ +#define RPR_ELEMIDX_INVALID ((RPRElemIdx) -1) /* invalid index */ + +/* Special varId values for control elements (253-255) */ +#define RPR_VARID_ALT ((RPRVarId) 253) /* alternation start */ +#define RPR_VARID_END ((RPRVarId) 254) /* group end */ +#define RPR_VARID_FIN ((RPRVarId) 255) /* pattern finish */ + +/* Element flags */ +#define RPR_ELEM_RELUCTANT 0x01 /* reluctant (non-greedy) quantifier */ +#define RPR_ELEM_ABSORBABLE 0x02 /* branch supports context absorption */ + +/* Accessor macros for RPRPatternElement */ +#define RPRElemIsReluctant(e) ((e)->flags & RPR_ELEM_RELUCTANT) +#define RPRElemIsAbsorbable(e) ((e)->flags & RPR_ELEM_ABSORBABLE) +#define RPRElemIsVar(e) ((e)->varId <= RPR_VARID_MAX) +#define RPRElemIsAlt(e) ((e)->varId == RPR_VARID_ALT) +#define RPRElemIsEnd(e) ((e)->varId == RPR_VARID_END) +#define RPRElemIsFin(e) ((e)->varId == RPR_VARID_FIN) +#define RPRElemCanSkip(e) ((e)->min == 0) + +extern RPRPattern *buildRPRPattern(RPRPatternNode *pattern, List *defineVariableList); + +#endif /* OPTIMIZER_RPR_H */ diff --git a/src/include/parser/parse_clause.h b/src/include/parser/parse_clause.h index fe234611007..8aaac881f2b 100644 --- a/src/include/parser/parse_clause.h +++ b/src/include/parser/parse_clause.h @@ -52,6 +52,9 @@ extern List *addTargetToSortList(ParseState *pstate, TargetEntry *tle, extern Index assignSortGroupRef(TargetEntry *tle, List *tlist); extern bool targetIsInSortList(TargetEntry *tle, Oid sortop, List *sortList); +extern TargetEntry *findTargetlistEntrySQL99(ParseState *pstate, Node *node, + List **tlist, ParseExprKind exprKind); + /* functions in parse_jsontable.c */ extern ParseNamespaceItem *transformJsonTable(ParseState *pstate, JsonTable *jt); diff --git a/src/include/parser/parse_rpr.h b/src/include/parser/parse_rpr.h new file mode 100644 index 00000000000..40e0258d2e6 --- /dev/null +++ b/src/include/parser/parse_rpr.h @@ -0,0 +1,22 @@ +/*------------------------------------------------------------------------- + * + * parse_rpr.h + * handle Row Pattern Recognition in parser + * + * + * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/parser/parse_rpr.h + * + *------------------------------------------------------------------------- + */ +#ifndef PARSE_RPR_H +#define PARSE_RPR_H + +#include "parser/parse_node.h" + +extern void transformRPR(ParseState *pstate, WindowClause *wc, + WindowDef *windef, List **targetlist); + +#endif /* PARSE_RPR_H */ diff --git a/src/test/regress/expected/rpr.out b/src/test/regress/expected/rpr.out index cff052a66d7..cea986d6704 100644 --- a/src/test/regress/expected/rpr.out +++ b/src/test/regress/expected/rpr.out @@ -203,715 +203,1847 @@ SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER company2 | 07-10-2023 | 1300 | | | (20 rows) --- basic test with none-greedy pattern -SELECT company, tdate, price, count(*) OVER w - FROM stock - WINDOW w AS ( - PARTITION BY company - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - INITIAL - PATTERN (A A A) - DEFINE - A AS price >= 140 AND price <= 150 -); - company | tdate | price | count -----------+------------+-------+------- - company1 | 07-01-2023 | 100 | 0 - company1 | 07-02-2023 | 200 | 0 - company1 | 07-03-2023 | 150 | 3 - company1 | 07-04-2023 | 140 | 0 - company1 | 07-05-2023 | 150 | 0 - company1 | 07-06-2023 | 90 | 0 - company1 | 07-07-2023 | 110 | 0 - company1 | 07-08-2023 | 130 | 0 - company1 | 07-09-2023 | 120 | 0 - company1 | 07-10-2023 | 130 | 0 - company2 | 07-01-2023 | 50 | 0 - company2 | 07-02-2023 | 2000 | 0 - company2 | 07-03-2023 | 1500 | 0 - company2 | 07-04-2023 | 1400 | 0 - company2 | 07-05-2023 | 1500 | 0 - company2 | 07-06-2023 | 60 | 0 - company2 | 07-07-2023 | 1100 | 0 - company2 | 07-08-2023 | 1300 | 0 - company2 | 07-09-2023 | 1200 | 0 - company2 | 07-10-2023 | 1300 | 0 -(20 rows) - --- last_value() should remain consistent -SELECT company, tdate, price, last_value(price) OVER w +-- test using alternation (|) with sequence +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w FROM stock WINDOW w AS ( PARTITION BY company - ORDER BY tdate ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING INITIAL - PATTERN (START UP+ DOWN+) + PATTERN (START (UP | DOWN)) DEFINE START AS TRUE, UP AS price > PREV(price), DOWN AS price < PREV(price) ); - company | tdate | price | last_value -----------+------------+-------+------------ - company1 | 07-01-2023 | 100 | 140 - company1 | 07-02-2023 | 200 | - company1 | 07-03-2023 | 150 | - company1 | 07-04-2023 | 140 | - company1 | 07-05-2023 | 150 | - company1 | 07-06-2023 | 90 | 120 - company1 | 07-07-2023 | 110 | - company1 | 07-08-2023 | 130 | - company1 | 07-09-2023 | 120 | - company1 | 07-10-2023 | 130 | - company2 | 07-01-2023 | 50 | 1400 - company2 | 07-02-2023 | 2000 | - company2 | 07-03-2023 | 1500 | - company2 | 07-04-2023 | 1400 | - company2 | 07-05-2023 | 1500 | - company2 | 07-06-2023 | 60 | 1200 - company2 | 07-07-2023 | 1100 | - company2 | 07-08-2023 | 1300 | - company2 | 07-09-2023 | 1200 | - company2 | 07-10-2023 | 1300 | -(20 rows) - --- omit "START" in DEFINE but it is ok because "START AS TRUE" is --- implicitly defined. per spec. -SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w, - nth_value(tdate, 2) OVER w AS nth_second - FROM stock - WINDOW w AS ( - PARTITION BY company - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - INITIAL - PATTERN (START UP+ DOWN+) - DEFINE - UP AS price > PREV(price), - DOWN AS price < PREV(price) -); - company | tdate | price | first_value | last_value | nth_second -----------+------------+-------+-------------+------------+------------ - company1 | 07-01-2023 | 100 | 100 | 140 | 07-02-2023 - company1 | 07-02-2023 | 200 | | | - company1 | 07-03-2023 | 150 | | | - company1 | 07-04-2023 | 140 | | | - company1 | 07-05-2023 | 150 | | | - company1 | 07-06-2023 | 90 | 90 | 120 | 07-07-2023 - company1 | 07-07-2023 | 110 | | | - company1 | 07-08-2023 | 130 | | | - company1 | 07-09-2023 | 120 | | | - company1 | 07-10-2023 | 130 | | | - company2 | 07-01-2023 | 50 | 50 | 1400 | 07-02-2023 - company2 | 07-02-2023 | 2000 | | | - company2 | 07-03-2023 | 1500 | | | - company2 | 07-04-2023 | 1400 | | | - company2 | 07-05-2023 | 1500 | | | - company2 | 07-06-2023 | 60 | 60 | 1200 | 07-07-2023 - company2 | 07-07-2023 | 1100 | | | - company2 | 07-08-2023 | 1300 | | | - company2 | 07-09-2023 | 1200 | | | - company2 | 07-10-2023 | 1300 | | | + company | tdate | price | first_value | last_value +----------+------------+-------+-------------+------------ + company1 | 07-01-2023 | 100 | 100 | 200 + company1 | 07-02-2023 | 200 | | + company1 | 07-03-2023 | 150 | 150 | 140 + company1 | 07-04-2023 | 140 | | + company1 | 07-05-2023 | 150 | 150 | 90 + company1 | 07-06-2023 | 90 | | + company1 | 07-07-2023 | 110 | 110 | 130 + company1 | 07-08-2023 | 130 | | + company1 | 07-09-2023 | 120 | 120 | 130 + company1 | 07-10-2023 | 130 | | + company2 | 07-01-2023 | 50 | 50 | 2000 + company2 | 07-02-2023 | 2000 | | + company2 | 07-03-2023 | 1500 | 1500 | 1400 + company2 | 07-04-2023 | 1400 | | + company2 | 07-05-2023 | 1500 | 1500 | 60 + company2 | 07-06-2023 | 60 | | + company2 | 07-07-2023 | 1100 | 1100 | 1300 + company2 | 07-08-2023 | 1300 | | + company2 | 07-09-2023 | 1200 | 1200 | 1300 + company2 | 07-10-2023 | 1300 | | (20 rows) --- the first row start with less than or equal to 100 +-- test using alternation (|) with group quantifier SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w FROM stock WINDOW w AS ( PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING INITIAL - PATTERN (LOWPRICE UP+ DOWN+) + PATTERN (START (UP | DOWN)+) DEFINE - LOWPRICE AS price <= 100, + START AS TRUE, UP AS price > PREV(price), DOWN AS price < PREV(price) ); company | tdate | price | first_value | last_value ----------+------------+-------+-------------+------------ - company1 | 07-01-2023 | 100 | 100 | 140 + company1 | 07-01-2023 | 100 | 100 | 130 company1 | 07-02-2023 | 200 | | company1 | 07-03-2023 | 150 | | company1 | 07-04-2023 | 140 | | company1 | 07-05-2023 | 150 | | - company1 | 07-06-2023 | 90 | 90 | 120 + company1 | 07-06-2023 | 90 | | company1 | 07-07-2023 | 110 | | company1 | 07-08-2023 | 130 | | company1 | 07-09-2023 | 120 | | company1 | 07-10-2023 | 130 | | - company2 | 07-01-2023 | 50 | 50 | 1400 + company2 | 07-01-2023 | 50 | 50 | 1300 company2 | 07-02-2023 | 2000 | | company2 | 07-03-2023 | 1500 | | company2 | 07-04-2023 | 1400 | | company2 | 07-05-2023 | 1500 | | - company2 | 07-06-2023 | 60 | 60 | 1200 + company2 | 07-06-2023 | 60 | | company2 | 07-07-2023 | 1100 | | company2 | 07-08-2023 | 1300 | | company2 | 07-09-2023 | 1200 | | company2 | 07-10-2023 | 1300 | | (20 rows) --- second row raises 120% +-- test using nested alternation SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w FROM stock WINDOW w AS ( PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING INITIAL - PATTERN (LOWPRICE UP+ DOWN+) + PATTERN (START ((UP DOWN) | FLAT)+) DEFINE - LOWPRICE AS price <= 100, - UP AS price > PREV(price) * 1.2, - DOWN AS price < PREV(price) + START AS TRUE, + UP AS price > PREV(price), + DOWN AS price < PREV(price), + FLAT AS price = PREV(price) ); company | tdate | price | first_value | last_value ----------+------------+-------+-------------+------------ - company1 | 07-01-2023 | 100 | 100 | 140 + company1 | 07-01-2023 | 100 | 100 | 150 company1 | 07-02-2023 | 200 | | company1 | 07-03-2023 | 150 | | - company1 | 07-04-2023 | 140 | | + company1 | 07-04-2023 | 140 | 140 | 90 company1 | 07-05-2023 | 150 | | company1 | 07-06-2023 | 90 | | - company1 | 07-07-2023 | 110 | | + company1 | 07-07-2023 | 110 | 110 | 120 company1 | 07-08-2023 | 130 | | company1 | 07-09-2023 | 120 | | company1 | 07-10-2023 | 130 | | - company2 | 07-01-2023 | 50 | 50 | 1400 + company2 | 07-01-2023 | 50 | 50 | 1500 company2 | 07-02-2023 | 2000 | | company2 | 07-03-2023 | 1500 | | - company2 | 07-04-2023 | 1400 | | + company2 | 07-04-2023 | 1400 | 1400 | 60 company2 | 07-05-2023 | 1500 | | company2 | 07-06-2023 | 60 | | - company2 | 07-07-2023 | 1100 | | + company2 | 07-07-2023 | 1100 | 1100 | 1200 company2 | 07-08-2023 | 1300 | | company2 | 07-09-2023 | 1200 | | company2 | 07-10-2023 | 1300 | | (20 rows) --- using NEXT +-- test using group with quantifier SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w FROM stock WINDOW w AS ( PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING INITIAL - PATTERN (START UPDOWN) + PATTERN ((UP DOWN)+) DEFINE - START AS TRUE, - UPDOWN AS price > PREV(price) AND price > NEXT(price) + UP AS price > PREV(price), + DOWN AS price < PREV(price) ); company | tdate | price | first_value | last_value ----------+------------+-------+-------------+------------ - company1 | 07-01-2023 | 100 | 100 | 200 - company1 | 07-02-2023 | 200 | | + company1 | 07-01-2023 | 100 | | + company1 | 07-02-2023 | 200 | 200 | 150 company1 | 07-03-2023 | 150 | | - company1 | 07-04-2023 | 140 | 140 | 150 - company1 | 07-05-2023 | 150 | | + company1 | 07-04-2023 | 140 | | + company1 | 07-05-2023 | 150 | 150 | 90 company1 | 07-06-2023 | 90 | | - company1 | 07-07-2023 | 110 | 110 | 130 - company1 | 07-08-2023 | 130 | | + company1 | 07-07-2023 | 110 | | + company1 | 07-08-2023 | 130 | 130 | 120 company1 | 07-09-2023 | 120 | | company1 | 07-10-2023 | 130 | | - company2 | 07-01-2023 | 50 | 50 | 2000 - company2 | 07-02-2023 | 2000 | | + company2 | 07-01-2023 | 50 | | + company2 | 07-02-2023 | 2000 | 2000 | 1500 company2 | 07-03-2023 | 1500 | | - company2 | 07-04-2023 | 1400 | 1400 | 1500 - company2 | 07-05-2023 | 1500 | | + company2 | 07-04-2023 | 1400 | | + company2 | 07-05-2023 | 1500 | 1500 | 60 company2 | 07-06-2023 | 60 | | - company2 | 07-07-2023 | 1100 | 1100 | 1300 - company2 | 07-08-2023 | 1300 | | + company2 | 07-07-2023 | 1100 | | + company2 | 07-08-2023 | 1300 | 1300 | 1200 company2 | 07-09-2023 | 1200 | | company2 | 07-10-2023 | 1300 | | (20 rows) --- using AFTER MATCH SKIP TO NEXT ROW +-- test using absolute threshold values (not relative PREV) +-- HIGH: price > 150, LOW: price < 100, MID: neutral range SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w FROM stock WINDOW w AS ( PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP TO NEXT ROW INITIAL - PATTERN (START UPDOWN) + PATTERN (LOW MID* HIGH) DEFINE - START AS TRUE, - UPDOWN AS price > PREV(price) AND price > NEXT(price) + LOW AS price < 100, + MID AS price >= 100 AND price <= 150, + HIGH AS price > 150 ); company | tdate | price | first_value | last_value ----------+------------+-------+-------------+------------ - company1 | 07-01-2023 | 100 | 100 | 200 + company1 | 07-01-2023 | 100 | | company1 | 07-02-2023 | 200 | | company1 | 07-03-2023 | 150 | | - company1 | 07-04-2023 | 140 | 140 | 150 + company1 | 07-04-2023 | 140 | | company1 | 07-05-2023 | 150 | | company1 | 07-06-2023 | 90 | | - company1 | 07-07-2023 | 110 | 110 | 130 + company1 | 07-07-2023 | 110 | | company1 | 07-08-2023 | 130 | | company1 | 07-09-2023 | 120 | | company1 | 07-10-2023 | 130 | | company2 | 07-01-2023 | 50 | 50 | 2000 company2 | 07-02-2023 | 2000 | | company2 | 07-03-2023 | 1500 | | - company2 | 07-04-2023 | 1400 | 1400 | 1500 + company2 | 07-04-2023 | 1400 | | company2 | 07-05-2023 | 1500 | | - company2 | 07-06-2023 | 60 | | - company2 | 07-07-2023 | 1100 | 1100 | 1300 + company2 | 07-06-2023 | 60 | 60 | 1100 + company2 | 07-07-2023 | 1100 | | company2 | 07-08-2023 | 1300 | | company2 | 07-09-2023 | 1200 | | company2 | 07-10-2023 | 1300 | | (20 rows) --- match everything +-- test threshold-based pattern with alternation SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w FROM stock WINDOW w AS ( PARTITION BY company - ORDER BY tdate ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP PAST LAST ROW INITIAL - PATTERN (A+) + PATTERN (LOW (MID | HIGH)+) DEFINE - A AS TRUE + LOW AS price < 100, + MID AS price >= 100 AND price <= 150, + HIGH AS price > 150 ); company | tdate | price | first_value | last_value ----------+------------+-------+-------------+------------ - company1 | 07-01-2023 | 100 | 100 | 130 + company1 | 07-01-2023 | 100 | | company1 | 07-02-2023 | 200 | | company1 | 07-03-2023 | 150 | | company1 | 07-04-2023 | 140 | | company1 | 07-05-2023 | 150 | | - company1 | 07-06-2023 | 90 | | + company1 | 07-06-2023 | 90 | 90 | 130 company1 | 07-07-2023 | 110 | | company1 | 07-08-2023 | 130 | | company1 | 07-09-2023 | 120 | | company1 | 07-10-2023 | 130 | | - company2 | 07-01-2023 | 50 | 50 | 1300 + company2 | 07-01-2023 | 50 | 50 | 1500 company2 | 07-02-2023 | 2000 | | company2 | 07-03-2023 | 1500 | | company2 | 07-04-2023 | 1400 | | company2 | 07-05-2023 | 1500 | | - company2 | 07-06-2023 | 60 | | + company2 | 07-06-2023 | 60 | 60 | 1300 company2 | 07-07-2023 | 1100 | | company2 | 07-08-2023 | 1300 | | company2 | 07-09-2023 | 1200 | | company2 | 07-10-2023 | 1300 | | (20 rows) --- backtracking with reclassification of rows --- using AFTER MATCH SKIP PAST LAST ROW -SELECT company, tdate, price, first_value(tdate) OVER w, last_value(tdate) OVER w +-- basic test with none-greedy pattern +SELECT company, tdate, price, count(*) OVER w FROM stock WINDOW w AS ( PARTITION BY company - ORDER BY tdate ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP PAST LAST ROW INITIAL - PATTERN (A+ B+) + PATTERN (A A A) DEFINE - A AS price > 100, - B AS price > 100 + A AS price >= 140 AND price <= 150 ); - company | tdate | price | first_value | last_value -----------+------------+-------+-------------+------------ - company1 | 07-01-2023 | 100 | | - company1 | 07-02-2023 | 200 | 07-02-2023 | 07-05-2023 - company1 | 07-03-2023 | 150 | | - company1 | 07-04-2023 | 140 | | - company1 | 07-05-2023 | 150 | | - company1 | 07-06-2023 | 90 | | - company1 | 07-07-2023 | 110 | 07-07-2023 | 07-10-2023 - company1 | 07-08-2023 | 130 | | - company1 | 07-09-2023 | 120 | | - company1 | 07-10-2023 | 130 | | - company2 | 07-01-2023 | 50 | | - company2 | 07-02-2023 | 2000 | 07-02-2023 | 07-05-2023 - company2 | 07-03-2023 | 1500 | | - company2 | 07-04-2023 | 1400 | | - company2 | 07-05-2023 | 1500 | | - company2 | 07-06-2023 | 60 | | - company2 | 07-07-2023 | 1100 | 07-07-2023 | 07-10-2023 - company2 | 07-08-2023 | 1300 | | - company2 | 07-09-2023 | 1200 | | - company2 | 07-10-2023 | 1300 | | + company | tdate | price | count +----------+------------+-------+------- + company1 | 07-01-2023 | 100 | 0 + company1 | 07-02-2023 | 200 | 0 + company1 | 07-03-2023 | 150 | 3 + company1 | 07-04-2023 | 140 | 0 + company1 | 07-05-2023 | 150 | 0 + company1 | 07-06-2023 | 90 | 0 + company1 | 07-07-2023 | 110 | 0 + company1 | 07-08-2023 | 130 | 0 + company1 | 07-09-2023 | 120 | 0 + company1 | 07-10-2023 | 130 | 0 + company2 | 07-01-2023 | 50 | 0 + company2 | 07-02-2023 | 2000 | 0 + company2 | 07-03-2023 | 1500 | 0 + company2 | 07-04-2023 | 1400 | 0 + company2 | 07-05-2023 | 1500 | 0 + company2 | 07-06-2023 | 60 | 0 + company2 | 07-07-2023 | 1100 | 0 + company2 | 07-08-2023 | 1300 | 0 + company2 | 07-09-2023 | 1200 | 0 + company2 | 07-10-2023 | 1300 | 0 (20 rows) --- backtracking with reclassification of rows --- using AFTER MATCH SKIP TO NEXT ROW -SELECT company, tdate, price, first_value(tdate) OVER w, last_value(tdate) OVER w +-- test using {n} quantifier (A A A should be optimized to A{3}) +SELECT company, tdate, price, count(*) OVER w FROM stock WINDOW w AS ( PARTITION BY company - ORDER BY tdate ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP TO NEXT ROW INITIAL - PATTERN (A+ B+) + PATTERN (A{3}) DEFINE - A AS price > 100, - B AS price > 100 + A AS price >= 140 AND price <= 150 ); - company | tdate | price | first_value | last_value -----------+------------+-------+-------------+------------ - company1 | 07-01-2023 | 100 | | - company1 | 07-02-2023 | 200 | 07-02-2023 | 07-05-2023 - company1 | 07-03-2023 | 150 | 07-03-2023 | 07-05-2023 - company1 | 07-04-2023 | 140 | 07-04-2023 | 07-05-2023 - company1 | 07-05-2023 | 150 | | - company1 | 07-06-2023 | 90 | | - company1 | 07-07-2023 | 110 | 07-07-2023 | 07-10-2023 - company1 | 07-08-2023 | 130 | 07-08-2023 | 07-10-2023 - company1 | 07-09-2023 | 120 | 07-09-2023 | 07-10-2023 - company1 | 07-10-2023 | 130 | | - company2 | 07-01-2023 | 50 | | - company2 | 07-02-2023 | 2000 | 07-02-2023 | 07-05-2023 - company2 | 07-03-2023 | 1500 | 07-03-2023 | 07-05-2023 - company2 | 07-04-2023 | 1400 | 07-04-2023 | 07-05-2023 - company2 | 07-05-2023 | 1500 | | - company2 | 07-06-2023 | 60 | | - company2 | 07-07-2023 | 1100 | 07-07-2023 | 07-10-2023 - company2 | 07-08-2023 | 1300 | 07-08-2023 | 07-10-2023 - company2 | 07-09-2023 | 1200 | 07-09-2023 | 07-10-2023 - company2 | 07-10-2023 | 1300 | | + company | tdate | price | count +----------+------------+-------+------- + company1 | 07-01-2023 | 100 | 0 + company1 | 07-02-2023 | 200 | 0 + company1 | 07-03-2023 | 150 | 3 + company1 | 07-04-2023 | 140 | 0 + company1 | 07-05-2023 | 150 | 0 + company1 | 07-06-2023 | 90 | 0 + company1 | 07-07-2023 | 110 | 0 + company1 | 07-08-2023 | 130 | 0 + company1 | 07-09-2023 | 120 | 0 + company1 | 07-10-2023 | 130 | 0 + company2 | 07-01-2023 | 50 | 0 + company2 | 07-02-2023 | 2000 | 0 + company2 | 07-03-2023 | 1500 | 0 + company2 | 07-04-2023 | 1400 | 0 + company2 | 07-05-2023 | 1500 | 0 + company2 | 07-06-2023 | 60 | 0 + company2 | 07-07-2023 | 1100 | 0 + company2 | 07-08-2023 | 1300 | 0 + company2 | 07-09-2023 | 1200 | 0 + company2 | 07-10-2023 | 1300 | 0 (20 rows) --- ROWS BETWEEN CURRENT ROW AND offset FOLLOWING -SELECT company, tdate, price, first_value(tdate) OVER w, last_value(tdate) OVER w, - count(*) OVER w +-- test using {n,} quantifier (2 or more) +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A{2,}) + DEFINE + A AS price > 100 +); + company | tdate | price | count +----------+------------+-------+------- + company1 | 07-01-2023 | 100 | 0 + company1 | 07-02-2023 | 200 | 4 + company1 | 07-03-2023 | 150 | 0 + company1 | 07-04-2023 | 140 | 0 + company1 | 07-05-2023 | 150 | 0 + company1 | 07-06-2023 | 90 | 0 + company1 | 07-07-2023 | 110 | 4 + company1 | 07-08-2023 | 130 | 0 + company1 | 07-09-2023 | 120 | 0 + company1 | 07-10-2023 | 130 | 0 + company2 | 07-01-2023 | 50 | 0 + company2 | 07-02-2023 | 2000 | 4 + company2 | 07-03-2023 | 1500 | 0 + company2 | 07-04-2023 | 1400 | 0 + company2 | 07-05-2023 | 1500 | 0 + company2 | 07-06-2023 | 60 | 0 + company2 | 07-07-2023 | 1100 | 4 + company2 | 07-08-2023 | 1300 | 0 + company2 | 07-09-2023 | 1200 | 0 + company2 | 07-10-2023 | 1300 | 0 +(20 rows) + +-- test using {n,m} quantifier (2 to 4) +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A{2,4}) + DEFINE + A AS price > 100 +); + company | tdate | price | count +----------+------------+-------+------- + company1 | 07-01-2023 | 100 | 0 + company1 | 07-02-2023 | 200 | 4 + company1 | 07-03-2023 | 150 | 0 + company1 | 07-04-2023 | 140 | 0 + company1 | 07-05-2023 | 150 | 0 + company1 | 07-06-2023 | 90 | 0 + company1 | 07-07-2023 | 110 | 4 + company1 | 07-08-2023 | 130 | 0 + company1 | 07-09-2023 | 120 | 0 + company1 | 07-10-2023 | 130 | 0 + company2 | 07-01-2023 | 50 | 0 + company2 | 07-02-2023 | 2000 | 4 + company2 | 07-03-2023 | 1500 | 0 + company2 | 07-04-2023 | 1400 | 0 + company2 | 07-05-2023 | 1500 | 0 + company2 | 07-06-2023 | 60 | 0 + company2 | 07-07-2023 | 1100 | 4 + company2 | 07-08-2023 | 1300 | 0 + company2 | 07-09-2023 | 1200 | 0 + company2 | 07-10-2023 | 1300 | 0 +(20 rows) + +-- last_value() should remain consistent +SELECT company, tdate, price, last_value(price) OVER w FROM stock WINDOW w AS ( PARTITION BY company ORDER BY tdate - ROWS BETWEEN CURRENT ROW AND 2 FOLLOWING - AFTER MATCH SKIP PAST LAST ROW + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL PATTERN (START UP+ DOWN+) DEFINE START AS TRUE, UP AS price > PREV(price), DOWN AS price < PREV(price) ); - company | tdate | price | first_value | last_value | count -----------+------------+-------+-------------+------------+------- - company1 | 07-01-2023 | 100 | 07-01-2023 | 07-03-2023 | 3 - company1 | 07-02-2023 | 200 | | | 0 - company1 | 07-03-2023 | 150 | | | 0 - company1 | 07-04-2023 | 140 | 07-04-2023 | 07-06-2023 | 3 - company1 | 07-05-2023 | 150 | | | 0 - company1 | 07-06-2023 | 90 | | | 0 - company1 | 07-07-2023 | 110 | 07-07-2023 | 07-09-2023 | 3 - company1 | 07-08-2023 | 130 | | | 0 - company1 | 07-09-2023 | 120 | | | 0 - company1 | 07-10-2023 | 130 | | | 0 - company2 | 07-01-2023 | 50 | 07-01-2023 | 07-03-2023 | 3 - company2 | 07-02-2023 | 2000 | | | 0 - company2 | 07-03-2023 | 1500 | | | 0 - company2 | 07-04-2023 | 1400 | 07-04-2023 | 07-06-2023 | 3 - company2 | 07-05-2023 | 1500 | | | 0 - company2 | 07-06-2023 | 60 | | | 0 - company2 | 07-07-2023 | 1100 | 07-07-2023 | 07-09-2023 | 3 - company2 | 07-08-2023 | 1300 | | | 0 - company2 | 07-09-2023 | 1200 | | | 0 - company2 | 07-10-2023 | 1300 | | | 0 + company | tdate | price | last_value +----------+------------+-------+------------ + company1 | 07-01-2023 | 100 | 140 + company1 | 07-02-2023 | 200 | + company1 | 07-03-2023 | 150 | + company1 | 07-04-2023 | 140 | + company1 | 07-05-2023 | 150 | + company1 | 07-06-2023 | 90 | 120 + company1 | 07-07-2023 | 110 | + company1 | 07-08-2023 | 130 | + company1 | 07-09-2023 | 120 | + company1 | 07-10-2023 | 130 | + company2 | 07-01-2023 | 50 | 1400 + company2 | 07-02-2023 | 2000 | + company2 | 07-03-2023 | 1500 | + company2 | 07-04-2023 | 1400 | + company2 | 07-05-2023 | 1500 | + company2 | 07-06-2023 | 60 | 1200 + company2 | 07-07-2023 | 1100 | + company2 | 07-08-2023 | 1300 | + company2 | 07-09-2023 | 1200 | + company2 | 07-10-2023 | 1300 | (20 rows) --- --- Aggregates --- --- using AFTER MATCH SKIP PAST LAST ROW -SELECT company, tdate, price, - first_value(price) OVER w, - last_value(price) OVER w, - max(price) OVER w, - min(price) OVER w, - sum(price) OVER w, - avg(price) OVER w, - count(price) OVER w -FROM stock -WINDOW w AS ( -PARTITION BY company -ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING -AFTER MATCH SKIP PAST LAST ROW -INITIAL -PATTERN (START UP+ DOWN+) -DEFINE -START AS TRUE, -UP AS price > PREV(price), -DOWN AS price < PREV(price) +-- omit "START" in DEFINE but it is ok because "START AS TRUE" is +-- implicitly defined. per spec. +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w, + nth_value(tdate, 2) OVER w AS nth_second + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (START UP+ DOWN+) + DEFINE + UP AS price > PREV(price), + DOWN AS price < PREV(price) ); - company | tdate | price | first_value | last_value | max | min | sum | avg | count -----------+------------+-------+-------------+------------+------+-----+------+-----------------------+------- - company1 | 07-01-2023 | 100 | 100 | 140 | 200 | 100 | 590 | 147.5000000000000000 | 4 - company1 | 07-02-2023 | 200 | | | | | | | 0 - company1 | 07-03-2023 | 150 | | | | | | | 0 - company1 | 07-04-2023 | 140 | | | | | | | 0 - company1 | 07-05-2023 | 150 | | | | | | | 0 - company1 | 07-06-2023 | 90 | 90 | 120 | 130 | 90 | 450 | 112.5000000000000000 | 4 - company1 | 07-07-2023 | 110 | | | | | | | 0 - company1 | 07-08-2023 | 130 | | | | | | | 0 - company1 | 07-09-2023 | 120 | | | | | | | 0 - company1 | 07-10-2023 | 130 | | | | | | | 0 - company2 | 07-01-2023 | 50 | 50 | 1400 | 2000 | 50 | 4950 | 1237.5000000000000000 | 4 - company2 | 07-02-2023 | 2000 | | | | | | | 0 - company2 | 07-03-2023 | 1500 | | | | | | | 0 - company2 | 07-04-2023 | 1400 | | | | | | | 0 - company2 | 07-05-2023 | 1500 | | | | | | | 0 - company2 | 07-06-2023 | 60 | 60 | 1200 | 1300 | 60 | 3660 | 915.0000000000000000 | 4 - company2 | 07-07-2023 | 1100 | | | | | | | 0 - company2 | 07-08-2023 | 1300 | | | | | | | 0 - company2 | 07-09-2023 | 1200 | | | | | | | 0 - company2 | 07-10-2023 | 1300 | | | | | | | 0 + company | tdate | price | first_value | last_value | nth_second +----------+------------+-------+-------------+------------+------------ + company1 | 07-01-2023 | 100 | 100 | 140 | 07-02-2023 + company1 | 07-02-2023 | 200 | | | + company1 | 07-03-2023 | 150 | | | + company1 | 07-04-2023 | 140 | | | + company1 | 07-05-2023 | 150 | | | + company1 | 07-06-2023 | 90 | 90 | 120 | 07-07-2023 + company1 | 07-07-2023 | 110 | | | + company1 | 07-08-2023 | 130 | | | + company1 | 07-09-2023 | 120 | | | + company1 | 07-10-2023 | 130 | | | + company2 | 07-01-2023 | 50 | 50 | 1400 | 07-02-2023 + company2 | 07-02-2023 | 2000 | | | + company2 | 07-03-2023 | 1500 | | | + company2 | 07-04-2023 | 1400 | | | + company2 | 07-05-2023 | 1500 | | | + company2 | 07-06-2023 | 60 | 60 | 1200 | 07-07-2023 + company2 | 07-07-2023 | 1100 | | | + company2 | 07-08-2023 | 1300 | | | + company2 | 07-09-2023 | 1200 | | | + company2 | 07-10-2023 | 1300 | | | (20 rows) --- using AFTER MATCH SKIP TO NEXT ROW -SELECT company, tdate, price, - first_value(price) OVER w, - last_value(price) OVER w, - max(price) OVER w, - min(price) OVER w, - sum(price) OVER w, - avg(price) OVER w, - count(price) OVER w -FROM stock -WINDOW w AS ( -PARTITION BY company -ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING -AFTER MATCH SKIP TO NEXT ROW -INITIAL -PATTERN (START UP+ DOWN+) -DEFINE -START AS TRUE, -UP AS price > PREV(price), -DOWN AS price < PREV(price) -); - company | tdate | price | first_value | last_value | max | min | sum | avg | count -----------+------------+-------+-------------+------------+------+------+------+-----------------------+------- - company1 | 07-01-2023 | 100 | 100 | 140 | 200 | 100 | 590 | 147.5000000000000000 | 4 - company1 | 07-02-2023 | 200 | | | | | | | 0 - company1 | 07-03-2023 | 150 | | | | | | | 0 - company1 | 07-04-2023 | 140 | 140 | 90 | 150 | 90 | 380 | 126.6666666666666667 | 3 - company1 | 07-05-2023 | 150 | | | | | | | 0 - company1 | 07-06-2023 | 90 | 90 | 120 | 130 | 90 | 450 | 112.5000000000000000 | 4 - company1 | 07-07-2023 | 110 | 110 | 120 | 130 | 110 | 360 | 120.0000000000000000 | 3 - company1 | 07-08-2023 | 130 | | | | | | | 0 - company1 | 07-09-2023 | 120 | | | | | | | 0 - company1 | 07-10-2023 | 130 | | | | | | | 0 - company2 | 07-01-2023 | 50 | 50 | 1400 | 2000 | 50 | 4950 | 1237.5000000000000000 | 4 - company2 | 07-02-2023 | 2000 | | | | | | | 0 - company2 | 07-03-2023 | 1500 | | | | | | | 0 - company2 | 07-04-2023 | 1400 | 1400 | 60 | 1500 | 60 | 2960 | 986.6666666666666667 | 3 - company2 | 07-05-2023 | 1500 | | | | | | | 0 - company2 | 07-06-2023 | 60 | 60 | 1200 | 1300 | 60 | 3660 | 915.0000000000000000 | 4 - company2 | 07-07-2023 | 1100 | 1100 | 1200 | 1300 | 1100 | 3600 | 1200.0000000000000000 | 3 - company2 | 07-08-2023 | 1300 | | | | | | | 0 - company2 | 07-09-2023 | 1200 | | | | | | | 0 - company2 | 07-10-2023 | 1300 | | | | | | | 0 +-- the first row start with less than or equal to 100 +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (LOWPRICE UP+ DOWN+) + DEFINE + LOWPRICE AS price <= 100, + UP AS price > PREV(price), + DOWN AS price < PREV(price) +); + company | tdate | price | first_value | last_value +----------+------------+-------+-------------+------------ + company1 | 07-01-2023 | 100 | 100 | 140 + company1 | 07-02-2023 | 200 | | + company1 | 07-03-2023 | 150 | | + company1 | 07-04-2023 | 140 | | + company1 | 07-05-2023 | 150 | | + company1 | 07-06-2023 | 90 | 90 | 120 + company1 | 07-07-2023 | 110 | | + company1 | 07-08-2023 | 130 | | + company1 | 07-09-2023 | 120 | | + company1 | 07-10-2023 | 130 | | + company2 | 07-01-2023 | 50 | 50 | 1400 + company2 | 07-02-2023 | 2000 | | + company2 | 07-03-2023 | 1500 | | + company2 | 07-04-2023 | 1400 | | + company2 | 07-05-2023 | 1500 | | + company2 | 07-06-2023 | 60 | 60 | 1200 + company2 | 07-07-2023 | 1100 | | + company2 | 07-08-2023 | 1300 | | + company2 | 07-09-2023 | 1200 | | + company2 | 07-10-2023 | 1300 | | (20 rows) --- JOIN case -CREATE TEMP TABLE t1 (i int, v1 int); -CREATE TEMP TABLE t2 (j int, v2 int); -INSERT INTO t1 VALUES(1,10); -INSERT INTO t1 VALUES(1,11); -INSERT INTO t1 VALUES(1,12); -INSERT INTO t2 VALUES(2,10); -INSERT INTO t2 VALUES(2,11); -INSERT INTO t2 VALUES(2,12); -SELECT * FROM t1, t2 WHERE t1.v1 <= 11 AND t2.v2 <= 11; - i | v1 | j | v2 ----+----+---+---- - 1 | 10 | 2 | 10 - 1 | 10 | 2 | 11 - 1 | 11 | 2 | 10 - 1 | 11 | 2 | 11 -(4 rows) +-- second row raises 120% +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (LOWPRICE UP+ DOWN+) + DEFINE + LOWPRICE AS price <= 100, + UP AS price > PREV(price) * 1.2, + DOWN AS price < PREV(price) +); + company | tdate | price | first_value | last_value +----------+------------+-------+-------------+------------ + company1 | 07-01-2023 | 100 | 100 | 140 + company1 | 07-02-2023 | 200 | | + company1 | 07-03-2023 | 150 | | + company1 | 07-04-2023 | 140 | | + company1 | 07-05-2023 | 150 | | + company1 | 07-06-2023 | 90 | | + company1 | 07-07-2023 | 110 | | + company1 | 07-08-2023 | 130 | | + company1 | 07-09-2023 | 120 | | + company1 | 07-10-2023 | 130 | | + company2 | 07-01-2023 | 50 | 50 | 1400 + company2 | 07-02-2023 | 2000 | | + company2 | 07-03-2023 | 1500 | | + company2 | 07-04-2023 | 1400 | | + company2 | 07-05-2023 | 1500 | | + company2 | 07-06-2023 | 60 | | + company2 | 07-07-2023 | 1100 | | + company2 | 07-08-2023 | 1300 | | + company2 | 07-09-2023 | 1200 | | + company2 | 07-10-2023 | 1300 | | +(20 rows) + +-- using NEXT +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (START UPDOWN) + DEFINE + START AS TRUE, + UPDOWN AS price > PREV(price) AND price > NEXT(price) +); + company | tdate | price | first_value | last_value +----------+------------+-------+-------------+------------ + company1 | 07-01-2023 | 100 | 100 | 200 + company1 | 07-02-2023 | 200 | | + company1 | 07-03-2023 | 150 | | + company1 | 07-04-2023 | 140 | 140 | 150 + company1 | 07-05-2023 | 150 | | + company1 | 07-06-2023 | 90 | | + company1 | 07-07-2023 | 110 | 110 | 130 + company1 | 07-08-2023 | 130 | | + company1 | 07-09-2023 | 120 | | + company1 | 07-10-2023 | 130 | | + company2 | 07-01-2023 | 50 | 50 | 2000 + company2 | 07-02-2023 | 2000 | | + company2 | 07-03-2023 | 1500 | | + company2 | 07-04-2023 | 1400 | 1400 | 1500 + company2 | 07-05-2023 | 1500 | | + company2 | 07-06-2023 | 60 | | + company2 | 07-07-2023 | 1100 | 1100 | 1300 + company2 | 07-08-2023 | 1300 | | + company2 | 07-09-2023 | 1200 | | + company2 | 07-10-2023 | 1300 | | +(20 rows) + +-- using AFTER MATCH SKIP TO NEXT ROW +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + INITIAL + PATTERN (START UPDOWN) + DEFINE + START AS TRUE, + UPDOWN AS price > PREV(price) AND price > NEXT(price) +); + company | tdate | price | first_value | last_value +----------+------------+-------+-------------+------------ + company1 | 07-01-2023 | 100 | 100 | 200 + company1 | 07-02-2023 | 200 | | + company1 | 07-03-2023 | 150 | | + company1 | 07-04-2023 | 140 | 140 | 150 + company1 | 07-05-2023 | 150 | | + company1 | 07-06-2023 | 90 | | + company1 | 07-07-2023 | 110 | 110 | 130 + company1 | 07-08-2023 | 130 | | + company1 | 07-09-2023 | 120 | | + company1 | 07-10-2023 | 130 | | + company2 | 07-01-2023 | 50 | 50 | 2000 + company2 | 07-02-2023 | 2000 | | + company2 | 07-03-2023 | 1500 | | + company2 | 07-04-2023 | 1400 | 1400 | 1500 + company2 | 07-05-2023 | 1500 | | + company2 | 07-06-2023 | 60 | | + company2 | 07-07-2023 | 1100 | 1100 | 1300 + company2 | 07-08-2023 | 1300 | | + company2 | 07-09-2023 | 1200 | | + company2 | 07-10-2023 | 1300 | | +(20 rows) + +-- match everything +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ORDER BY tdate + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + INITIAL + PATTERN (A+) + DEFINE + A AS TRUE +); + company | tdate | price | first_value | last_value +----------+------------+-------+-------------+------------ + company1 | 07-01-2023 | 100 | 100 | 130 + company1 | 07-02-2023 | 200 | | + company1 | 07-03-2023 | 150 | | + company1 | 07-04-2023 | 140 | | + company1 | 07-05-2023 | 150 | | + company1 | 07-06-2023 | 90 | | + company1 | 07-07-2023 | 110 | | + company1 | 07-08-2023 | 130 | | + company1 | 07-09-2023 | 120 | | + company1 | 07-10-2023 | 130 | | + company2 | 07-01-2023 | 50 | 50 | 1300 + company2 | 07-02-2023 | 2000 | | + company2 | 07-03-2023 | 1500 | | + company2 | 07-04-2023 | 1400 | | + company2 | 07-05-2023 | 1500 | | + company2 | 07-06-2023 | 60 | | + company2 | 07-07-2023 | 1100 | | + company2 | 07-08-2023 | 1300 | | + company2 | 07-09-2023 | 1200 | | + company2 | 07-10-2023 | 1300 | | +(20 rows) + +-- backtracking with reclassification of rows +-- using AFTER MATCH SKIP PAST LAST ROW +SELECT company, tdate, price, first_value(tdate) OVER w, last_value(tdate) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ORDER BY tdate + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + INITIAL + PATTERN (A+ B+) + DEFINE + A AS price > 100, + B AS price > 100 +); + company | tdate | price | first_value | last_value +----------+------------+-------+-------------+------------ + company1 | 07-01-2023 | 100 | | + company1 | 07-02-2023 | 200 | 07-02-2023 | 07-05-2023 + company1 | 07-03-2023 | 150 | | + company1 | 07-04-2023 | 140 | | + company1 | 07-05-2023 | 150 | | + company1 | 07-06-2023 | 90 | | + company1 | 07-07-2023 | 110 | 07-07-2023 | 07-10-2023 + company1 | 07-08-2023 | 130 | | + company1 | 07-09-2023 | 120 | | + company1 | 07-10-2023 | 130 | | + company2 | 07-01-2023 | 50 | | + company2 | 07-02-2023 | 2000 | 07-02-2023 | 07-05-2023 + company2 | 07-03-2023 | 1500 | | + company2 | 07-04-2023 | 1400 | | + company2 | 07-05-2023 | 1500 | | + company2 | 07-06-2023 | 60 | | + company2 | 07-07-2023 | 1100 | 07-07-2023 | 07-10-2023 + company2 | 07-08-2023 | 1300 | | + company2 | 07-09-2023 | 1200 | | + company2 | 07-10-2023 | 1300 | | +(20 rows) + +-- backtracking with reclassification of rows +-- using AFTER MATCH SKIP TO NEXT ROW +SELECT company, tdate, price, first_value(tdate) OVER w, last_value(tdate) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ORDER BY tdate + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + INITIAL + PATTERN (A+ B+) + DEFINE + A AS price > 100, + B AS price > 100 +); + company | tdate | price | first_value | last_value +----------+------------+-------+-------------+------------ + company1 | 07-01-2023 | 100 | | + company1 | 07-02-2023 | 200 | 07-02-2023 | 07-05-2023 + company1 | 07-03-2023 | 150 | 07-03-2023 | 07-05-2023 + company1 | 07-04-2023 | 140 | 07-04-2023 | 07-05-2023 + company1 | 07-05-2023 | 150 | | + company1 | 07-06-2023 | 90 | | + company1 | 07-07-2023 | 110 | 07-07-2023 | 07-10-2023 + company1 | 07-08-2023 | 130 | 07-08-2023 | 07-10-2023 + company1 | 07-09-2023 | 120 | 07-09-2023 | 07-10-2023 + company1 | 07-10-2023 | 130 | | + company2 | 07-01-2023 | 50 | | + company2 | 07-02-2023 | 2000 | 07-02-2023 | 07-05-2023 + company2 | 07-03-2023 | 1500 | 07-03-2023 | 07-05-2023 + company2 | 07-04-2023 | 1400 | 07-04-2023 | 07-05-2023 + company2 | 07-05-2023 | 1500 | | + company2 | 07-06-2023 | 60 | | + company2 | 07-07-2023 | 1100 | 07-07-2023 | 07-10-2023 + company2 | 07-08-2023 | 1300 | 07-08-2023 | 07-10-2023 + company2 | 07-09-2023 | 1200 | 07-09-2023 | 07-10-2023 + company2 | 07-10-2023 | 1300 | | +(20 rows) + +-- ROWS BETWEEN CURRENT ROW AND offset FOLLOWING +SELECT company, tdate, price, first_value(tdate) OVER w, last_value(tdate) OVER w, + count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ORDER BY tdate + ROWS BETWEEN CURRENT ROW AND 2 FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (START UP+ DOWN+) + DEFINE + START AS TRUE, + UP AS price > PREV(price), + DOWN AS price < PREV(price) +); + company | tdate | price | first_value | last_value | count +----------+------------+-------+-------------+------------+------- + company1 | 07-01-2023 | 100 | 07-01-2023 | 07-03-2023 | 3 + company1 | 07-02-2023 | 200 | | | 0 + company1 | 07-03-2023 | 150 | | | 0 + company1 | 07-04-2023 | 140 | 07-04-2023 | 07-06-2023 | 3 + company1 | 07-05-2023 | 150 | | | 0 + company1 | 07-06-2023 | 90 | | | 0 + company1 | 07-07-2023 | 110 | 07-07-2023 | 07-09-2023 | 3 + company1 | 07-08-2023 | 130 | | | 0 + company1 | 07-09-2023 | 120 | | | 0 + company1 | 07-10-2023 | 130 | | | 0 + company2 | 07-01-2023 | 50 | 07-01-2023 | 07-03-2023 | 3 + company2 | 07-02-2023 | 2000 | | | 0 + company2 | 07-03-2023 | 1500 | | | 0 + company2 | 07-04-2023 | 1400 | 07-04-2023 | 07-06-2023 | 3 + company2 | 07-05-2023 | 1500 | | | 0 + company2 | 07-06-2023 | 60 | | | 0 + company2 | 07-07-2023 | 1100 | 07-07-2023 | 07-09-2023 | 3 + company2 | 07-08-2023 | 1300 | | | 0 + company2 | 07-09-2023 | 1200 | | | 0 + company2 | 07-10-2023 | 1300 | | | 0 +(20 rows) + +-- +-- Aggregates +-- +-- using AFTER MATCH SKIP PAST LAST ROW +SELECT company, tdate, price, + first_value(price) OVER w, + last_value(price) OVER w, + max(price) OVER w, + min(price) OVER w, + sum(price) OVER w, + avg(price) OVER w, + count(price) OVER w +FROM stock +WINDOW w AS ( +PARTITION BY company +ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING +AFTER MATCH SKIP PAST LAST ROW +INITIAL +PATTERN (START UP+ DOWN+) +DEFINE +START AS TRUE, +UP AS price > PREV(price), +DOWN AS price < PREV(price) +); + company | tdate | price | first_value | last_value | max | min | sum | avg | count +----------+------------+-------+-------------+------------+------+-----+------+-----------------------+------- + company1 | 07-01-2023 | 100 | 100 | 140 | 200 | 100 | 590 | 147.5000000000000000 | 4 + company1 | 07-02-2023 | 200 | | | | | | | 0 + company1 | 07-03-2023 | 150 | | | | | | | 0 + company1 | 07-04-2023 | 140 | | | | | | | 0 + company1 | 07-05-2023 | 150 | | | | | | | 0 + company1 | 07-06-2023 | 90 | 90 | 120 | 130 | 90 | 450 | 112.5000000000000000 | 4 + company1 | 07-07-2023 | 110 | | | | | | | 0 + company1 | 07-08-2023 | 130 | | | | | | | 0 + company1 | 07-09-2023 | 120 | | | | | | | 0 + company1 | 07-10-2023 | 130 | | | | | | | 0 + company2 | 07-01-2023 | 50 | 50 | 1400 | 2000 | 50 | 4950 | 1237.5000000000000000 | 4 + company2 | 07-02-2023 | 2000 | | | | | | | 0 + company2 | 07-03-2023 | 1500 | | | | | | | 0 + company2 | 07-04-2023 | 1400 | | | | | | | 0 + company2 | 07-05-2023 | 1500 | | | | | | | 0 + company2 | 07-06-2023 | 60 | 60 | 1200 | 1300 | 60 | 3660 | 915.0000000000000000 | 4 + company2 | 07-07-2023 | 1100 | | | | | | | 0 + company2 | 07-08-2023 | 1300 | | | | | | | 0 + company2 | 07-09-2023 | 1200 | | | | | | | 0 + company2 | 07-10-2023 | 1300 | | | | | | | 0 +(20 rows) + +-- using AFTER MATCH SKIP TO NEXT ROW +SELECT company, tdate, price, + first_value(price) OVER w, + last_value(price) OVER w, + max(price) OVER w, + min(price) OVER w, + sum(price) OVER w, + avg(price) OVER w, + count(price) OVER w +FROM stock +WINDOW w AS ( +PARTITION BY company +ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING +AFTER MATCH SKIP TO NEXT ROW +INITIAL +PATTERN (START UP+ DOWN+) +DEFINE +START AS TRUE, +UP AS price > PREV(price), +DOWN AS price < PREV(price) +); + company | tdate | price | first_value | last_value | max | min | sum | avg | count +----------+------------+-------+-------------+------------+------+------+------+-----------------------+------- + company1 | 07-01-2023 | 100 | 100 | 140 | 200 | 100 | 590 | 147.5000000000000000 | 4 + company1 | 07-02-2023 | 200 | | | | | | | 0 + company1 | 07-03-2023 | 150 | | | | | | | 0 + company1 | 07-04-2023 | 140 | 140 | 90 | 150 | 90 | 380 | 126.6666666666666667 | 3 + company1 | 07-05-2023 | 150 | | | | | | | 0 + company1 | 07-06-2023 | 90 | 90 | 120 | 130 | 90 | 450 | 112.5000000000000000 | 4 + company1 | 07-07-2023 | 110 | 110 | 120 | 130 | 110 | 360 | 120.0000000000000000 | 3 + company1 | 07-08-2023 | 130 | | | | | | | 0 + company1 | 07-09-2023 | 120 | | | | | | | 0 + company1 | 07-10-2023 | 130 | | | | | | | 0 + company2 | 07-01-2023 | 50 | 50 | 1400 | 2000 | 50 | 4950 | 1237.5000000000000000 | 4 + company2 | 07-02-2023 | 2000 | | | | | | | 0 + company2 | 07-03-2023 | 1500 | | | | | | | 0 + company2 | 07-04-2023 | 1400 | 1400 | 60 | 1500 | 60 | 2960 | 986.6666666666666667 | 3 + company2 | 07-05-2023 | 1500 | | | | | | | 0 + company2 | 07-06-2023 | 60 | 60 | 1200 | 1300 | 60 | 3660 | 915.0000000000000000 | 4 + company2 | 07-07-2023 | 1100 | 1100 | 1200 | 1300 | 1100 | 3600 | 1200.0000000000000000 | 3 + company2 | 07-08-2023 | 1300 | | | | | | | 0 + company2 | 07-09-2023 | 1200 | | | | | | | 0 + company2 | 07-10-2023 | 1300 | | | | | | | 0 +(20 rows) + +-- JOIN case +CREATE TEMP TABLE t1 (i int, v1 int); +CREATE TEMP TABLE t2 (j int, v2 int); +INSERT INTO t1 VALUES(1,10); +INSERT INTO t1 VALUES(1,11); +INSERT INTO t1 VALUES(1,12); +INSERT INTO t2 VALUES(2,10); +INSERT INTO t2 VALUES(2,11); +INSERT INTO t2 VALUES(2,12); +SELECT * FROM t1, t2 WHERE t1.v1 <= 11 AND t2.v2 <= 11; + i | v1 | j | v2 +---+----+---+---- + 1 | 10 | 2 | 10 + 1 | 10 | 2 | 11 + 1 | 11 | 2 | 10 + 1 | 11 | 2 | 11 +(4 rows) + +SELECT *, count(*) OVER w FROM t1, t2 +WINDOW w AS ( + PARTITION BY t1.i + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A) + DEFINE + A AS v1 <= 11 AND v2 <= 11 +); + i | v1 | j | v2 | count +---+----+---+----+------- + 1 | 10 | 2 | 10 | 1 + 1 | 10 | 2 | 11 | 1 + 1 | 10 | 2 | 12 | 0 + 1 | 11 | 2 | 10 | 1 + 1 | 11 | 2 | 11 | 1 + 1 | 11 | 2 | 12 | 0 + 1 | 12 | 2 | 10 | 0 + 1 | 12 | 2 | 11 | 0 + 1 | 12 | 2 | 12 | 0 +(9 rows) + +-- WITH case +WITH wstock AS ( + SELECT * FROM stock WHERE tdate < '2023-07-08' +) +SELECT tdate, price, +first_value(tdate) OVER w, +count(*) OVER w + FROM wstock + WINDOW w AS ( + PARTITION BY company + ORDER BY tdate + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (START UP+ DOWN+) + DEFINE + START AS TRUE, + UP AS price > PREV(price), + DOWN AS price < PREV(price) +); + tdate | price | first_value | count +------------+-------+-------------+------- + 07-01-2023 | 100 | 07-01-2023 | 4 + 07-02-2023 | 200 | | 0 + 07-03-2023 | 150 | | 0 + 07-04-2023 | 140 | | 0 + 07-05-2023 | 150 | | 0 + 07-06-2023 | 90 | | 0 + 07-07-2023 | 110 | | 0 + 07-01-2023 | 50 | 07-01-2023 | 4 + 07-02-2023 | 2000 | | 0 + 07-03-2023 | 1500 | | 0 + 07-04-2023 | 1400 | | 0 + 07-05-2023 | 1500 | | 0 + 07-06-2023 | 60 | | 0 + 07-07-2023 | 1100 | | 0 +(14 rows) + +-- PREV has multiple column reference +CREATE TEMP TABLE rpr1 (id INTEGER, i SERIAL, j INTEGER); +INSERT INTO rpr1(id, j) SELECT 1, g*2 FROM generate_series(1, 10) AS g; +SELECT id, i, j, count(*) OVER w + FROM rpr1 + WINDOW w AS ( + PARTITION BY id + ORDER BY i + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + INITIAL + PATTERN (START COND+) + DEFINE + START AS TRUE, + COND AS PREV(i + j + 1) < 10 +); + id | i | j | count +----+----+----+------- + 1 | 1 | 2 | 3 + 1 | 2 | 4 | 0 + 1 | 3 | 6 | 0 + 1 | 4 | 8 | 0 + 1 | 5 | 10 | 0 + 1 | 6 | 12 | 0 + 1 | 7 | 14 | 0 + 1 | 8 | 16 | 0 + 1 | 9 | 18 | 0 + 1 | 10 | 20 | 0 +(10 rows) + +-- Smoke test for larger partitions. +WITH s AS ( + SELECT v, count(*) OVER w AS c + FROM (SELECT generate_series(1, 5000) v) + WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + INITIAL + PATTERN ( r+ ) + DEFINE r AS TRUE + ) +) +-- Should be exactly one long match across all rows. +SELECT * FROM s WHERE c > 0; + v | c +---+------ + 1 | 5000 +(1 row) + +WITH s AS ( + SELECT v, count(*) OVER w AS c + FROM (SELECT generate_series(1, 5000) v) + WINDOW w AS ( + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + INITIAL + PATTERN ( r ) + DEFINE r AS TRUE + ) +) +-- Every row should be its own match. +SELECT count(*) FROM s WHERE c > 0; + count +------- + 5000 +(1 row) + +-- View and pg_get_viewdef tests. +CREATE TEMP VIEW v_window AS +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w, + nth_value(tdate, 2) OVER w AS nth_second + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (START UP+ DOWN+) + DEFINE + START AS TRUE, + UP AS price > PREV(price), + DOWN AS price < PREV(price) +); +SELECT * FROM v_window; + company | tdate | price | first_value | last_value | nth_second +----------+------------+-------+-------------+------------+------------ + company1 | 07-01-2023 | 100 | 100 | 140 | 07-02-2023 + company1 | 07-02-2023 | 200 | | | + company1 | 07-03-2023 | 150 | | | + company1 | 07-04-2023 | 140 | | | + company1 | 07-05-2023 | 150 | | | + company1 | 07-06-2023 | 90 | 90 | 120 | 07-07-2023 + company1 | 07-07-2023 | 110 | | | + company1 | 07-08-2023 | 130 | | | + company1 | 07-09-2023 | 120 | | | + company1 | 07-10-2023 | 130 | | | + company2 | 07-01-2023 | 50 | 50 | 1400 | 07-02-2023 + company2 | 07-02-2023 | 2000 | | | + company2 | 07-03-2023 | 1500 | | | + company2 | 07-04-2023 | 1400 | | | + company2 | 07-05-2023 | 1500 | | | + company2 | 07-06-2023 | 60 | 60 | 1200 | 07-07-2023 + company2 | 07-07-2023 | 1100 | | | + company2 | 07-08-2023 | 1300 | | | + company2 | 07-09-2023 | 1200 | | | + company2 | 07-10-2023 | 1300 | | | +(20 rows) + +SELECT pg_get_viewdef('v_window'); + pg_get_viewdef +--------------------------------------------------------------------------------------- + SELECT company, + + tdate, + + price, + + first_value(price) OVER w AS first_value, + + last_value(price) OVER w AS last_value, + + nth_value(tdate, 2) OVER w AS nth_second + + FROM stock + + WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN (start up+ down+) + + DEFINE + + start AS true, + + up AS (price > prev(price)), + + down AS (price < prev(price)) ); +(1 row) + +-- +-- Pattern optimization tests +-- VIEW shows original pattern, EXPLAIN shows optimized pattern +-- +-- Test: duplicate alternatives removal (A | B | A)+ -> (A | B)+ +CREATE TEMP VIEW v_opt_dup AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN ((A | B | A)+) + DEFINE + A AS price > 100, + B AS price <= 100 +); +SELECT pg_get_viewdef('v_opt_dup'); -- original: ((a | b | a)+) + pg_get_viewdef +--------------------------------------------------------------------------------------- + SELECT company, + + tdate, + + price, + + count(*) OVER w AS count + + FROM stock + + WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN ((a | b | a)+) + + DEFINE + + a AS (price > 100), + + b AS (price <= 100) ); +(1 row) + +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_dup; -- optimized: ((a | b)+) + QUERY PLAN +---------------------------------------------------------------------------------------------------- + Subquery Scan on v_opt_dup + -> WindowAgg + Window: w AS (PARTITION BY stock.company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: ((a | b))+ + -> Sort + Sort Key: stock.company + -> Seq Scan on stock +(7 rows) + +-- Test: duplicate group removal ((A | B)+ | (A | B)+) -> (A | B)+ +CREATE TEMP VIEW v_opt_dup_group AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN ((A | B)+ | (A | B)+) + DEFINE + A AS price > 100, + B AS price <= 100 +); +SELECT pg_get_viewdef('v_opt_dup_group'); -- original: ((a | b)+ | (a | b)+) + pg_get_viewdef +--------------------------------------------------------------------------------------- + SELECT company, + + tdate, + + price, + + count(*) OVER w AS count + + FROM stock + + WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN ((a | b)+ | (a | b)+) + + DEFINE + + a AS (price > 100), + + b AS (price <= 100) ); +(1 row) + +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_dup_group; -- optimized: ((a | b)+) + QUERY PLAN +---------------------------------------------------------------------------------------------------- + Subquery Scan on v_opt_dup_group + -> WindowAgg + Window: w AS (PARTITION BY stock.company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: ((a | b))+ + -> Sort + Sort Key: stock.company + -> Seq Scan on stock +(7 rows) + +-- Test: consecutive vars merge (A A A) -> A{3} +CREATE TEMP VIEW v_opt_merge AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A A A) + DEFINE + A AS price >= 140 AND price <= 150 +); +SELECT pg_get_viewdef('v_opt_merge'); -- original: (a a a) + pg_get_viewdef +--------------------------------------------------------------------------------------- + SELECT company, + + tdate, + + price, + + count(*) OVER w AS count + + FROM stock + + WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN (a a a) + + DEFINE + + a AS ((price >= 140) AND (price <= 150)) ); +(1 row) + +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_merge; -- optimized: a{3} + QUERY PLAN +---------------------------------------------------------------------------------------------------- + Subquery Scan on v_opt_merge + -> WindowAgg + Window: w AS (PARTITION BY stock.company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a{3} + -> Sort + Sort Key: stock.company + -> Seq Scan on stock +(7 rows) + +-- Test: quantified vars merge (A A+ A) -> A{3,} +CREATE TEMP VIEW v_opt_merge_quant AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A A+ A) + DEFINE + A AS price > 100 +); +SELECT pg_get_viewdef('v_opt_merge_quant'); -- original: (a a+ a) + pg_get_viewdef +--------------------------------------------------------------------------------------- + SELECT company, + + tdate, + + price, + + count(*) OVER w AS count + + FROM stock + + WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN (a a+ a) + + DEFINE + + a AS (price > 100) ); +(1 row) + +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_merge_quant; -- optimized: a{3,} + QUERY PLAN +---------------------------------------------------------------------------------------------------- + Subquery Scan on v_opt_merge_quant + -> WindowAgg + Window: w AS (PARTITION BY stock.company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a{3,} + -> Sort + Sort Key: stock.company + -> Seq Scan on stock +(7 rows) + +-- Test: merge two unbounded (A+ A+) -> A{2,} +CREATE TEMP VIEW v_opt_merge_unbounded AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A+ A+) + DEFINE + A AS price > 100 +); +SELECT pg_get_viewdef('v_opt_merge_unbounded'); -- original: (a+ a+) + pg_get_viewdef +--------------------------------------------------------------------------------------- + SELECT company, + + tdate, + + price, + + count(*) OVER w AS count + + FROM stock + + WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN (a+ a+) + + DEFINE + + a AS (price > 100) ); +(1 row) + +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_merge_unbounded; -- optimized: a{2,} + QUERY PLAN +---------------------------------------------------------------------------------------------------- + Subquery Scan on v_opt_merge_unbounded + -> WindowAgg + Window: w AS (PARTITION BY stock.company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a{2,} + -> Sort + Sort Key: stock.company + -> Seq Scan on stock +(7 rows) + +-- Test: merge with zero-min (A* A+) -> A+ +CREATE TEMP VIEW v_opt_merge_star AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A* A+) + DEFINE + A AS price > 100 +); +SELECT pg_get_viewdef('v_opt_merge_star'); -- original: (a* a+) + pg_get_viewdef +--------------------------------------------------------------------------------------- + SELECT company, + + tdate, + + price, + + count(*) OVER w AS count + + FROM stock + + WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN (a* a+) + + DEFINE + + a AS (price > 100) ); +(1 row) + +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_merge_star; -- optimized: a+ + QUERY PLAN +---------------------------------------------------------------------------------------------------- + Subquery Scan on v_opt_merge_star + -> WindowAgg + Window: w AS (PARTITION BY stock.company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a+ + -> Sort + Sort Key: stock.company + -> Seq Scan on stock +(7 rows) + +-- Test: complex merge (A A{2} A+ A{3}) -> A{7,} +CREATE TEMP VIEW v_opt_merge_complex AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A A{2} A+ A{3}) + DEFINE + A AS price > 100 +); +SELECT pg_get_viewdef('v_opt_merge_complex'); -- original: (a a{2} a+ a{3}) + pg_get_viewdef +--------------------------------------------------------------------------------------- + SELECT company, + + tdate, + + price, + + count(*) OVER w AS count + + FROM stock + + WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN (a a{2} a+ a{3}) + + DEFINE + + a AS (price > 100) ); +(1 row) + +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_merge_complex; -- optimized: a{7,} + QUERY PLAN +---------------------------------------------------------------------------------------------------- + Subquery Scan on v_opt_merge_complex + -> WindowAgg + Window: w AS (PARTITION BY stock.company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a{7,} + -> Sort + Sort Key: stock.company + -> Seq Scan on stock +(7 rows) + +-- Test: group merge ((A B) (A B)+) -> (A B){2,} +CREATE TEMP VIEW v_opt_merge_group AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN ((A B) (A B)+) + DEFINE + A AS price > 100, + B AS price <= 100 +); +SELECT pg_get_viewdef('v_opt_merge_group'); -- original: ((a b) (a b)+) + pg_get_viewdef +--------------------------------------------------------------------------------------- + SELECT company, + + tdate, + + price, + + count(*) OVER w AS count + + FROM stock + + WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN ((a b) (a b)+) + + DEFINE + + a AS (price > 100), + + b AS (price <= 100) ); +(1 row) + +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_merge_group; -- expected: (a b){2,} + QUERY PLAN +---------------------------------------------------------------------------------------------------- + Subquery Scan on v_opt_merge_group + -> WindowAgg + Window: w AS (PARTITION BY stock.company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a b){2,} + -> Sort + Sort Key: stock.company + -> Seq Scan on stock +(7 rows) + +-- Test: group merge A B (A B)+ -> (A B){2,} +CREATE TEMP VIEW v_opt_merge_group2 AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A B (A B)+) + DEFINE + A AS price > 100, + B AS price <= 100 +); +SELECT pg_get_viewdef('v_opt_merge_group2'); -- original: (a b (a b)+) + pg_get_viewdef +--------------------------------------------------------------------------------------- + SELECT company, + + tdate, + + price, + + count(*) OVER w AS count + + FROM stock + + WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN (a b (a b)+) + + DEFINE + + a AS (price > 100), + + b AS (price <= 100) ); +(1 row) + +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_merge_group2; -- expected: (a b){2,} + QUERY PLAN +---------------------------------------------------------------------------------------------------- + Subquery Scan on v_opt_merge_group2 + -> WindowAgg + Window: w AS (PARTITION BY stock.company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a b){2,} + -> Sort + Sort Key: stock.company + -> Seq Scan on stock +(7 rows) + +-- Test: group merge (A B) (A B)+ (A B) -> (A B){3,} +CREATE TEMP VIEW v_opt_merge_group3 AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN ((A B) (A B)+ (A B)) + DEFINE + A AS price > 100, + B AS price <= 100 +); +SELECT pg_get_viewdef('v_opt_merge_group3'); -- original: ((a b) (a b)+ (a b)) + pg_get_viewdef +--------------------------------------------------------------------------------------- + SELECT company, + + tdate, + + price, + + count(*) OVER w AS count + + FROM stock + + WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN ((a b) (a b)+ (a b)) + + DEFINE + + a AS (price > 100), + + b AS (price <= 100) ); +(1 row) + +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_merge_group3; -- expected: (a b){3,} + QUERY PLAN +---------------------------------------------------------------------------------------------------- + Subquery Scan on v_opt_merge_group3 + -> WindowAgg + Window: w AS (PARTITION BY stock.company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a b){3,} + -> Sort + Sort Key: stock.company + -> Seq Scan on stock +(7 rows) + +-- Test: group merge A B A B (A B)+ A B A B -> (A B){5,} +CREATE TEMP VIEW v_opt_merge_group4 AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A B A B (A B)+ A B A B) + DEFINE + A AS price > 100, + B AS price <= 100 +); +SELECT pg_get_viewdef('v_opt_merge_group4'); -- original: (a b a b (a b)+ a b a b) + pg_get_viewdef +--------------------------------------------------------------------------------------- + SELECT company, + + tdate, + + price, + + count(*) OVER w AS count + + FROM stock + + WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN (a b a b (a b)+ a b a b) + + DEFINE + + a AS (price > 100), + + b AS (price <= 100) ); +(1 row) + +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_merge_group4; -- expected: (a b){5,} + QUERY PLAN +---------------------------------------------------------------------------------------------------- + Subquery Scan on v_opt_merge_group4 + -> WindowAgg + Window: w AS (PARTITION BY stock.company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: (a b){5,} + -> Sort + Sort Key: stock.company + -> Seq Scan on stock +(7 rows) + +-- Test: group merge C A B (A B)+ A B C -> C (A B){3,} C +CREATE TEMP VIEW v_opt_merge_group5 AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (C A B (A B)+ A B C) + DEFINE + A AS price > 100, + B AS price <= 100, + C AS price > 200 +); +SELECT pg_get_viewdef('v_opt_merge_group5'); -- original: (c a b (a b)+ a b c) + pg_get_viewdef +--------------------------------------------------------------------------------------- + SELECT company, + + tdate, + + price, + + count(*) OVER w AS count + + FROM stock + + WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN (c a b (a b)+ a b c) + + DEFINE + + a AS (price > 100), + + b AS (price <= 100), + + c AS (price > 200) ); +(1 row) + +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_merge_group5; -- expected: c (a b){3,} c + QUERY PLAN +---------------------------------------------------------------------------------------------------- + Subquery Scan on v_opt_merge_group5 + -> WindowAgg + Window: w AS (PARTITION BY stock.company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: c (a b){3,} c + -> Sort + Sort Key: stock.company + -> Seq Scan on stock +(7 rows) + +-- Test {n} quantifier display +CREATE TEMP VIEW v_quantifier_n AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A{3}) + DEFINE + A AS price > 100 +); +SELECT pg_get_viewdef('v_quantifier_n'); + pg_get_viewdef +--------------------------------------------------------------------------------------- + SELECT company, + + tdate, + + price, + + count(*) OVER w AS count + + FROM stock + + WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN (a{3}) + + DEFINE + + a AS (price > 100) ); +(1 row) + +-- Test {n,} quantifier display +CREATE TEMP VIEW v_quantifier_n_plus AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A{2,}) + DEFINE + A AS price > 100 +); +SELECT pg_get_viewdef('v_quantifier_n_plus'); + pg_get_viewdef +--------------------------------------------------------------------------------------- + SELECT company, + + tdate, + + price, + + count(*) OVER w AS count + + FROM stock + + WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN (a{2,}) + + DEFINE + + a AS (price > 100) ); +(1 row) + +-- Test: flatten nested SEQ (A (B C)) -> A B C +CREATE TEMP VIEW v_opt_flatten_seq AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A (B C)) + DEFINE + A AS price > 100, + B AS price > 150, + C AS price < 150 +); +SELECT pg_get_viewdef('v_opt_flatten_seq'); -- original: (a (b c)) + pg_get_viewdef +--------------------------------------------------------------------------------------- + SELECT company, + + tdate, + + price, + + count(*) OVER w AS count + + FROM stock + + WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN (a (b c)) + + DEFINE + + a AS (price > 100), + + b AS (price > 150), + + c AS (price < 150) ); +(1 row) + +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_flatten_seq; -- optimized: a b c + QUERY PLAN +---------------------------------------------------------------------------------------------------- + Subquery Scan on v_opt_flatten_seq + -> WindowAgg + Window: w AS (PARTITION BY stock.company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a b c + -> Sort + Sort Key: stock.company + -> Seq Scan on stock +(7 rows) + +-- Test: flatten nested ALT (A | (B | C)) -> (A | B | C) +CREATE TEMP VIEW v_opt_flatten_alt AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN ((A | (B | C))+) + DEFINE + A AS price > 200, + B AS price > 100, + C AS price <= 100 +); +SELECT pg_get_viewdef('v_opt_flatten_alt'); -- original: ((a | (b | c))+) + pg_get_viewdef +--------------------------------------------------------------------------------------- + SELECT company, + + tdate, + + price, + + count(*) OVER w AS count + + FROM stock + + WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN ((a | (b | c))+) + + DEFINE + + a AS (price > 200), + + b AS (price > 100), + + c AS (price <= 100) ); +(1 row) -SELECT *, count(*) OVER w FROM t1, t2 -WINDOW w AS ( - PARTITION BY t1.i +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_flatten_alt; -- optimized: ((a | b | c))+ + QUERY PLAN +---------------------------------------------------------------------------------------------------- + Subquery Scan on v_opt_flatten_alt + -> WindowAgg + Window: w AS (PARTITION BY stock.company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: ((a | b | c))+ + -> Sort + Sort Key: stock.company + -> Seq Scan on stock +(7 rows) + +-- Test: unwrap GROUP{1,1} ((A)) -> A +CREATE TEMP VIEW v_opt_unwrap_group AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING INITIAL - PATTERN (A) + PATTERN (((A))) DEFINE - A AS v1 <= 11 AND v2 <= 11 + A AS price > 100 ); - i | v1 | j | v2 | count ----+----+---+----+------- - 1 | 10 | 2 | 10 | 1 - 1 | 10 | 2 | 11 | 1 - 1 | 10 | 2 | 12 | 0 - 1 | 11 | 2 | 10 | 1 - 1 | 11 | 2 | 11 | 1 - 1 | 11 | 2 | 12 | 0 - 1 | 12 | 2 | 10 | 0 - 1 | 12 | 2 | 11 | 0 - 1 | 12 | 2 | 12 | 0 -(9 rows) +SELECT pg_get_viewdef('v_opt_unwrap_group'); -- original: (((a))) + pg_get_viewdef +--------------------------------------------------------------------------------------- + SELECT company, + + tdate, + + price, + + count(*) OVER w AS count + + FROM stock + + WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN (((a))) + + DEFINE + + a AS (price > 100) ); +(1 row) --- WITH case -WITH wstock AS ( - SELECT * FROM stock WHERE tdate < '2023-07-08' -) -SELECT tdate, price, -first_value(tdate) OVER w, -count(*) OVER w - FROM wstock +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_unwrap_group; -- optimized: a + QUERY PLAN +---------------------------------------------------------------------------------------------------- + Subquery Scan on v_opt_unwrap_group + -> WindowAgg + Window: w AS (PARTITION BY stock.company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a + -> Sort + Sort Key: stock.company + -> Seq Scan on stock +(7 rows) + +-- Test: quantifier multiplication (A{2}){3} -> A{6} +CREATE TEMP VIEW v_opt_quant_mult AS +SELECT company, tdate, price, count(*) OVER w + FROM stock WINDOW w AS ( PARTITION BY company - ORDER BY tdate ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING INITIAL - PATTERN (START UP+ DOWN+) + PATTERN ((A{2}){3}) DEFINE - START AS TRUE, - UP AS price > PREV(price), - DOWN AS price < PREV(price) + A AS price > 100 ); - tdate | price | first_value | count -------------+-------+-------------+------- - 07-01-2023 | 100 | 07-01-2023 | 4 - 07-02-2023 | 200 | | 0 - 07-03-2023 | 150 | | 0 - 07-04-2023 | 140 | | 0 - 07-05-2023 | 150 | | 0 - 07-06-2023 | 90 | | 0 - 07-07-2023 | 110 | | 0 - 07-01-2023 | 50 | 07-01-2023 | 4 - 07-02-2023 | 2000 | | 0 - 07-03-2023 | 1500 | | 0 - 07-04-2023 | 1400 | | 0 - 07-05-2023 | 1500 | | 0 - 07-06-2023 | 60 | | 0 - 07-07-2023 | 1100 | | 0 -(14 rows) +SELECT pg_get_viewdef('v_opt_quant_mult'); -- original: ((a{2}){3}) + pg_get_viewdef +--------------------------------------------------------------------------------------- + SELECT company, + + tdate, + + price, + + count(*) OVER w AS count + + FROM stock + + WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN ((a{2}){3}) + + DEFINE + + a AS (price > 100) ); +(1 row) --- PREV has multiple column reference -CREATE TEMP TABLE rpr1 (id INTEGER, i SERIAL, j INTEGER); -INSERT INTO rpr1(id, j) SELECT 1, g*2 FROM generate_series(1, 10) AS g; -SELECT id, i, j, count(*) OVER w - FROM rpr1 +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_quant_mult; -- optimized: a{6} + QUERY PLAN +---------------------------------------------------------------------------------------------------- + Subquery Scan on v_opt_quant_mult + -> WindowAgg + Window: w AS (PARTITION BY stock.company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a{6} + -> Sort + Sort Key: stock.company + -> Seq Scan on stock +(7 rows) + +-- Test: quantifier multiplication (A{2,4}){3} -> A{6,12} +CREATE TEMP VIEW v_opt_quant_mult_range AS +SELECT company, tdate, price, count(*) OVER w + FROM stock WINDOW w AS ( - PARTITION BY id - ORDER BY i + PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP PAST LAST ROW INITIAL - PATTERN (START COND+) + PATTERN ((A{2,4}){3}) DEFINE - START AS TRUE, - COND AS PREV(i + j + 1) < 10 + A AS price > 100 ); - id | i | j | count -----+----+----+------- - 1 | 1 | 2 | 3 - 1 | 2 | 4 | 0 - 1 | 3 | 6 | 0 - 1 | 4 | 8 | 0 - 1 | 5 | 10 | 0 - 1 | 6 | 12 | 0 - 1 | 7 | 14 | 0 - 1 | 8 | 16 | 0 - 1 | 9 | 18 | 0 - 1 | 10 | 20 | 0 -(10 rows) - --- Smoke test for larger partitions. -WITH s AS ( - SELECT v, count(*) OVER w AS c - FROM (SELECT generate_series(1, 5000) v) - WINDOW w AS ( - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP PAST LAST ROW - INITIAL - PATTERN ( r+ ) - DEFINE r AS TRUE - ) -) --- Should be exactly one long match across all rows. -SELECT * FROM s WHERE c > 0; - v | c ----+------ - 1 | 5000 +SELECT pg_get_viewdef('v_opt_quant_mult_range'); -- original: ((a{2,4}){3}) + pg_get_viewdef +--------------------------------------------------------------------------------------- + SELECT company, + + tdate, + + price, + + count(*) OVER w AS count + + FROM stock + + WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + + AFTER MATCH SKIP PAST LAST ROW + + INITIAL + + PATTERN ((a{2,4}){3}) + + DEFINE + + a AS (price > 100) ); (1 row) -WITH s AS ( - SELECT v, count(*) OVER w AS c - FROM (SELECT generate_series(1, 5000) v) - WINDOW w AS ( - ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING - AFTER MATCH SKIP PAST LAST ROW - INITIAL - PATTERN ( r ) - DEFINE r AS TRUE - ) -) --- Every row should be its own match. -SELECT count(*) FROM s WHERE c > 0; - count -------- - 5000 -(1 row) +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_quant_mult_range; -- optimized: a{6,12} + QUERY PLAN +---------------------------------------------------------------------------------------------------- + Subquery Scan on v_opt_quant_mult_range + -> WindowAgg + Window: w AS (PARTITION BY stock.company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a{6,12} + -> Sort + Sort Key: stock.company + -> Seq Scan on stock +(7 rows) --- View and pg_get_viewdef tests. -CREATE TEMP VIEW v_window AS -SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w, - nth_value(tdate, 2) OVER w AS nth_second +-- Test: quantifier multiplication (A{2}){3,5} -> A{6,10} +CREATE TEMP VIEW v_opt_quant_mult_range2 AS +SELECT company, tdate, price, count(*) OVER w FROM stock WINDOW w AS ( PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING INITIAL - PATTERN (START UP+ DOWN+) + PATTERN ((A{2}){3,5}) DEFINE - START AS TRUE, - UP AS price > PREV(price), - DOWN AS price < PREV(price) + A AS price > 100 ); -SELECT * FROM v_window; - company | tdate | price | first_value | last_value | nth_second -----------+------------+-------+-------------+------------+------------ - company1 | 07-01-2023 | 100 | 100 | 140 | 07-02-2023 - company1 | 07-02-2023 | 200 | | | - company1 | 07-03-2023 | 150 | | | - company1 | 07-04-2023 | 140 | | | - company1 | 07-05-2023 | 150 | | | - company1 | 07-06-2023 | 90 | 90 | 120 | 07-07-2023 - company1 | 07-07-2023 | 110 | | | - company1 | 07-08-2023 | 130 | | | - company1 | 07-09-2023 | 120 | | | - company1 | 07-10-2023 | 130 | | | - company2 | 07-01-2023 | 50 | 50 | 1400 | 07-02-2023 - company2 | 07-02-2023 | 2000 | | | - company2 | 07-03-2023 | 1500 | | | - company2 | 07-04-2023 | 1400 | | | - company2 | 07-05-2023 | 1500 | | | - company2 | 07-06-2023 | 60 | 60 | 1200 | 07-07-2023 - company2 | 07-07-2023 | 1100 | | | - company2 | 07-08-2023 | 1300 | | | - company2 | 07-09-2023 | 1200 | | | - company2 | 07-10-2023 | 1300 | | | -(20 rows) - -SELECT pg_get_viewdef('v_window'); +SELECT pg_get_viewdef('v_opt_quant_mult_range2'); -- original: ((a{2}){3,5}) pg_get_viewdef --------------------------------------------------------------------------------------- SELECT company, + tdate, + price, + - first_value(price) OVER w AS first_value, + - last_value(price) OVER w AS last_value, + - nth_value(tdate, 2) OVER w AS nth_second + + count(*) OVER w AS count + FROM stock + WINDOW w AS (PARTITION BY company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + INITIAL + - PATTERN (start up+ down+) + + PATTERN ((a{2}){3,5}) + DEFINE + - start AS true, + - up AS (price > prev(price)), + - down AS (price < prev(price)) ); + a AS (price > 100) ); (1 row) +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_quant_mult_range2; -- optimized: a{6,10} + QUERY PLAN +---------------------------------------------------------------------------------------------------- + Subquery Scan on v_opt_quant_mult_range2 + -> WindowAgg + Window: w AS (PARTITION BY stock.company ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + Pattern: a{6,10} + -> Sort + Sort Key: stock.company + -> Seq Scan on stock +(7 rows) + -- -- Error cases -- @@ -1030,7 +2162,7 @@ SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER UP AS price > PREV(1), DOWN AS price < PREV(1) ); -ERROR: unsupported quantifier +ERROR: unsupported quantifier "~" LINE 9: PATTERN (START UP~ DOWN+) ^ SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w @@ -1047,9 +2179,7 @@ SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER UP AS price > PREV(1), DOWN AS price < PREV(1) ); -ERROR: unsupported quantifier -LINE 9: PATTERN (START UP+? DOWN+) - ^ +ERROR: row pattern navigation operation's argument must include at least one column reference -- Number of row pattern definition variable names must not exceed 26 -- Ok SELECT * FROM (SELECT 1 AS x) t @@ -1094,3 +2224,513 @@ PREV(price) c1 | 07-04-2023 | 150 | 0 (4 rows) +-- Overlapping match tests (requires multi-context for correct behavior) +-- Using array flags: 'X' = ANY(flags) for multi-TRUE support +-- Test 1: A B C D E | B C D | C D E F - three overlapping patterns +-- Different end points: B C D (4), A B C D E (5), C D E F (6) +WITH test_overlap1 AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['C']), + (4, ARRAY['D']), + (5, ARRAY['E']), + (6, ARRAY['F']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_overlap1 +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C D E | B C D | C D E F) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags), + E AS 'E' = ANY(flags), + F AS 'F' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 5 + 2 | {B} | | + 3 | {C} | | + 4 | {D} | | + 5 | {E} | | + 6 | {F} | | +(6 rows) + +WITH test_overlap1 AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['C']), + (4, ARRAY['D']), + (5, ARRAY['E']), + (6, ARRAY['F']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_overlap1 +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B C D E | B C D | C D E F) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags), + E AS 'E' = ANY(flags), + F AS 'F' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 5 + 2 | {B} | 2 | 4 + 3 | {C} | 3 | 6 + 4 | {D} | | + 5 | {E} | | + 6 | {F} | | +(6 rows) + +-- PAST LAST: only one match +-- TO NEXT ROW with multi-context: three matches +-- Row 1: A B C D E (1-5) +-- Row 2: B C D (2-4) <- ends first! +-- Row 3: C D E F (3-6) <- ends last! +-- Test 2: A B+ C | B+ D - long B sequence with different endings +WITH test_overlap2 AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['B']), + (4, ARRAY['B']), + (5, ARRAY['B']), + (6, ARRAY['C']), + (7, ARRAY['B']), + (8, ARRAY['B']), + (9, ARRAY['B']), + (10, ARRAY['D']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_overlap2 +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B+ C | B+ D) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 6 + 2 | {B} | | + 3 | {B} | | + 4 | {B} | | + 5 | {B} | | + 6 | {C} | | + 7 | {B} | 7 | 10 + 8 | {B} | | + 9 | {B} | | + 10 | {D} | | +(10 rows) + +-- Current result (correct): +-- Row 1: A B+ C (1-6) +-- Row 7-9: B+ D (7-10, 8-10, 9-10) +-- Note: Row 2-6 cannot match B+ D because Row 6 is C, not D +-- With absorption: 8-10 and 9-10 would be absorbed by 7-10 (earlier context covers later) +-- Test 3: Greedy quantifier with late failure - A B C+ D | A B +-- Pattern expects D after C+, but E comes instead ("betrayal") +WITH test_betrayal AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['C']), + (4, ARRAY['C']), + (5, ARRAY['C']), + (6, ARRAY['E']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_betrayal +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C+ D | A B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 2 + 2 | {B} | | + 3 | {C} | | + 4 | {C} | | + 5 | {C} | | + 6 | {E} | | +(6 rows) + +-- A B C+ D fails at Row 6 (E instead of D) +-- Question: Does it fallback to A B (1-2)? +-- Test 4: Lexical Order test - A B C | A B C D E +-- SQL standard: first matching alternative wins +WITH test_lexical AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['C']), + (4, ARRAY['D']), + (5, ARRAY['E']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_lexical +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C | A B C D E) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags), + E AS 'E' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 3 + 2 | {B} | | + 3 | {C} | | + 4 | {D} | | + 5 | {E} | | +(5 rows) + +-- SQL standard Lexical Order: A B C (1-3) wins (first alternative) +-- Test 4b: Reversed pattern order - A B C D E | A B C +WITH test_lexical2 AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['C']), + (4, ARRAY['D']), + (5, ARRAY['E']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_lexical2 +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C D E | A B C) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags), + E AS 'E' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 5 + 2 | {B} | | + 3 | {C} | | + 4 | {D} | | + 5 | {E} | | +(5 rows) + +-- SQL standard Lexical Order: A B C D E (1-5) wins (first alternative) +-- Test 5: Multiple TRUE in single row (overlapping pattern variables) +-- Each row matches multiple DEFINE conditions simultaneously +WITH test_multi_true AS ( + SELECT * FROM (VALUES + (1, ARRAY['A','B']), -- A and B both TRUE + (2, ARRAY['B','C']), -- B and C both TRUE + (3, ARRAY['C','D']), -- C and D both TRUE + (4, ARRAY['D','E']), -- D and E both TRUE + (5, ARRAY['E','_']) -- E only + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_multi_true +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C D E) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags), + E AS 'E' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A,B} | 1 | 5 + 2 | {B,C} | | + 3 | {C,D} | | + 4 | {D,E} | | + 5 | {E,_} | | +(5 rows) + +-- Row 1: A=T, B=T → matches A +-- Row 2: B=T, C=T → matches B +-- Row 3: C=T, D=T → matches C +-- Row 4: D=T, E=T → matches D +-- Row 5: E=T → matches E +-- Result: match 1-5 (A B C D E) +-- Test 6: Diagonal pattern with multi-TRUE (shifted overlap) +WITH test_diagonal AS ( + SELECT * FROM (VALUES + (1, ARRAY['A','_']), + (2, ARRAY['B','A']), + (3, ARRAY['C','B']), + (4, ARRAY['D','C']), + (5, ARRAY['_','D']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_diagonal +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B C D) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A,_} | 1 | 4 + 2 | {B,A} | 2 | 5 + 3 | {C,B} | | + 4 | {D,C} | | + 5 | {_,D} | | +(5 rows) + +-- Possible matches: +-- Start Row 1: A(1) B(2) C(3) D(4) → 1-4 +-- Start Row 2: A(2) B(3) C(4) D(5) → 2-5 (because Row 2 has A too!) +-- =================================================================== +-- Context Absorption Tests +-- =================================================================== +-- Test absorption 1: Basic A+ pattern - later contexts absorbed by earlier +WITH test_absorb_basic AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['A']), + (5, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_absorb_basic +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+) + DEFINE A AS 'A' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 4 + 2 | {A} | | + 3 | {A} | | + 4 | {A} | | + 5 | {B} | | +(5 rows) + +-- Pattern A+ is absorbable (unbounded first element, only one unbounded) +-- Without absorption: 4 matches (1-4, 2-4, 3-4, 4-4) +-- With absorption: Row 1 match (1-4), rows 2-4 absorbed +-- Test absorption 2: A+ B pattern - absorption with fixed suffix +WITH test_absorb_suffix AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['X']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_absorb_suffix +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+ B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 4 + 2 | {A} | | + 3 | {A} | | + 4 | {B} | | + 5 | {X} | | +(5 rows) + +-- Pattern A+ B is absorbable (A+ unbounded first, B bounded suffix) +-- All potential matches end at same row (row 4 with B) +-- With absorption: Row 1 match (1-4), rows 2-3 absorbed +-- Test absorption 3: Per-branch absorption with ALT (B+ C | B+ D) +WITH test_absorb_alt AS ( + SELECT * FROM (VALUES + (1, ARRAY['B']), + (2, ARRAY['B']), + (3, ARRAY['B']), + (4, ARRAY['D']), + (5, ARRAY['X']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_absorb_alt +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (B+ C | B+ D) + DEFINE + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {B} | 1 | 4 + 2 | {B} | | + 3 | {B} | | + 4 | {D} | | + 5 | {X} | | +(5 rows) + +-- Both branches B+ C and B+ D are absorbable (B+ unbounded first) +-- B+ D branch matches: potential 1-4, 2-4, 3-4 +-- Row 1 (1-4) absorbs Row 2 (2-4) and Row 3 (3-4) - same endpoint +-- Test absorption 4: Non-absorbable pattern (A B+ - unbounded not first) +WITH test_no_absorb AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['B']), + (4, ARRAY['B']), + (5, ARRAY['X']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_no_absorb +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 4 + 2 | {B} | | + 3 | {B} | | + 4 | {B} | | + 5 | {X} | | +(5 rows) + +-- Pattern A B+ is NOT absorbable (A bounded first, B+ unbounded but not first) +-- Only Row 1 can start match (only row with A), so only one match: 1-4 +-- Test absorption 5: GROUP merge enables absorption +WITH test_absorb_group AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['A']), + (6, ARRAY['B']), + (7, ARRAY['X']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_absorb_group +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A B) (A B)+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 6 + 2 | {B} | | + 3 | {A} | | + 4 | {B} | | + 5 | {A} | | + 6 | {B} | | + 7 | {X} | | +(7 rows) + +-- Pattern optimized: (A B) (A B)+ -> (A B){2,} +-- Potential matches: 1-6 (3 reps), 3-6 (2 reps), 5-6 needs 2 reps (fail) +-- Row 1 (1-6) absorbs Row 3 (3-6) - same endpoint, top-level unbounded GROUP +-- Test absorption 6: Multiple unbounded - NOT absorbable +WITH test_multi_unbounded AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['B']), + (4, ARRAY['B']), + (5, ARRAY['X']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_multi_unbounded +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+ B+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); + id | flags | match_start | match_end +----+-------+-------------+----------- + 1 | {A} | 1 | 4 + 2 | {A} | 2 | 4 + 3 | {B} | | + 4 | {B} | | + 5 | {X} | | +(5 rows) + +-- Pattern A+ B+ has TWO unbounded elements - NOT absorbable +-- Greedy A+ consumes rows 1-2, then B+ matches 3-4 +-- Row 1: 1-4, Row 2: 2-4 (different A+ counts, not absorbed) diff --git a/src/test/regress/sql/rpr.sql b/src/test/regress/sql/rpr.sql index 640e9807f59..fa8aa50fcb9 100644 --- a/src/test/regress/sql/rpr.sql +++ b/src/test/regress/sql/rpr.sql @@ -90,6 +90,91 @@ SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER DOWN AS price < PREV(price) ); +-- test using alternation (|) with sequence +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (START (UP | DOWN)) + DEFINE + START AS TRUE, + UP AS price > PREV(price), + DOWN AS price < PREV(price) +); + +-- test using alternation (|) with group quantifier +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (START (UP | DOWN)+) + DEFINE + START AS TRUE, + UP AS price > PREV(price), + DOWN AS price < PREV(price) +); + +-- test using nested alternation +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (START ((UP DOWN) | FLAT)+) + DEFINE + START AS TRUE, + UP AS price > PREV(price), + DOWN AS price < PREV(price), + FLAT AS price = PREV(price) +); + +-- test using group with quantifier +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN ((UP DOWN)+) + DEFINE + UP AS price > PREV(price), + DOWN AS price < PREV(price) +); + +-- test using absolute threshold values (not relative PREV) +-- HIGH: price > 150, LOW: price < 100, MID: neutral range +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (LOW MID* HIGH) + DEFINE + LOW AS price < 100, + MID AS price >= 100 AND price <= 150, + HIGH AS price > 150 +); + +-- test threshold-based pattern with alternation +SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (LOW (MID | HIGH)+) + DEFINE + LOW AS price < 100, + MID AS price >= 100 AND price <= 150, + HIGH AS price > 150 +); + -- basic test with none-greedy pattern SELECT company, tdate, price, count(*) OVER w FROM stock @@ -102,6 +187,42 @@ SELECT company, tdate, price, count(*) OVER w A AS price >= 140 AND price <= 150 ); +-- test using {n} quantifier (A A A should be optimized to A{3}) +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A{3}) + DEFINE + A AS price >= 140 AND price <= 150 +); + +-- test using {n,} quantifier (2 or more) +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A{2,}) + DEFINE + A AS price > 100 +); + +-- test using {n,m} quantifier (2 to 4) +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A{2,4}) + DEFINE + A AS price > 100 +); + -- last_value() should remain consistent SELECT company, tdate, price, last_value(price) OVER w FROM stock @@ -405,6 +526,321 @@ SELECT company, tdate, price, first_value(price) OVER w, last_value(price) OVER SELECT * FROM v_window; SELECT pg_get_viewdef('v_window'); +-- +-- Pattern optimization tests +-- VIEW shows original pattern, EXPLAIN shows optimized pattern +-- + +-- Test: duplicate alternatives removal (A | B | A)+ -> (A | B)+ +CREATE TEMP VIEW v_opt_dup AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN ((A | B | A)+) + DEFINE + A AS price > 100, + B AS price <= 100 +); +SELECT pg_get_viewdef('v_opt_dup'); -- original: ((a | b | a)+) +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_dup; -- optimized: ((a | b)+) + +-- Test: duplicate group removal ((A | B)+ | (A | B)+) -> (A | B)+ +CREATE TEMP VIEW v_opt_dup_group AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN ((A | B)+ | (A | B)+) + DEFINE + A AS price > 100, + B AS price <= 100 +); +SELECT pg_get_viewdef('v_opt_dup_group'); -- original: ((a | b)+ | (a | b)+) +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_dup_group; -- optimized: ((a | b)+) + +-- Test: consecutive vars merge (A A A) -> A{3} +CREATE TEMP VIEW v_opt_merge AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A A A) + DEFINE + A AS price >= 140 AND price <= 150 +); +SELECT pg_get_viewdef('v_opt_merge'); -- original: (a a a) +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_merge; -- optimized: a{3} + +-- Test: quantified vars merge (A A+ A) -> A{3,} +CREATE TEMP VIEW v_opt_merge_quant AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A A+ A) + DEFINE + A AS price > 100 +); +SELECT pg_get_viewdef('v_opt_merge_quant'); -- original: (a a+ a) +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_merge_quant; -- optimized: a{3,} + +-- Test: merge two unbounded (A+ A+) -> A{2,} +CREATE TEMP VIEW v_opt_merge_unbounded AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A+ A+) + DEFINE + A AS price > 100 +); +SELECT pg_get_viewdef('v_opt_merge_unbounded'); -- original: (a+ a+) +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_merge_unbounded; -- optimized: a{2,} + +-- Test: merge with zero-min (A* A+) -> A+ +CREATE TEMP VIEW v_opt_merge_star AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A* A+) + DEFINE + A AS price > 100 +); +SELECT pg_get_viewdef('v_opt_merge_star'); -- original: (a* a+) +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_merge_star; -- optimized: a+ + +-- Test: complex merge (A A{2} A+ A{3}) -> A{7,} +CREATE TEMP VIEW v_opt_merge_complex AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A A{2} A+ A{3}) + DEFINE + A AS price > 100 +); +SELECT pg_get_viewdef('v_opt_merge_complex'); -- original: (a a{2} a+ a{3}) +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_merge_complex; -- optimized: a{7,} + +-- Test: group merge ((A B) (A B)+) -> (A B){2,} +CREATE TEMP VIEW v_opt_merge_group AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN ((A B) (A B)+) + DEFINE + A AS price > 100, + B AS price <= 100 +); +SELECT pg_get_viewdef('v_opt_merge_group'); -- original: ((a b) (a b)+) +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_merge_group; -- expected: (a b){2,} + +-- Test: group merge A B (A B)+ -> (A B){2,} +CREATE TEMP VIEW v_opt_merge_group2 AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A B (A B)+) + DEFINE + A AS price > 100, + B AS price <= 100 +); +SELECT pg_get_viewdef('v_opt_merge_group2'); -- original: (a b (a b)+) +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_merge_group2; -- expected: (a b){2,} + +-- Test: group merge (A B) (A B)+ (A B) -> (A B){3,} +CREATE TEMP VIEW v_opt_merge_group3 AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN ((A B) (A B)+ (A B)) + DEFINE + A AS price > 100, + B AS price <= 100 +); +SELECT pg_get_viewdef('v_opt_merge_group3'); -- original: ((a b) (a b)+ (a b)) +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_merge_group3; -- expected: (a b){3,} + +-- Test: group merge A B A B (A B)+ A B A B -> (A B){5,} +CREATE TEMP VIEW v_opt_merge_group4 AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A B A B (A B)+ A B A B) + DEFINE + A AS price > 100, + B AS price <= 100 +); +SELECT pg_get_viewdef('v_opt_merge_group4'); -- original: (a b a b (a b)+ a b a b) +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_merge_group4; -- expected: (a b){5,} + +-- Test: group merge C A B (A B)+ A B C -> C (A B){3,} C +CREATE TEMP VIEW v_opt_merge_group5 AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (C A B (A B)+ A B C) + DEFINE + A AS price > 100, + B AS price <= 100, + C AS price > 200 +); +SELECT pg_get_viewdef('v_opt_merge_group5'); -- original: (c a b (a b)+ a b c) +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_merge_group5; -- expected: c (a b){3,} c + +-- Test {n} quantifier display +CREATE TEMP VIEW v_quantifier_n AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A{3}) + DEFINE + A AS price > 100 +); +SELECT pg_get_viewdef('v_quantifier_n'); + +-- Test {n,} quantifier display +CREATE TEMP VIEW v_quantifier_n_plus AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A{2,}) + DEFINE + A AS price > 100 +); +SELECT pg_get_viewdef('v_quantifier_n_plus'); + +-- Test: flatten nested SEQ (A (B C)) -> A B C +CREATE TEMP VIEW v_opt_flatten_seq AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (A (B C)) + DEFINE + A AS price > 100, + B AS price > 150, + C AS price < 150 +); +SELECT pg_get_viewdef('v_opt_flatten_seq'); -- original: (a (b c)) +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_flatten_seq; -- optimized: a b c + +-- Test: flatten nested ALT (A | (B | C)) -> (A | B | C) +CREATE TEMP VIEW v_opt_flatten_alt AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN ((A | (B | C))+) + DEFINE + A AS price > 200, + B AS price > 100, + C AS price <= 100 +); +SELECT pg_get_viewdef('v_opt_flatten_alt'); -- original: ((a | (b | c))+) +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_flatten_alt; -- optimized: ((a | b | c))+ + +-- Test: unwrap GROUP{1,1} ((A)) -> A +CREATE TEMP VIEW v_opt_unwrap_group AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN (((A))) + DEFINE + A AS price > 100 +); +SELECT pg_get_viewdef('v_opt_unwrap_group'); -- original: (((a))) +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_unwrap_group; -- optimized: a + +-- Test: quantifier multiplication (A{2}){3} -> A{6} +CREATE TEMP VIEW v_opt_quant_mult AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN ((A{2}){3}) + DEFINE + A AS price > 100 +); +SELECT pg_get_viewdef('v_opt_quant_mult'); -- original: ((a{2}){3}) +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_quant_mult; -- optimized: a{6} + +-- Test: quantifier multiplication (A{2,4}){3} -> A{6,12} +CREATE TEMP VIEW v_opt_quant_mult_range AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN ((A{2,4}){3}) + DEFINE + A AS price > 100 +); +SELECT pg_get_viewdef('v_opt_quant_mult_range'); -- original: ((a{2,4}){3}) +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_quant_mult_range; -- optimized: a{6,12} + +-- Test: quantifier multiplication (A{2}){3,5} -> A{6,10} +CREATE TEMP VIEW v_opt_quant_mult_range2 AS +SELECT company, tdate, price, count(*) OVER w + FROM stock + WINDOW w AS ( + PARTITION BY company + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + INITIAL + PATTERN ((A{2}){3,5}) + DEFINE + A AS price > 100 +); +SELECT pg_get_viewdef('v_opt_quant_mult_range2'); -- original: ((a{2}){3,5}) +EXPLAIN (COSTS OFF) SELECT * FROM v_opt_quant_mult_range2; -- optimized: a{6,10} + -- -- Error cases -- @@ -565,3 +1001,394 @@ SELECT * FROM (SELECT 1 AS x) t DEFINE START AS TRUE, UP AS price > PREV(price), DOWN AS price < PREV(price) ); + + +-- Overlapping match tests (requires multi-context for correct behavior) +-- Using array flags: 'X' = ANY(flags) for multi-TRUE support + +-- Test 1: A B C D E | B C D | C D E F - three overlapping patterns +-- Different end points: B C D (4), A B C D E (5), C D E F (6) +WITH test_overlap1 AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['C']), + (4, ARRAY['D']), + (5, ARRAY['E']), + (6, ARRAY['F']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_overlap1 +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C D E | B C D | C D E F) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags), + E AS 'E' = ANY(flags), + F AS 'F' = ANY(flags) +); + +WITH test_overlap1 AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['C']), + (4, ARRAY['D']), + (5, ARRAY['E']), + (6, ARRAY['F']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_overlap1 +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B C D E | B C D | C D E F) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags), + E AS 'E' = ANY(flags), + F AS 'F' = ANY(flags) +); +-- PAST LAST: only one match +-- TO NEXT ROW with multi-context: three matches +-- Row 1: A B C D E (1-5) +-- Row 2: B C D (2-4) <- ends first! +-- Row 3: C D E F (3-6) <- ends last! + +-- Test 2: A B+ C | B+ D - long B sequence with different endings +WITH test_overlap2 AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['B']), + (4, ARRAY['B']), + (5, ARRAY['B']), + (6, ARRAY['C']), + (7, ARRAY['B']), + (8, ARRAY['B']), + (9, ARRAY['B']), + (10, ARRAY['D']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_overlap2 +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B+ C | B+ D) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags) +); +-- Current result (correct): +-- Row 1: A B+ C (1-6) +-- Row 7-9: B+ D (7-10, 8-10, 9-10) +-- Note: Row 2-6 cannot match B+ D because Row 6 is C, not D +-- With absorption: 8-10 and 9-10 would be absorbed by 7-10 (earlier context covers later) + +-- Test 3: Greedy quantifier with late failure - A B C+ D | A B +-- Pattern expects D after C+, but E comes instead ("betrayal") +WITH test_betrayal AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['C']), + (4, ARRAY['C']), + (5, ARRAY['C']), + (6, ARRAY['E']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_betrayal +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C+ D | A B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags) +); +-- A B C+ D fails at Row 6 (E instead of D) +-- Question: Does it fallback to A B (1-2)? + +-- Test 4: Lexical Order test - A B C | A B C D E +-- SQL standard: first matching alternative wins +WITH test_lexical AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['C']), + (4, ARRAY['D']), + (5, ARRAY['E']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_lexical +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C | A B C D E) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags), + E AS 'E' = ANY(flags) +); +-- SQL standard Lexical Order: A B C (1-3) wins (first alternative) + +-- Test 4b: Reversed pattern order - A B C D E | A B C +WITH test_lexical2 AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['C']), + (4, ARRAY['D']), + (5, ARRAY['E']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_lexical2 +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C D E | A B C) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags), + E AS 'E' = ANY(flags) +); +-- SQL standard Lexical Order: A B C D E (1-5) wins (first alternative) + +-- Test 5: Multiple TRUE in single row (overlapping pattern variables) +-- Each row matches multiple DEFINE conditions simultaneously +WITH test_multi_true AS ( + SELECT * FROM (VALUES + (1, ARRAY['A','B']), -- A and B both TRUE + (2, ARRAY['B','C']), -- B and C both TRUE + (3, ARRAY['C','D']), -- C and D both TRUE + (4, ARRAY['D','E']), -- D and E both TRUE + (5, ARRAY['E','_']) -- E only + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_multi_true +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP PAST LAST ROW + PATTERN (A B C D E) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags), + E AS 'E' = ANY(flags) +); +-- Row 1: A=T, B=T → matches A +-- Row 2: B=T, C=T → matches B +-- Row 3: C=T, D=T → matches C +-- Row 4: D=T, E=T → matches D +-- Row 5: E=T → matches E +-- Result: match 1-5 (A B C D E) + +-- Test 6: Diagonal pattern with multi-TRUE (shifted overlap) +WITH test_diagonal AS ( + SELECT * FROM (VALUES + (1, ARRAY['A','_']), + (2, ARRAY['B','A']), + (3, ARRAY['C','B']), + (4, ARRAY['D','C']), + (5, ARRAY['_','D']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_diagonal +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B C D) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags) +); +-- Possible matches: +-- Start Row 1: A(1) B(2) C(3) D(4) → 1-4 +-- Start Row 2: A(2) B(3) C(4) D(5) → 2-5 (because Row 2 has A too!) + +-- =================================================================== +-- Context Absorption Tests +-- =================================================================== + +-- Test absorption 1: Basic A+ pattern - later contexts absorbed by earlier +WITH test_absorb_basic AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['A']), + (5, ARRAY['B']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_absorb_basic +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+) + DEFINE A AS 'A' = ANY(flags) +); +-- Pattern A+ is absorbable (unbounded first element, only one unbounded) +-- Without absorption: 4 matches (1-4, 2-4, 3-4, 4-4) +-- With absorption: Row 1 match (1-4), rows 2-4 absorbed + +-- Test absorption 2: A+ B pattern - absorption with fixed suffix +WITH test_absorb_suffix AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['X']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_absorb_suffix +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+ B) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); +-- Pattern A+ B is absorbable (A+ unbounded first, B bounded suffix) +-- All potential matches end at same row (row 4 with B) +-- With absorption: Row 1 match (1-4), rows 2-3 absorbed + +-- Test absorption 3: Per-branch absorption with ALT (B+ C | B+ D) +WITH test_absorb_alt AS ( + SELECT * FROM (VALUES + (1, ARRAY['B']), + (2, ARRAY['B']), + (3, ARRAY['B']), + (4, ARRAY['D']), + (5, ARRAY['X']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_absorb_alt +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (B+ C | B+ D) + DEFINE + B AS 'B' = ANY(flags), + C AS 'C' = ANY(flags), + D AS 'D' = ANY(flags) +); +-- Both branches B+ C and B+ D are absorbable (B+ unbounded first) +-- B+ D branch matches: potential 1-4, 2-4, 3-4 +-- Row 1 (1-4) absorbs Row 2 (2-4) and Row 3 (3-4) - same endpoint + +-- Test absorption 4: Non-absorbable pattern (A B+ - unbounded not first) +WITH test_no_absorb AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['B']), + (4, ARRAY['B']), + (5, ARRAY['X']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_no_absorb +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A B+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); +-- Pattern A B+ is NOT absorbable (A bounded first, B+ unbounded but not first) +-- Only Row 1 can start match (only row with A), so only one match: 1-4 + +-- Test absorption 5: GROUP merge enables absorption +WITH test_absorb_group AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['B']), + (3, ARRAY['A']), + (4, ARRAY['B']), + (5, ARRAY['A']), + (6, ARRAY['B']), + (7, ARRAY['X']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_absorb_group +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN ((A B) (A B)+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); +-- Pattern optimized: (A B) (A B)+ -> (A B){2,} +-- Potential matches: 1-6 (3 reps), 3-6 (2 reps), 5-6 needs 2 reps (fail) +-- Row 1 (1-6) absorbs Row 3 (3-6) - same endpoint, top-level unbounded GROUP + +-- Test absorption 6: Multiple unbounded - NOT absorbable +WITH test_multi_unbounded AS ( + SELECT * FROM (VALUES + (1, ARRAY['A']), + (2, ARRAY['A']), + (3, ARRAY['B']), + (4, ARRAY['B']), + (5, ARRAY['X']) + ) AS t(id, flags) +) +SELECT id, flags, first_value(id) OVER w AS match_start, last_value(id) OVER w AS match_end +FROM test_multi_unbounded +WINDOW w AS ( + ORDER BY id + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + AFTER MATCH SKIP TO NEXT ROW + PATTERN (A+ B+) + DEFINE + A AS 'A' = ANY(flags), + B AS 'B' = ANY(flags) +); +-- Pattern A+ B+ has TWO unbounded elements - NOT absorbable +-- Greedy A+ consumes rows 1-2, then B+ matches 3-4 +-- Row 1: 1-4, Row 2: 2-4 (different A+ counts, not absorbed) diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index d409561f602..1d24db449ad 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -2440,6 +2440,13 @@ RI_ConstraintInfo RI_QueryHashEntry RI_QueryKey RPCommonSyntax +RPRDepth +RPRElemFlags +RPRElemIdx +RPRPattern +RPRPatternElement +RPRQuantity +RPRVarId RPSkipTo RTEKind RTEPermissionInfo -- 2.50.1 (Apple Git-155)