From fb3cbb6f99f0fe7b05027759454d7e0013225929 Mon Sep 17 00:00:00 2001 From: Jacob Champion Date: Fri, 20 Oct 2023 16:11:14 -0700 Subject: [PATCH 2/2] squash! Row pattern recognition patch (executor). - Extract pattern matching logic into match_pattern(). - Replace the str_set/initials implementation with a recursive backtracker. - Rework match_pattern in preparation for * quantifier: separate success from number of matched rows. - Handle `*` quantifier - Simplify the evaluate_pattern() signature. - Remove defineInitial and related code. --- src/backend/executor/nodeWindowAgg.c | 871 +++--------------------- src/backend/optimizer/plan/createplan.c | 4 - src/backend/parser/parse_clause.c | 31 - src/include/nodes/execnodes.h | 1 - src/include/nodes/parsenodes.h | 2 - src/include/nodes/plannodes.h | 3 - 6 files changed, 91 insertions(+), 821 deletions(-) diff --git a/src/backend/executor/nodeWindowAgg.c b/src/backend/executor/nodeWindowAgg.c index 2e1baef7ea..30abe60159 100644 --- a/src/backend/executor/nodeWindowAgg.c +++ b/src/backend/executor/nodeWindowAgg.c @@ -161,40 +161,6 @@ typedef struct WindowStatePerAggData bool restart; /* need to restart this agg in this cycle? */ } WindowStatePerAggData; -/* - * Set of StringInfo. Used in RPR. - */ -typedef struct StringSet { - StringInfo *str_set; - Size set_size; /* current array allocation size in number of items */ - int set_index; /* current used size */ -} StringSet; - -/* - * Allowed subsequent PATTERN variables positions. - * Used in RPR. - * - * pos represents the pattern variable defined order in DEFINE caluase. For - * example. "DEFINE START..., UP..., DOWN ..." and "PATTERN START UP DOWN UP" - * will create: - * VariablePos[0].pos[0] = 0; START - * VariablePos[1].pos[0] = 1; UP - * VariablePos[1].pos[1] = 3; UP - * VariablePos[2].pos[0] = 2; DOWN - * - * Note that UP has two pos because UP appears in PATTERN twice. - * - * By using this strucrture, we can know which pattern variable can be followed - * by which pattern variable(s). For example, START can be followed by UP and - * DOWN since START's pos is 0, and UP's pos is 1 or 3, DOWN's pos is 2. - * DOWN can be followed by UP since UP's pos is either 1 or 3. - * - */ -#define NUM_ALPHABETS 26 /* we allow [a-z] variable initials */ -typedef struct VariablePos { - int pos[NUM_ALPHABETS]; /* postion(s) in PATTERN */ -} VariablePos; - static void initialize_windowaggregate(WindowAggState *winstate, WindowStatePerFunc perfuncstate, WindowStatePerAgg peraggstate); @@ -249,27 +215,11 @@ static void register_reduced_frame_map(WindowAggState *winstate, int64 pos, int static void clear_reduced_frame_map(WindowAggState *winstate); static void update_reduced_frame(WindowObject winobj, int64 pos); -static int64 evaluate_pattern(WindowObject winobj, int64 current_pos, - char *vname, StringInfo encoded_str, bool *result); +static bool evaluate_pattern(WindowObject winobj, int64 current_pos, + char *vname); static bool get_slots(WindowObject winobj, int64 current_pos); -//static int search_str_set(char *pattern, StringSet *str_set, int *initial_orders); -static int search_str_set(char *pattern, StringSet *str_set, VariablePos *variable_pos); -static char pattern_initial(WindowAggState *winstate, char *vname); -static int do_pattern_match(char *pattern, char *encoded_str); - -static StringSet *string_set_init(void); -static void string_set_add(StringSet *string_set, StringInfo str); -static StringInfo string_set_get(StringSet *string_set, int index); -static int string_set_get_size(StringSet *string_set); -static void string_set_discard(StringSet *string_set); -static VariablePos *variable_pos_init(void); -static void variable_pos_register(VariablePos *variable_pos, char initial, int pos); -static bool variable_pos_compare(VariablePos *variable_pos, char initial1, char initial2); -static int variable_pos_fetch(VariablePos *variable_pos, char initial, int index); -static void variable_pos_discard(VariablePos *variable_pos); - /* * initialize_windowaggregate * parallel to initialize_aggregates in nodeAgg.c @@ -2839,7 +2789,6 @@ ExecInitWindowAgg(WindowAgg *node, EState *estate, int eflags) winstate->patternRegexpList = node->patternRegexp; /* Set up row pattern recognition DEFINE clause */ - winstate->defineInitial = node->defineInitial; winstate->defineVariableList = NIL; winstate->defineClauseList = NIL; if (node->defineClause != NIL) @@ -4063,145 +4012,75 @@ void register_reduced_frame_map(WindowAggState *winstate, int64 pos, int val) } /* - * update_reduced_frame - * Update reduced frame info. + * Match the pattern variables in the WindowObject against the rows at a given + * position. Returns true if the pattern matches successfully, and stores the + * number of matched rows. (*rows may be zero on success; consider for example + * the pattern `A*`.) */ -static -void update_reduced_frame(WindowObject winobj, int64 pos) +static bool +match_pattern(WindowObject winobj, int64 pos, int pattern_index, int *rows) { WindowAggState *winstate = winobj->winstate; - ListCell *lc1, *lc2; - bool expression_result; - int num_matched_rows; - int64 original_pos; - bool anymatch; - StringInfo encoded_str; - StringInfo pattern_str = makeStringInfo(); - StringSet *str_set; - int str_set_index = 0; - int initial_index; - VariablePos *variable_pos; + int submatch_rows = 0; - /* - * Set of pattern variables evaluated to true. - * Each character corresponds to pattern variable. - * Example: - * str_set[0] = "AB"; - * str_set[1] = "AC"; - * In this case at row 0 A and B are true, and A and C are true in row 1. - */ + char *vname = strVal(list_nth(winstate->patternVariableList, pattern_index)); + char *quantifier = strVal(list_nth(winstate->patternRegexpList, pattern_index)); - /* initialize pattern variables set */ - str_set = string_set_init(); + *rows = 0; - /* save original pos */ - original_pos = pos; - - /* - * Loop over until none of pattern matches or encounters end of frame. - */ - for (;;) + /* Try to match the current row against the pattern variable. */ + if (!evaluate_pattern(winobj, pos, vname)) { - int64 result_pos = -1; - - /* - * Loop over each PATTERN variable. - */ - anymatch = false; - encoded_str = makeStringInfo(); - - forboth(lc1, winstate->patternVariableList, lc2, winstate->patternRegexpList) - { - char *vname = strVal(lfirst(lc1)); - char *quantifier = strVal(lfirst(lc2)); - - elog(DEBUG1, "pos: " INT64_FORMAT " pattern vname: %s quantifier: %s", pos, vname, quantifier); - - expression_result = false; - - /* evaluate row pattern against current row */ - result_pos = evaluate_pattern(winobj, pos, vname, encoded_str, &expression_result); - if (expression_result) - { - elog(DEBUG1, "expression result is true"); - anymatch = true; - } - - /* - * If out of frame, we are done. - */ - if (result_pos < 0) - break; - } - - if (!anymatch) - { - /* none of patterns matched. */ - break; - } - - string_set_add(str_set, encoded_str); - - elog(DEBUG1, "pos: " INT64_FORMAT " str_set_index: %d encoded_str: %s", pos, str_set_index, encoded_str->data); - - /* move to next row */ + /* A failed match is only okay if we have the proper quantifier. */ + if (quantifier[0] != '*') + return false; + } + else + { + /* Matched. Advance to the next row. */ + (*rows)++; pos++; - if (result_pos < 0) + if (quantifier[0] == '+' || quantifier[0] == '*') { - /* out of frame */ - break; + /* Greedy. Try to rematch from the current variable first. */ + if (match_pattern(winobj, pos, pattern_index, &submatch_rows)) + goto success; } } - if (string_set_get_size(str_set) == 0) + /* If there's more to the pattern, try to match it now. */ + if (pattern_index + 1 < list_length(winstate->patternVariableList)) { - /* no match found in the first row */ - register_reduced_frame_map(winstate, original_pos, RF_UNMATCHED); - return; - } - - elog(DEBUG2, "pos: " INT64_FORMAT " encoded_str: %s", pos, encoded_str->data); - - /* build regular expression */ - pattern_str = makeStringInfo(); - appendStringInfoChar(pattern_str, '^'); - initial_index = 0; - - variable_pos = variable_pos_init(); - - forboth (lc1, winstate->patternVariableList, lc2, winstate->patternRegexpList) - { - char *vname = strVal(lfirst(lc1)); - char *quantifier = strVal(lfirst(lc2)); - char initial; - - initial = pattern_initial(winstate, vname); - Assert(initial != 0); - appendStringInfoChar(pattern_str, initial); - if (quantifier[0]) - appendStringInfoChar(pattern_str, quantifier[0]); + if (match_pattern(winobj, pos, pattern_index + 1, &submatch_rows)) + goto success; /* - * Register the initial at initial_index. If the initial appears more - * than once, all of it's initial_index will be recorded. This could - * happen if a pattern variable appears in the PATTERN clause more - * than once like "UP DOWN UP" "UP UP UP". + * We're not to the end of the pattern, and there's no way to advance + * the machine, so fail. + * + * TODO: reluctant qualifiers add another possible transition here */ - variable_pos_register(variable_pos, initial, initial_index); - - initial_index++; + *rows = 0; + return false; } - elog(DEBUG2, "pos: " INT64_FORMAT " pattern: %s", pos, pattern_str->data); +success: + *rows += submatch_rows; + return true; +} - /* look for matching pattern variable sequence */ - elog(DEBUG1, "search_str_set started"); - num_matched_rows = search_str_set(pattern_str->data, str_set, variable_pos); - elog(DEBUG1, "search_str_set returns: %d", num_matched_rows); +/* + * update_reduced_frame + * Update reduced frame info. + */ +static +void update_reduced_frame(WindowObject winobj, int64 pos) +{ + WindowAggState *winstate = winobj->winstate; + int num_matched_rows; - variable_pos_discard(variable_pos); - string_set_discard(str_set); + match_pattern(winobj, pos, 0, &num_matched_rows); /* * We are at the first row in the reduced frame. Save the number of @@ -4210,15 +4089,15 @@ void update_reduced_frame(WindowObject winobj, int64 pos) if (num_matched_rows <= 0) { /* no match */ - register_reduced_frame_map(winstate, original_pos, RF_UNMATCHED); + register_reduced_frame_map(winstate, pos, RF_UNMATCHED); } else { int64 i; - register_reduced_frame_map(winstate, original_pos, RF_FRAME_HEAD); + register_reduced_frame_map(winstate, pos, RF_FRAME_HEAD); - for (i = original_pos + 1; i < original_pos + num_matched_rows; i++) + for (i = pos + 1; i < pos + num_matched_rows; i++) { register_reduced_frame_map(winstate, i, RF_SKIPPED); } @@ -4228,436 +4107,76 @@ void update_reduced_frame(WindowObject winobj, int64 pos) } /* - * Perform pattern matching using pattern against str_set. pattern is a - * regular expression derived from PATTERN clause. Note that the regular - * expression string is prefixed by '^' and followed by initials represented - * in a same way as str_set. str_set is a set of StringInfo. Each StringInfo - * has a string comprising initials of pattern variable strings being true in - * a row. The initials are one of [a-y], parallel to the order of variable - * names in DEFINE clause. For example, if DEFINE has variables START, UP and - * DOWN, PATTERN HAS START, UP and DOWN, then the initials in PATTERN will be - * 'a', 'b' and 'c'. - * - * variable_pos is an array representing the order of pattern variable string - * initials in PATTERN clause. For example initial 'a' potion is in - * variable_pos[0].pos[0] = 0. Note that if the pattern is "START UP DOWN UP" - * (UP appears twice), then "UP" (initial is 'b') has two position 1 and - * 3. Thus variable_pos for b is variable_pos[1].pos[0] = 1 and - * variable_pos[1].pos[1] = 3. - * - * Returns the longest number of the matching rows. + * Evaluate expression associated with PATTERN variable vname for the given row + * position, and return the result. */ -static -int search_str_set(char *pattern, StringSet *str_set, VariablePos *variable_pos) -{ -#define MAX_CANDIDATE_NUM 10000 /* max pattern match candidate size */ -#define FREEZED_CHAR 'Z' /* a pattern is freezed if it ends with the char */ -#define DISCARD_CHAR 'z' /* a pattern is not need to keep */ - - int set_size; /* number of rows in the set */ - int resultlen; - int index; - StringSet *old_str_set, *new_str_set; - int new_str_size; - int len; - - set_size = string_set_get_size(str_set); - new_str_set = string_set_init(); - len = 0; - resultlen = 0; - - /* - * Generate all possible pattern variable name initials as a set of - * StringInfo named "new_str_set". For example, if we have two rows - * having "ab" (row 0) and "ac" (row 1) in the input str_set, new_str_set - * will have set of StringInfo "aa", "ac", "ba" and "bc" in the end. - */ - elog(DEBUG1, "pattern: %s set_size: %d", pattern, set_size); - for (index = 0; index < set_size; index++) - { - StringInfo str; /* search target row */ - char *p; - int old_set_size; - int i; - - elog(DEBUG1, "index: %d", index); - - if (index == 0) - { - /* copy variables in row 0 */ - str = string_set_get(str_set, index); - p = str->data; - - /* - * Loop over each new pattern variable char. - */ - while (*p) - { - StringInfo new = makeStringInfo(); - - /* add pattern variable char */ - appendStringInfoChar(new, *p); - /* add new one to string set */ - string_set_add(new_str_set, new); - elog(DEBUG1, "old_str: NULL new_str: %s", new->data); - p++; /* next pattern variable */ - } - } - else /* index != 0 */ - { - old_str_set = new_str_set; - new_str_set = string_set_init(); - str = string_set_get(str_set, index); - old_set_size = string_set_get_size(old_str_set); - - /* - * Loop over each rows in the previous result set. - */ - for (i = 0; i < old_set_size ; i++) - { - StringInfo new; - char last_old_char; - int old_str_len; - StringInfo old = string_set_get(old_str_set, i); - - p = old->data; - old_str_len = strlen(p); - if (old_str_len > 0) - last_old_char = p[old_str_len - 1]; - else - last_old_char = '\0'; - - /* Is this old set freezed? */ - if (last_old_char == FREEZED_CHAR) - { - /* if shorter match. we can discard it */ - if ((old_str_len - 1) < resultlen) - { - elog(DEBUG1, "discard this old set because shorter match: %s", old->data); - continue; - } - - elog(DEBUG1, "keep this old set: %s", old->data); - new = makeStringInfo(); - appendStringInfoString(new, old->data); - string_set_add(new_str_set, new); - continue; - } - /* Can this old set be discarded? */ - else if (last_old_char == DISCARD_CHAR) - { - elog(DEBUG1, "discard this old set: %s", old->data); - continue; - } - - elog(DEBUG1, "str->data: %s", str->data); - - /* - * loop over each pattern variable initial char in the input - * set. - */ - for (p = str->data; *p; p++) - { - /* - * Optimization. Check if the row's pattern variable - * initial character position is greater than or equal to - * the old set's last pattern variable initial character - * position. For example, if the old set's last pattern - * variable initials are "ab", then the new pattern - * variable initial can be "b" or "c" but can not be "a", - * if the initials in PATTERN is something like "a b c" or - * "a b+ c+" etc. This optimization is possible when we - * only allow "+" quantifier. - */ - if (variable_pos_compare(variable_pos, last_old_char, *p)) - { - /* copy source string */ - new = makeStringInfo(); - appendStringInfoString(new, old->data); - /* add pattern variable char */ - appendStringInfoChar(new, *p); - elog(DEBUG1, "old_str: %s new_str: %s", old->data, new->data); - - /* - * Adhoc optimization. If the first letter in the - * input string is the first and second position one - * and there's no associated quatifier '+', then we - * can dicard the input because there's no chace to - * expand the string further. - * - * For example, pattern "abc" cannot match "aa". - */ - elog(DEBUG1, "pattern[1]:%c pattern[2]:%c new[0]:%c new[1]:%c", - pattern[1], pattern[2], new->data[0], new->data[1]); - if (pattern[1] == new->data[0] && - pattern[1] == new->data[1] && - pattern[2] != '+' && - pattern[1] != pattern[2]) - { - elog(DEBUG1, "discard this new data: %s", - new->data); - pfree(new->data); - pfree(new); - continue; - } - - /* add new one to string set */ - string_set_add(new_str_set, new); - } - else - { - /* - * We are freezing this pattern string. Since there's - * no chance to expand the string further, we perform - * pattern matching against the string. If it does not - * match, we can discard it. - */ - len = do_pattern_match(pattern, old->data); - - if (len <= 0) - { - /* no match. we can discard it */ - continue; - } - - else if (len <= resultlen) - { - /* shorter match. we can discard it */ - continue; - } - else - { - /* match length is the longest so far */ - - int new_index; - - /* remember the longest match */ - resultlen = len; - - /* freeze the pattern string */ - new = makeStringInfo(); - appendStringInfoString(new, old->data); - /* add freezed mark */ - appendStringInfoChar(new, FREEZED_CHAR); - elog(DEBUG1, "old_str: %s new_str: %s", old->data, new->data); - string_set_add(new_str_set, new); - - /* - * Search new_str_set to find out freezed - * entries that have shorter match length. - * Mark them as "discard" so that they are - * discarded in the next round. - */ - - /* new_index_size should be the one before */ - new_str_size = string_set_get_size(new_str_set) - 1; - - /* loop over new_str_set */ - for (new_index = 0; new_index < new_str_size; new_index++) - { - char new_last_char; - int new_str_len; - - new = string_set_get(new_str_set, new_index); - new_str_len = strlen(new->data); - if (new_str_len > 0) - { - new_last_char = new->data[new_str_len - 1]; - if (new_last_char == FREEZED_CHAR && - (new_str_len - 1) <= len) - { - /* mark this set to discard in the next round */ - appendStringInfoChar(new, DISCARD_CHAR); - elog(DEBUG1, "add discard char: %s", new->data); - } - } - } - } - } - } - } - /* we no longer need old string set */ - string_set_discard(old_str_set); - } - } - - /* - * Perform pattern matching to find out the longest match. - */ - new_str_size = string_set_get_size(new_str_set); - elog(DEBUG1, "new_str_size: %d", new_str_size); - len = 0; - resultlen = 0; - - for (index = 0; index < new_str_size; index++) - { - StringInfo s; - - s = string_set_get(new_str_set, index); - if (s == NULL) - continue; /* no data */ - - elog(DEBUG1, "target string: %s", s->data); - - len = do_pattern_match(pattern, s->data); - if (len > resultlen) - { - /* remember the longest match */ - resultlen = len; - - /* - * If the size of result set is equal to the number of rows in the - * set, we are done because it's not possible that the number of - * matching rows exceeds the number of rows in the set. - */ - if (resultlen >= set_size) - break; - } - } - - /* we no longer need new string set */ - string_set_discard(new_str_set); - - return resultlen; -} - -/* - * do_pattern_match - * perform pattern match using pattern against encoded_str. - * returns matching number of rows if matching is succeeded. - * Otherwise returns 0. - */ -static -int do_pattern_match(char *pattern, char *encoded_str) -{ - Datum d; - text *res; - char *substr; - int len = 0; - text *pattern_text, *encoded_str_text; - - pattern_text = cstring_to_text(pattern); - encoded_str_text = cstring_to_text(encoded_str); - - /* - * We first perform pattern matching using regexp_instr, then call - * textregexsubstr to get matched substring to know how long the - * matched string is. That is the number of rows in the reduced window - * frame. The reason why we can't call textregexsubstr in the first - * place is, it errors out if pattern does not match. - */ - if (DatumGetInt32(DirectFunctionCall2Coll(regexp_instr, DEFAULT_COLLATION_OID, - PointerGetDatum(encoded_str_text), - PointerGetDatum(pattern_text)))) - { - d = DirectFunctionCall2Coll(textregexsubstr, - DEFAULT_COLLATION_OID, - PointerGetDatum(encoded_str_text), - PointerGetDatum(pattern_text)); - if (d != 0) - { - res = DatumGetTextPP(d); - substr = text_to_cstring(res); - len = strlen(substr); - pfree(substr); - } - } - pfree(encoded_str_text); - pfree(pattern_text); - - return len; -} - - -/* - * Evaluate expression associated with PATTERN variable vname. - * relpos is relative row position in a frame (starting from 0). - * "quantifier" is the quatifier part of the PATTERN regular expression. - * Currently only '+' is allowed. - * result is out paramater representing the expression evaluation result - * is true of false. - * Return values are: - * >=0: the last match absolute row position - * other wise out of frame. - */ -static -int64 evaluate_pattern(WindowObject winobj, int64 current_pos, - char *vname, StringInfo encoded_str, bool *result) +static bool +evaluate_pattern(WindowObject winobj, int64 current_pos, char *vname) { WindowAggState *winstate = winobj->winstate; ExprContext *econtext = winstate->ss.ps.ps_ExprContext; - ListCell *lc1, *lc2, *lc3; + ListCell *lc1, *lc2; ExprState *pat; Datum eval_result; - bool out_of_frame = false; bool isnull; TupleTableSlot *slot; + bool result; - forthree (lc1, winstate->defineVariableList, lc2, winstate->defineClauseList, lc3, winstate->defineInitial) + forboth (lc1, winstate->defineVariableList, lc2, winstate->defineClauseList) { - char initial; char *name = strVal(lfirst(lc1)); if (strcmp(vname, name)) continue; - initial = *(strVal(lfirst(lc3))); - /* set expression to evaluate */ pat = lfirst(lc2); /* get current, previous and next tuples */ if (!get_slots(winobj, current_pos)) { - out_of_frame = true; + /* out of frame */ + return false; + } + + /* evaluate the expression */ + eval_result = ExecEvalExpr(pat, econtext, &isnull); + if (isnull) + { + /* expression is NULL */ + elog(DEBUG1, "expression for %s is NULL at row: " INT64_FORMAT, vname, current_pos); + result = false; } else { - /* evaluate the expression */ - eval_result = ExecEvalExpr(pat, econtext, &isnull); - if (isnull) + if (!DatumGetBool(eval_result)) { - /* expression is NULL */ - elog(DEBUG1, "expression for %s is NULL at row: " INT64_FORMAT, vname, current_pos); - *result = false; + /* expression is false */ + elog(DEBUG1, "expression for %s is false at row: " INT64_FORMAT, vname, current_pos); + result = false; } else { - if (!DatumGetBool(eval_result)) - { - /* expression is false */ - elog(DEBUG1, "expression for %s is false at row: " INT64_FORMAT, vname, current_pos); - *result = false; - } - else - { - /* expression is true */ - elog(DEBUG1, "expression for %s is true at row: " INT64_FORMAT, vname, current_pos); - appendStringInfoChar(encoded_str, initial); - *result = true; - } + /* expression is true */ + elog(DEBUG1, "expression for %s is true at row: " INT64_FORMAT, vname, current_pos); + result = true; } - - slot = winstate->temp_slot_1; - if (slot != winstate->null_slot) - ExecClearTuple(slot); - slot = winstate->prev_slot; - if (slot != winstate->null_slot) - ExecClearTuple(slot); - slot = winstate->next_slot; - if (slot != winstate->null_slot) - ExecClearTuple(slot); - - break; } - if (out_of_frame) - { - *result = false; - return -1; - } + slot = winstate->temp_slot_1; + if (slot != winstate->null_slot) + ExecClearTuple(slot); + slot = winstate->prev_slot; + if (slot != winstate->null_slot) + ExecClearTuple(slot); + slot = winstate->next_slot; + if (slot != winstate->null_slot) + ExecClearTuple(slot); + + return result; } - return current_pos; + + pg_unreachable(); } /* @@ -4738,211 +4257,3 @@ bool get_slots(WindowObject winobj, int64 current_pos) } return true; } - -/* - * Return pattern variable initial character - * matching with pattern variable name vname. - * If not found, return 0. - */ -static -char pattern_initial(WindowAggState *winstate, char *vname) -{ - char initial; - char *name; - ListCell *lc1, *lc2; - - forboth (lc1, winstate->defineVariableList, lc2, winstate->defineInitial) - { - name = strVal(lfirst(lc1)); /* DEFINE variable name */ - initial = *(strVal(lfirst(lc2))); /* DEFINE variable initial */ - - - if (!strcmp(name, vname)) - return initial; /* found */ - } - return 0; -} - -/* - * string_set_init - * Create dynamic set of StringInfo. - */ -static -StringSet *string_set_init(void) -{ -/* Initial allocation size of str_set */ -#define STRING_SET_ALLOC_SIZE 1024 - - StringSet *string_set; - Size set_size; - - string_set = palloc0(sizeof(StringSet)); - string_set->set_index = 0; - set_size = STRING_SET_ALLOC_SIZE; - string_set->str_set = palloc(set_size * sizeof(StringInfo)); - string_set->set_size = set_size; - - return string_set; -} - -/* - * Add StringInfo str to StringSet string_set. - */ -static -void string_set_add(StringSet *string_set, StringInfo str) -{ - Size set_size; - - set_size = string_set->set_size; - if (string_set->set_index >= set_size) - { - set_size *= 2; - string_set->str_set = repalloc(string_set->str_set, - set_size * sizeof(StringInfo)); - string_set->set_size = set_size; - } - - string_set->str_set[string_set->set_index++] = str; - - return; -} - -/* - * Returns StringInfo specified by index. - * If there's no data yet, returns NULL. - */ -static -StringInfo string_set_get(StringSet *string_set, int index) -{ - /* no data? */ - if (index == 0 && string_set->set_index == 0) - return NULL; - - if (index < 0 ||index >= string_set->set_index) - elog(ERROR, "invalid index: %d", index); - - return string_set->str_set[index]; -} - -/* - * Returns the size of StringSet. - */ -static -int string_set_get_size(StringSet *string_set) -{ - return string_set->set_index; -} - -/* - * Discard StringSet. - * All memory including StringSet itself is freed. - */ -static -void string_set_discard(StringSet *string_set) -{ - int i; - - for (i = 0; i < string_set->set_index; i++) - { - StringInfo str = string_set->str_set[i]; - pfree(str->data); - pfree(str); - } - pfree(string_set->str_set); - pfree(string_set); -} - -/* - * Create and initialize variable postion structure - */ -static -VariablePos *variable_pos_init(void) -{ - VariablePos *variable_pos; - - variable_pos = palloc(sizeof(VariablePos) * NUM_ALPHABETS); - MemSet(variable_pos, -1, sizeof(VariablePos) * NUM_ALPHABETS); - return variable_pos; -} - -/* - * Register pattern variable whose initial is initial into postion index. - * pos is position of initial. - * If pos is already registered, register it at next empty slot. - */ -static -void variable_pos_register(VariablePos *variable_pos, char initial, int pos) -{ - int index = initial - 'a'; - int slot; - int i; - - if (pos < 0 || pos > NUM_ALPHABETS) - elog(ERROR, "initial is not valid char: %c", initial); - - for (i = 0; i < NUM_ALPHABETS; i++) - { - slot = variable_pos[index].pos[i]; - if (slot < 0) - { - /* empty slot found */ - variable_pos[index].pos[i] = pos; - return; - } - } - elog(ERROR, "no empty slot for initial: %c", initial); -} - -/* - * Returns true if initial1 can be followed by initial2 - */ -static -bool variable_pos_compare(VariablePos *variable_pos, char initial1, char initial2) -{ - int index1, index2; - int pos1, pos2; - - for (index1 = 0; ; index1++) - { - pos1 = variable_pos_fetch(variable_pos, initial1, index1); - if (pos1 < 0) - break; - - for (index2 = 0; ; index2++) - { - pos2 = variable_pos_fetch(variable_pos, initial2, index2); - if (pos2 < 0) - break; - if (pos1 <= pos2) - return true; - } - } - return false; -} - -/* - * Fetch position of pattern variable whose initial is initial, and whose index - * is index. If no postion was registered by initial, index, returns -1. - */ -static -int variable_pos_fetch(VariablePos *variable_pos, char initial, int index) -{ - int pos = initial - 'a'; - - if (pos < 0 || pos > NUM_ALPHABETS) - elog(ERROR, "initial is not valid char: %c", initial); - - if (index < 0 || index > NUM_ALPHABETS) - elog(ERROR, "index is not valid: %d", index); - - return variable_pos[pos].pos[index]; -} - -/* - * Discard VariablePos - */ -static -void variable_pos_discard(VariablePos *variable_pos) -{ - pfree(variable_pos); -} diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 469fcd156b..c2aba23d17 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -288,7 +288,6 @@ static WindowAgg *make_windowagg(List *tlist, Index winref, Oid startInRangeFunc, Oid endInRangeFunc, Oid inRangeColl, bool inRangeAsc, bool inRangeNullsFirst, List *runCondition, RPSkipTo rpSkipTo, List *patternVariable, List *patternRegexp, List *defineClause, - List *defineInitial, List *qual, bool topWindow, Plan *lefttree); static Group *make_group(List *tlist, List *qual, int numGroupCols, AttrNumber *grpColIdx, Oid *grpOperators, Oid *grpCollations, @@ -2703,7 +2702,6 @@ create_windowagg_plan(PlannerInfo *root, WindowAggPath *best_path) wc->patternVariable, wc->patternRegexp, wc->defineClause, - wc->defineInitial, best_path->qual, best_path->topwindow, subplan); @@ -6609,7 +6607,6 @@ make_windowagg(List *tlist, Index winref, Oid startInRangeFunc, Oid endInRangeFunc, Oid inRangeColl, bool inRangeAsc, bool inRangeNullsFirst, List *runCondition, RPSkipTo rpSkipTo, List *patternVariable, List *patternRegexp, List *defineClause, - List *defineInitial, List *qual, bool topWindow, Plan *lefttree) { WindowAgg *node = makeNode(WindowAgg); @@ -6640,7 +6637,6 @@ make_windowagg(List *tlist, Index winref, node->patternVariable = patternVariable; node->patternRegexp = patternRegexp; node->defineClause = defineClause; - node->defineInitial = defineInitial; plan->targetlist = tlist; plan->lefttree = lefttree; diff --git a/src/backend/parser/parse_clause.c b/src/backend/parser/parse_clause.c index 9c347216f7..ab81f5cbd8 100644 --- a/src/backend/parser/parse_clause.c +++ b/src/backend/parser/parse_clause.c @@ -3875,16 +3875,11 @@ transformRPR(ParseState *pstate, WindowClause *wc, WindowDef *windef, List **tar static List * transformDefineClause(ParseState *pstate, WindowClause *wc, WindowDef *windef, List **targetlist) { - /* DEFINE variable name initials */ - static char *defineVariableInitials = "abcdefghijklmnopqrstuvwxyz"; - ListCell *lc, *l; ResTarget *restarget, *r; List *restargets; List *defineClause; char *name; - int initialLen; - int i; /* * If Row Definition Common Syntax exists, DEFINE clause must exist. @@ -3998,33 +3993,7 @@ transformDefineClause(ParseState *pstate, WindowClause *wc, WindowDef *windef, L restargets = lappend(restargets, restarget); } list_free(restargets); - - /* - * Create list of row pattern DEFINE variable name's initial. - * We assign [a-z] to them (up to 26 variable names are allowed). - */ restargets = NIL; - i = 0; - initialLen = strlen(defineVariableInitials); - - foreach(lc, windef->rpCommonSyntax->rpDefs) - { - char initial[2]; - - restarget = (ResTarget *)lfirst(lc); - name = restarget->name; - - if (i >= initialLen) - { - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("number of row pattern definition variable names exceeds %d", initialLen), - parser_errposition(pstate, exprLocation((Node *)restarget)))); - } - initial[0] = defineVariableInitials[i++]; - initial[1] = '\0'; - wc->defineInitial = lappend(wc->defineInitial, makeString(pstrdup(initial))); - } defineClause = transformTargetList(pstate, windef->rpCommonSyntax->rpDefs, EXPR_KIND_RPR_DEFINE); diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 28d098b1cf..8e77646e8e 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -2530,7 +2530,6 @@ typedef struct WindowAggState List *defineVariableList; /* list of row pattern definition variables (list of String) */ List *defineClauseList; /* expression for row pattern definition * search conditions ExprState list */ - List *defineInitial; /* list of row pattern definition variable initials (list of String) */ MemoryContext partcontext; /* context for partition-lifespan data */ MemoryContext aggcontext; /* shared context for aggregate working data */ diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index 31b04d6919..613f746861 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -1564,8 +1564,6 @@ typedef struct WindowClause bool initial; /* true if is initial */ /* Row Pattern DEFINE clause (list of TargetEntry) */ List *defineClause; - /* Row Pattern DEFINE variable initial names (list of String) */ - List *defineInitial; /* Row Pattern PATTERN variable name (list of String) */ List *patternVariable; /* Row Pattern PATTERN regular expression quantifier ('+' or ''. list of String) */ diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index e48b59517d..33f4aa4b72 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -1108,9 +1108,6 @@ typedef struct WindowAgg /* Row Pattern DEFINE clause (list of TargetEntry) */ List *defineClause; - /* Row Pattern DEFINE variable initial names (list of String) */ - List *defineInitial; - /* * false for all apart from the WindowAgg that's closest to the root of * the plan -- 2.39.2