From cc0e870b7fd54176f7808e8ed7f1ae4f9c242307 Mon Sep 17 00:00:00 2001 From: jian he Date: Fri, 19 Jun 2026 14:10:07 +0800 Subject: [PATCH v48 1/2] misc refactor mostly related to RPR_QUANTITY_INF Replace PG_INT32_MAX with RPR_QUANTITY_INF in both gram.y and rpr.c. To avoid including src/include/optimizer/rpr.h in gram.y, the RPR_QUANTITY_INF macro is defined explicitly within gram.y itself. Abstracting this value into a dedicated macro is future-proof: the parsing logic remains correct even if the quantifier upper bound changes from PG_INT32_MAX to some other value. Rename the bodyNullable, anyNullable and allNullable variables to match_empty in fillRPRPattern and its related functions. "nullable" typically implies that a value can be SQL NULL, whereas this is about whether a pattern can match zero rows; "match_empty" reflects this behavior more accurately. All associated comments have also been updated from "nullable" to "can match zero rows" or "match empty" for improved clarity. Refactor splitRPRTrailingAlt to use the foreach_node macro and foreach_current_index, removing the need to manage ListCell iteration and an index counter manually. Add a comment inside makeRPRQuantifier explicitly stating that other fields are irrelevant for a quantifier node. --- src/backend/optimizer/plan/rpr.c | 42 ++++++++--------- src/backend/parser/gram.y | 78 ++++++++++++++++++-------------- 2 files changed, 64 insertions(+), 56 deletions(-) diff --git a/src/backend/optimizer/plan/rpr.c b/src/backend/optimizer/plan/rpr.c index 62292508aa..c4a8e5d6d3 100644 --- a/src/backend/optimizer/plan/rpr.c +++ b/src/backend/optimizer/plan/rpr.c @@ -1176,7 +1176,7 @@ getVarIdFromPattern(RPRPattern *pat, const char *varName) * fillRPRPatternVar * Fill a VAR pattern element. * - * Returns true if this VAR is nullable (can match zero rows). + * Returns true if this VAR can match zero rows. 
*/ static bool fillRPRPatternVar(RPRPatternNode *node, RPRPattern *pat, int *idx, RPRDepth depth) @@ -1187,7 +1187,7 @@ fillRPRPatternVar(RPRPatternNode *node, RPRPattern *pat, int *idx, RPRDepth dept elem->varId = getVarIdFromPattern(pat, node->varName); elem->depth = depth; elem->min = node->min; - elem->max = (node->max == PG_INT32_MAX) ? RPR_QUANTITY_INF : node->max; + elem->max = (node->max >= RPR_QUANTITY_INF) ? RPR_QUANTITY_INF : node->max; Assert(elem->min >= 0 && elem->min < RPR_QUANTITY_INF && elem->max >= 1 && (elem->max == RPR_QUANTITY_INF || elem->min <= elem->max)); @@ -1218,8 +1218,8 @@ fillRPRPatternVar(RPRPatternNode *node, RPRPattern *pat, int *idx, RPRDepth dept * END.jump points to the first child (loop-back path). * BEGIN.next and END.next are set later by finalizeRPRPattern(). * - * Returns true if this group is nullable. A group is nullable when its - * min is 0 (can be skipped entirely) or its body is nullable (every path + * Returns true if this group can match zero rows. A group matches empty when + * its min is 0 (can be skipped entirely) or its body matches empty (every path * through the body can match zero rows). */ static bool @@ -1227,7 +1227,7 @@ fillRPRPatternGroup(RPRPatternNode *node, RPRPattern *pat, int *idx, RPRDepth de { int groupStartIdx = *idx; int beginIdx = -1; - bool bodyNullable = true; + bool match_empty = true; /* Add BEGIN marker if group has non-trivial quantifier (not {1,1}) */ if (node->min != 1 || node->max != 1) @@ -1239,7 +1239,7 @@ fillRPRPatternGroup(RPRPatternNode *node, RPRPattern *pat, int *idx, RPRDepth de elem->varId = RPR_VARID_BEGIN; elem->depth = depth; elem->min = node->min; - elem->max = (node->max == PG_INT32_MAX) ? RPR_QUANTITY_INF : node->max; + elem->max = (node->max >= RPR_QUANTITY_INF) ? 
RPR_QUANTITY_INF : node->max; Assert(elem->min >= 0 && elem->min < RPR_QUANTITY_INF && elem->max >= 1 && (elem->max == RPR_QUANTITY_INF || elem->min <= elem->max)); @@ -1254,7 +1254,7 @@ fillRPRPatternGroup(RPRPatternNode *node, RPRPattern *pat, int *idx, RPRDepth de foreach_node(RPRPatternNode, child, node->children) { if (!fillRPRPattern(child, pat, idx, depth + 1)) - bodyNullable = false; + match_empty = false; } /* Add group end marker if group has non-trivial quantifier (not {1,1}) */ @@ -1267,7 +1267,7 @@ fillRPRPatternGroup(RPRPatternNode *node, RPRPattern *pat, int *idx, RPRDepth de endElem->varId = RPR_VARID_END; endElem->depth = depth; endElem->min = node->min; - endElem->max = (node->max == PG_INT32_MAX) ? RPR_QUANTITY_INF : node->max; + endElem->max = (node->max >= RPR_QUANTITY_INF) ? RPR_QUANTITY_INF : node->max; Assert(endElem->min >= 0 && endElem->min < RPR_QUANTITY_INF && endElem->max >= 1 && (endElem->max == RPR_QUANTITY_INF || endElem->min <= endElem->max)); @@ -1277,11 +1277,11 @@ fillRPRPatternGroup(RPRPatternNode *node, RPRPattern *pat, int *idx, RPRDepth de endElem->flags |= RPR_ELEM_RELUCTANT; /* - * If the group body is nullable (all paths can match empty), mark the - * END element so that nfa_advance_end can fast-forward the iteration - * count to min when reached via empty-match skip paths. + * If all paths through the group body can match empty, mark the END + * element so that nfa_advance_end can fast-forward the iteration count + * to min when reached via empty-match skip paths. 
*/ - if (bodyNullable) + if (match_empty) endElem->flags |= RPR_ELEM_EMPTY_LOOP; (*idx)++; @@ -1290,7 +1290,7 @@ fillRPRPatternGroup(RPRPatternNode *node, RPRPattern *pat, int *idx, RPRDepth de beginElem->jump = *idx; /* skip: go to after END */ } - return (node->min == 0 || bodyNullable); + return (node->min == 0 || match_empty); } /* @@ -1300,8 +1300,8 @@ fillRPRPatternGroup(RPRPatternNode *node, RPRPattern *pat, int *idx, RPRDepth de * Creates the ALT marker, fills each alternative at increased depth, * sets jump pointers for backtracking, and next pointers for successful paths. * - * Returns true if any branch is nullable (OR semantics: one nullable - * branch suffices for the alternation to produce an empty match). + * Returns true if any branch can match zero rows (OR semantics: it suffices for + * one branch to accept an empty match). */ static bool fillRPRPatternAlt(RPRPatternNode *node, RPRPattern *pat, int *idx, RPRDepth depth) @@ -1312,7 +1312,7 @@ fillRPRPatternAlt(RPRPatternNode *node, RPRPattern *pat, int *idx, RPRDepth dept List *altBranchStarts = NIL; List *altEndPositions = NIL; int afterAltIdx; - bool anyNullable = false; + bool match_empty = false; /* Add alternation start marker */ elem = &pat->elements[*idx]; @@ -1332,7 +1332,7 @@ fillRPRPatternAlt(RPRPatternNode *node, RPRPattern *pat, int *idx, RPRDepth dept altBranchStarts = lappend_int(altBranchStarts, branchStart); if (fillRPRPattern(alt, pat, idx, depth + 1)) - anyNullable = true; + match_empty = true; altEndPositions = lappend_int(altEndPositions, *idx - 1); } @@ -1378,7 +1378,7 @@ fillRPRPatternAlt(RPRPatternNode *node, RPRPattern *pat, int *idx, RPRDepth dept list_free(altBranchStarts); list_free(altEndPositions); - return anyNullable; + return match_empty; } /* @@ -1395,7 +1395,7 @@ fillRPRPatternAlt(RPRPatternNode *node, RPRPattern *pat, int *idx, RPRDepth dept static bool fillRPRPattern(RPRPatternNode *node, RPRPattern *pat, int *idx, RPRDepth depth) { - bool allNullable = true; + 
bool match_empty = true; /* Pattern nodes from parser are never NULL */ Assert(node != NULL); @@ -1408,9 +1408,9 @@ fillRPRPattern(RPRPatternNode *node, RPRPattern *pat, int *idx, RPRDepth depth) foreach_node(RPRPatternNode, child, node->children) { if (!fillRPRPattern(child, pat, idx, depth)) - allNullable = false; + match_empty = false; } - return allNullable; + return match_empty; case RPR_PATTERN_VAR: return fillRPRPatternVar(node, pat, idx, depth); diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 6eb01ea7f0..c76a5ea929 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -151,6 +151,14 @@ typedef struct KeyActions #define parser_yyerror(msg) scanner_yyerror(msg, yyscanner) #define parser_errposition(pos) scanner_errposition(pos, yyscanner) +/* + * Upper-bound value denoting an unbounded quantifier in row pattern recognition. + * + * If you change this value, keep it in sync with RPR_QUANTITY_INF in + * src/include/optimizer/rpr.h. + */ +#define RPR_QUANTITY_INF PG_INT32_MAX + static void base_yyerror(YYLTYPE *yylloc, core_yyscan_t yyscanner, const char *msg); static RawStmt *makeRawStmt(Node *stmt, int stmt_location); @@ -17744,11 +17752,11 @@ row_pattern_quantifier_opt: } | '*' { - $$ = (Node *) makeRPRQuantifier(0, PG_INT32_MAX, false, @1); + $$ = (Node *) makeRPRQuantifier(0, RPR_QUANTITY_INF, false, @1); } | '+' { - $$ = (Node *) makeRPRQuantifier(1, PG_INT32_MAX, false, @1); + $$ = (Node *) makeRPRQuantifier(1, RPR_QUANTITY_INF, false, @1); } | Op { @@ -17756,19 +17764,19 @@ row_pattern_quantifier_opt: if (strcmp($1, "?") == 0) $$ = (Node *) makeRPRQuantifier(0, 1, false, @1); else if (strcmp($1, "*?") == 0) - $$ = (Node *) makeRPRQuantifier(0, PG_INT32_MAX, true, @1); + $$ = (Node *) makeRPRQuantifier(0, RPR_QUANTITY_INF, true, @1); else if (strcmp($1, "+?") == 0) - $$ = (Node *) makeRPRQuantifier(1, PG_INT32_MAX, true, @1); + $$ = (Node *) makeRPRQuantifier(1, RPR_QUANTITY_INF, true, @1); else if (strcmp($1, "??") == 0) $$ = (Node *) 
makeRPRQuantifier(0, 1, true, @1); else if (strcmp($1, "*|") == 0) { - $$ = (Node *) makeRPRQuantifier(0, PG_INT32_MAX, false, @1); + $$ = (Node *) makeRPRQuantifier(0, RPR_QUANTITY_INF, false, @1); ((RPRPatternNode *) $$)->trailing_alt = true; } else if (strcmp($1, "+|") == 0) { - $$ = (Node *) makeRPRQuantifier(1, PG_INT32_MAX, false, @1); + $$ = (Node *) makeRPRQuantifier(1, RPR_QUANTITY_INF, false, @1); ((RPRPatternNode *) $$)->trailing_alt = true; } else if (strcmp($1, "?|") == 0) @@ -17778,12 +17786,12 @@ row_pattern_quantifier_opt: } else if (strcmp($1, "*?|") == 0) { - $$ = (Node *) makeRPRQuantifier(0, PG_INT32_MAX, true, @1); + $$ = (Node *) makeRPRQuantifier(0, RPR_QUANTITY_INF, true, @1); ((RPRPatternNode *) $$)->trailing_alt = true; } else if (strcmp($1, "+?|") == 0) { - $$ = (Node *) makeRPRQuantifier(1, PG_INT32_MAX, true, @1); + $$ = (Node *) makeRPRQuantifier(1, RPR_QUANTITY_INF, true, @1); ((RPRPatternNode *) $$)->trailing_alt = true; } else if (strcmp($1, "??|") == 0) @@ -17802,11 +17810,11 @@ row_pattern_quantifier_opt: | '*' Op { if (strcmp($2, "?") == 0) - $$ = (Node *) makeRPRQuantifier(0, PG_INT32_MAX, true, @1); + $$ = (Node *) makeRPRQuantifier(0, RPR_QUANTITY_INF, true, @1); else if (strcmp($2, "?|") == 0) { /* "A* ?|B" = reluctant "A*?" plus alternation */ - $$ = (Node *) makeRPRQuantifier(0, PG_INT32_MAX, true, @1); + $$ = (Node *) makeRPRQuantifier(0, RPR_QUANTITY_INF, true, @1); ((RPRPatternNode *) $$)->trailing_alt = true; } else @@ -17819,11 +17827,11 @@ row_pattern_quantifier_opt: | '+' Op { if (strcmp($2, "?") == 0) - $$ = (Node *) makeRPRQuantifier(1, PG_INT32_MAX, true, @1); + $$ = (Node *) makeRPRQuantifier(1, RPR_QUANTITY_INF, true, @1); else if (strcmp($2, "?|") == 0) { /* "A+ ?|B" = reluctant "A+?" 
plus alternation */ - $$ = (Node *) makeRPRQuantifier(1, PG_INT32_MAX, true, @1); + $$ = (Node *) makeRPRQuantifier(1, RPR_QUANTITY_INF, true, @1); ((RPRPatternNode *) $$)->trailing_alt = true; } else @@ -17859,37 +17867,37 @@ row_pattern_quantifier_opt: /* {n}, {n,}, {,m}, {n,m} quantifiers */ | '{' Iconst '}' { - if ($2 <= 0 || $2 >= PG_INT32_MAX) + if ($2 <= 0 || $2 >= RPR_QUANTITY_INF) ereport(ERROR, errcode(ERRCODE_SYNTAX_ERROR), - errmsg("quantifier bound must be between 1 and %d", PG_INT32_MAX - 1), + errmsg("quantifier bound must be between 1 and %d", RPR_QUANTITY_INF - 1), parser_errposition(@2)); $$ = (Node *) makeRPRQuantifier($2, $2, false, @1); } | '{' Iconst ',' '}' { - if ($2 < 0 || $2 >= PG_INT32_MAX) + if ($2 < 0 || $2 >= RPR_QUANTITY_INF) ereport(ERROR, errcode(ERRCODE_SYNTAX_ERROR), - errmsg("quantifier bound must be between 0 and %d", PG_INT32_MAX - 1), + errmsg("quantifier bound must be between 0 and %d", RPR_QUANTITY_INF - 1), parser_errposition(@2)); - $$ = (Node *) makeRPRQuantifier($2, PG_INT32_MAX, false, @1); + $$ = (Node *) makeRPRQuantifier($2, RPR_QUANTITY_INF, false, @1); } | '{' ',' Iconst '}' { - if ($3 <= 0 || $3 >= PG_INT32_MAX) + if ($3 <= 0 || $3 >= RPR_QUANTITY_INF) ereport(ERROR, errcode(ERRCODE_SYNTAX_ERROR), - errmsg("quantifier bound must be between 1 and %d", PG_INT32_MAX - 1), + errmsg("quantifier bound must be between 1 and %d", RPR_QUANTITY_INF - 1), parser_errposition(@3)); $$ = (Node *) makeRPRQuantifier(0, $3, false, @1); } | '{' Iconst ',' Iconst '}' { - if ($2 < 0 || $4 <= 0 || $2 >= PG_INT32_MAX || $4 >= PG_INT32_MAX) + if ($2 < 0 || $4 <= 0 || $2 >= RPR_QUANTITY_INF || $4 >= RPR_QUANTITY_INF) ereport(ERROR, errcode(ERRCODE_SYNTAX_ERROR), - errmsg("quantifier bounds must be between 0 and %d with max >= 1", PG_INT32_MAX - 1), + errmsg("quantifier bounds must be between 0 and %d with max >= 1", RPR_QUANTITY_INF - 1), parser_errposition(@2)); if ($2 > $4) ereport(ERROR, @@ -17907,10 +17915,10 @@ 
row_pattern_quantifier_opt: errmsg("invalid token \"%s\" after range quantifier", rpr_invalid_quantifier_token($4)), errhint("Only \"?\" is allowed after {n} to make it reluctant."), parser_errposition(@4)); - if ($2 <= 0 || $2 >= PG_INT32_MAX) + if ($2 <= 0 || $2 >= RPR_QUANTITY_INF) ereport(ERROR, errcode(ERRCODE_SYNTAX_ERROR), - errmsg("quantifier bound must be between 1 and %d", PG_INT32_MAX - 1), + errmsg("quantifier bound must be between 1 and %d", RPR_QUANTITY_INF - 1), parser_errposition(@2)); $$ = (Node *) makeRPRQuantifier($2, $2, true, @1); if (strcmp($4, "?|") == 0) @@ -17924,12 +17932,12 @@ row_pattern_quantifier_opt: errmsg("invalid token \"%s\" after range quantifier", rpr_invalid_quantifier_token($5)), errhint("Only \"?\" is allowed after {n,} or {,m} to make it reluctant."), parser_errposition(@5)); - if ($2 < 0 || $2 >= PG_INT32_MAX) + if ($2 < 0 || $2 >= RPR_QUANTITY_INF) ereport(ERROR, errcode(ERRCODE_SYNTAX_ERROR), - errmsg("quantifier bound must be between 0 and %d", PG_INT32_MAX - 1), + errmsg("quantifier bound must be between 0 and %d", RPR_QUANTITY_INF - 1), parser_errposition(@2)); - $$ = (Node *) makeRPRQuantifier($2, PG_INT32_MAX, true, @1); + $$ = (Node *) makeRPRQuantifier($2, RPR_QUANTITY_INF, true, @1); if (strcmp($5, "?|") == 0) ((RPRPatternNode *) $$)->trailing_alt = true; } @@ -17941,10 +17949,10 @@ row_pattern_quantifier_opt: errmsg("invalid token \"%s\" after range quantifier", rpr_invalid_quantifier_token($5)), errhint("Only \"?\" is allowed after {n,} or {,m} to make it reluctant."), parser_errposition(@5)); - if ($3 <= 0 || $3 >= PG_INT32_MAX) + if ($3 <= 0 || $3 >= RPR_QUANTITY_INF) ereport(ERROR, errcode(ERRCODE_SYNTAX_ERROR), - errmsg("quantifier bound must be between 1 and %d", PG_INT32_MAX - 1), + errmsg("quantifier bound must be between 1 and %d", RPR_QUANTITY_INF - 1), parser_errposition(@3)); $$ = (Node *) makeRPRQuantifier(0, $3, true, @1); if (strcmp($5, "?|") == 0) @@ -17958,10 +17966,10 @@ 
row_pattern_quantifier_opt: errmsg("invalid token \"%s\" after range quantifier", rpr_invalid_quantifier_token($6)), errhint("Only \"?\" is allowed after {n,m} to make it reluctant."), parser_errposition(@6)); - if ($2 < 0 || $4 <= 0 || $2 >= PG_INT32_MAX || $4 >= PG_INT32_MAX) + if ($2 < 0 || $4 <= 0 || $2 >= RPR_QUANTITY_INF || $4 >= RPR_QUANTITY_INF) ereport(ERROR, errcode(ERRCODE_SYNTAX_ERROR), - errmsg("quantifier bounds must be between 0 and %d with max >= 1", PG_INT32_MAX - 1), + errmsg("quantifier bounds must be between 0 and %d with max >= 1", RPR_QUANTITY_INF - 1), parser_errposition(@2)); if ($2 > $4) ereport(ERROR, @@ -21407,6 +21415,8 @@ makeRPRQuantifier(int32 min, int32 max, bool reluctant, int location) n->max = max; n->reluctant = reluctant; n->location = location; + + /* Other fields are irrelevant for quantifier node */ return n; } @@ -21442,8 +21452,7 @@ makeRPRSeqOrSingle(List *children, int location) static RPRPatternNode * splitRPRTrailingAlt(RPRPatternNode *node, core_yyscan_t yyscanner) { - ListCell *lc; - int i = 0; + int i; if (node->nodeType != RPR_PATTERN_SEQ) { @@ -21458,9 +21467,9 @@ splitRPRTrailingAlt(RPRPatternNode *node, core_yyscan_t yyscanner) return node; } - foreach(lc, node->children) + foreach_node(RPRPatternNode, child, node->children) { - RPRPatternNode *child = (RPRPatternNode *) lfirst(lc); + i = foreach_current_index(child); if (child->trailing_alt) { @@ -21490,7 +21499,6 @@ splitRPRTrailingAlt(RPRPatternNode *node, core_yyscan_t yyscanner) altn->location = node->location; return altn; } - i++; } return node; } -- 2.34.1