From 15f1313ef66e964e588b0bf19ede676437ea5a42 Mon Sep 17 00:00:00 2001 From: Sami Imseih Date: Tue, 6 May 2025 13:52:11 -0500 Subject: [PATCH v1 1/1] Allow query jumble to squash a list external parameters --- .../pg_stat_statements/expected/squashing.out | 14 +- .../pg_stat_statements/pg_stat_statements.c | 84 +++--------- src/backend/nodes/gen_node_support.pl | 2 +- src/backend/nodes/queryjumblefuncs.c | 121 +++++++++++------- src/backend/parser/gram.y | 37 ++++-- src/backend/parser/parse_expr.c | 4 + src/include/nodes/parsenodes.h | 4 + src/include/nodes/primnodes.h | 2 + 8 files changed, 139 insertions(+), 129 deletions(-) diff --git a/contrib/pg_stat_statements/expected/squashing.out b/contrib/pg_stat_statements/expected/squashing.out index 7b138af098c..d92cfbd35fb 100644 --- a/contrib/pg_stat_statements/expected/squashing.out +++ b/contrib/pg_stat_statements/expected/squashing.out @@ -246,7 +246,7 @@ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; query | calls ----------------------------------------------------+------- SELECT * FROM test_squash_bigint WHERE data IN +| 1 - ($1 /*, ... */::bigint) | + ($1 /*, ... */) | SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 (2 rows) @@ -353,7 +353,7 @@ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; query | calls ----------------------------------------------------+------- SELECT * FROM test_squash_cast WHERE data IN +| 1 - ($1 /*, ... */::int4::casttesttype) | + ($1 /*, ... */) | SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 (2 rows) @@ -376,7 +376,7 @@ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; query | calls ----------------------------------------------------+------- SELECT * FROM test_squash_jsonb WHERE data IN +| 1 - (($1 /*, ... */)::jsonb) | + ($1 /*, ... */) | SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 (2 rows) @@ -393,10 +393,10 @@ SELECT * FROM test_squash WHERE id IN (1::oid, 2::oid, 3::oid, 4::oid, 5::oid, 6 (0 rows) SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; - query | calls -------------------------------------------------------------+------- - SELECT * FROM test_squash WHERE id IN ($1 /*, ... */::oid) | 1 - SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 + query | calls +-------------------------------------------------------+------- + SELECT * FROM test_squash WHERE id IN ($1 /*, ... */) | 1 + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 (2 rows) -- Test constants evaluation in a CTE, which was causing issues in the past diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c index 9778407cba3..efcad87d684 100644 --- a/contrib/pg_stat_statements/pg_stat_statements.c +++ b/contrib/pg_stat_statements/pg_stat_statements.c @@ -2825,10 +2825,6 @@ generate_normalized_query(JumbleState *jstate, const char *query, n_quer_loc = 0, /* Normalized query byte location */ last_off = 0, /* Offset from start for previous tok */ last_tok_len = 0; /* Length (in bytes) of that tok */ - bool in_squashed = false; /* in a run of squashed consts? */ - int skipped_constants = 0; /* Position adjustment of later - * constants after squashed ones */ - /* * Get constants' lengths (core system only gives us locations). Note @@ -2842,9 +2838,6 @@ generate_normalized_query(JumbleState *jstate, const char *query, * certainly isn't more than 11 bytes, even if n reaches INT_MAX. We * could refine that limit based on the max value of n for the current * query, but it hardly seems worth any extra effort to do so. - * - * Note this also gives enough room for the commented-out ", ..." list - * syntax used by constant squashing. */ norm_query_buflen = query_len + jstate->clocations_count * 10; @@ -2857,7 +2850,6 @@ generate_normalized_query(JumbleState *jstate, const char *query, tok_len; /* Length (in bytes) of that tok */ off = jstate->clocations[i].location; - /* Adjust recorded location if we're dealing with partial string */ off -= query_loc; @@ -2866,67 +2858,24 @@ generate_normalized_query(JumbleState *jstate, const char *query, if (tok_len < 0) continue; /* ignore any duplicates */ - /* - * What to do next depends on whether we're squashing constant lists, - * and whether we're already in a run of such constants. - */ - if (!jstate->clocations[i].squashed) - { - /* - * This location corresponds to a constant not to be squashed. - * Print what comes before the constant ... - */ - len_to_wrt = off - last_off; - len_to_wrt -= last_tok_len; - - Assert(len_to_wrt >= 0); + /* Copy next chunk (what precedes the next constant) */ + len_to_wrt = off - last_off; + len_to_wrt -= last_tok_len; - memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt); - n_quer_loc += len_to_wrt; + Assert(len_to_wrt >= 0); + memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt); + n_quer_loc += len_to_wrt; - /* ... and then a param symbol replacing the constant itself */ - n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d", - i + 1 + jstate->highest_extern_param_id - skipped_constants); - - /* In case previous constants were merged away, stop doing that */ - in_squashed = false; - } - else if (!in_squashed) - { - /* - * This location is the start position of a run of constants to be - * squashed, so we need to print the representation of starting a - * group of stashed constants. - * - * Print what comes before the constant ... - */ - len_to_wrt = off - last_off; - len_to_wrt -= last_tok_len; - Assert(len_to_wrt >= 0); - Assert(i + 1 < jstate->clocations_count); - Assert(jstate->clocations[i + 1].squashed); - memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt); - n_quer_loc += len_to_wrt; - - /* ... and then start a run of squashed constants */ - n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d /*, ... */", - i + 1 + jstate->highest_extern_param_id - skipped_constants); - - /* The next location will match the block below, to end the run */ - in_squashed = true; - - skipped_constants++; - } - else - { - /* - * The second location of a run of squashable elements; this - * indicates its end. - */ - in_squashed = false; - } + /* + * And insert a param symbol in place of the constant token. + * + * However, If we have a squashable list, insert a comment in place of + * the second and remaining values of the list. + */ + n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d%s", + i + 1 + jstate->highest_extern_param_id, + (jstate->clocations[i].squashed) ? " /*, ... */" : ""); - /* Otherwise the constant is squashed away -- move forward */ quer_loc = off + tok_len; last_off = off; last_tok_len = tok_len; @@ -3017,6 +2966,9 @@ fill_in_constant_lengths(JumbleState *jstate, const char *query, Assert(loc >= 0); + if (locs[i].squashed) + continue; /* squashable list, ignore */ + if (loc <= last_loc) continue; /* Duplicate constant, ignore */ diff --git a/src/backend/nodes/gen_node_support.pl b/src/backend/nodes/gen_node_support.pl index 77659b0f760..17ba3696226 100644 --- a/src/backend/nodes/gen_node_support.pl +++ b/src/backend/nodes/gen_node_support.pl @@ -1324,7 +1324,7 @@ _jumble${n}(JumbleState *jstate, Node *node) # Node type. Squash constants if requested. if ($query_jumble_squash) { - print $jff "\tJUMBLE_ELEMENTS($f);\n" + print $jff "\tJUMBLE_ELEMENTS($f, node);\n" unless $query_jumble_ignore; } else diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c index d1e82a63f09..27d76a493be 100644 --- a/src/backend/nodes/queryjumblefuncs.c +++ b/src/backend/nodes/queryjumblefuncs.c @@ -60,10 +60,10 @@ static uint64 DoJumble(JumbleState *jstate, Node *node); static void AppendJumble(JumbleState *jstate, const unsigned char *value, Size size); static void FlushPendingNulls(JumbleState *jstate); -static void RecordConstLocation(JumbleState *jstate, - int location, bool squashed); +static void RecordExpressionLocation(JumbleState *jstate, + int location, int len); static void _jumbleNode(JumbleState *jstate, Node *node); -static void _jumbleElements(JumbleState *jstate, List *elements); +static void _jumbleElements(JumbleState *jstate, List *elements, Node *node); static void _jumbleA_Const(JumbleState *jstate, Node *node); static void _jumbleList(JumbleState *jstate, Node *node); static void _jumbleVariableSetStmt(JumbleState *jstate, Node *node); @@ -381,7 +381,7 @@ FlushPendingNulls(JumbleState *jstate) * element contributes nothing to the jumble hash. */ static void -RecordConstLocation(JumbleState *jstate, int location, bool squashed) +RecordExpressionLocation(JumbleState *jstate, int location, int len) { /* -1 indicates unknown or undefined location */ if (location >= 0) @@ -396,9 +396,15 @@ RecordConstLocation(JumbleState *jstate, int location, bool squashed) sizeof(LocationLen)); } jstate->clocations[jstate->clocations_count].location = location; - /* initialize lengths to -1 to simplify third-party module usage */ - jstate->clocations[jstate->clocations_count].squashed = squashed; - jstate->clocations[jstate->clocations_count].length = -1; + + /* + * initialize lengths to -1 to simplify third-party module usage + * + * If we have a length that is greater than -1, this indicates a + * squashable list. + */ + jstate->clocations[jstate->clocations_count].length = (len > -1) ? len : -1; + jstate->clocations[jstate->clocations_count].squashed = (len > -1) ? true : false; jstate->clocations_count++; } } @@ -413,7 +419,7 @@ RecordConstLocation(JumbleState *jstate, int location, bool squashed) * - Otherwise test if the expression is a simple Const. */ static bool -IsSquashableConst(Node *element) +IsSquashableExpression(Node *element) { if (IsA(element, RelabelType)) element = (Node *) ((RelabelType *) element)->arg; @@ -437,22 +443,45 @@ IsSquashableConst(Node *element) { Node *arg = lfirst(temp); - if (!IsA(arg, Const)) /* XXX we could recurse here instead */ - return false; + switch (nodeTag(arg)) + { + case T_Const: + return true; + case T_Param: + { + Param *param = (Param *) element; + + return param->paramkind == PARAM_EXTERN; + } + default: + break; + } } - return true; + return false; } - if (!IsA(element, Const)) - return false; + switch (nodeTag(element)) + { + case T_Const: + return true; + case T_Param: + { + Param *param = (Param *) element; - return true; + return param->paramkind == PARAM_EXTERN; + } + default: + break; + } + + return false; } /* * Subroutine for _jumbleElements: Verify whether the provided list - * can be squashed, meaning it contains only constant expressions. + * can be squashed, meaning it contains only constant and external + * parameter expressions. * * Return value indicates if squashing is possible. * @@ -461,7 +490,7 @@ IsSquashableConst(Node *element) * expressions. */ static bool -IsSquashableConstList(List *elements, Node **firstExpr, Node **lastExpr) +IsSquashableExpressionList(List *elements) { ListCell *temp; @@ -474,22 +503,19 @@ IsSquashableConstList(List *elements, Node **firstExpr, Node **lastExpr) foreach(temp, elements) { - if (!IsSquashableConst(lfirst(temp))) + if (!IsSquashableExpression(lfirst(temp))) return false; } - *firstExpr = linitial(elements); - *lastExpr = llast(elements); - return true; } #define JUMBLE_NODE(item) \ _jumbleNode(jstate, (Node *) expr->item) -#define JUMBLE_ELEMENTS(list) \ - _jumbleElements(jstate, (List *) expr->list) +#define JUMBLE_ELEMENTS(list, node) \ + _jumbleElements(jstate, (List *) expr->list, node) #define JUMBLE_LOCATION(location) \ - RecordConstLocation(jstate, expr->location, false) + RecordExpressionLocation(jstate, expr->location, -1) #define JUMBLE_FIELD(item) \ do { \ if (sizeof(expr->item) == 8) \ @@ -517,36 +543,37 @@ do { \ #include "queryjumblefuncs.funcs.c" /* - * We jumble lists of constant elements as one individual item regardless - * of how many elements are in the list. This means different queries - * jumble to the same query_id, if the only difference is the number of - * elements in the list. + * We try to jumble lists of expressions as one individual item regardless + * of how many elements are in the list. This is know as squashing, which + * results in different queries jumbling to the same query_id, if the only + * difference is the number of elements in the list. + * + * We allow for Constants and Params of type external to be squashed. To + * be able to normalize such queries by stripping away the squashed away + * values, we must track the start and end of the expression list. */ static void -_jumbleElements(JumbleState *jstate, List *elements) +_jumbleElements(JumbleState *jstate, List *elements, Node *node) { - Node *first, - *last; + bool normalize_list = false; - if (IsSquashableConstList(elements, &first, &last)) + if (IsSquashableExpressionList(elements)) { - /* - * If this list of elements is squashable, keep track of the location - * of its first and last elements. When reading back the locations - * array, we'll see two consecutive locations with ->squashed set to - * true, indicating the location of initial and final elements of this - * list. - * - * For the limited set of cases we support now (implicit coerce via - * FuncExpr, Const) it's fine to use exprLocation of the 'last' - * expression, but if more complex composite expressions are to be - * supported (e.g., OpExpr or FuncExpr as an explicit call), more - * sophisticated tracking will be needed. - */ - RecordConstLocation(jstate, exprLocation(first), true); - RecordConstLocation(jstate, exprLocation(last), true); + if (IsA(node, ArrayExpr)) + { + ArrayExpr *aexpr = (ArrayExpr *) node; + + if (aexpr->expr_start > 0 && aexpr->expr_end > 0) + { + RecordExpressionLocation(jstate, + aexpr->expr_start + 1, + (aexpr->expr_end - aexpr->expr_start) - 1); + normalize_list = true; + } + } } - else + + if (!normalize_list) { _jumbleNode(jstate, (Node *) elements); } diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 3c4268b271a..dff65cc0f6c 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -184,7 +184,7 @@ static void doNegateFloat(Float *v); static Node *makeAndExpr(Node *lexpr, Node *rexpr, int location); static Node *makeOrExpr(Node *lexpr, Node *rexpr, int location); static Node *makeNotExpr(Node *expr, int location); -static Node *makeAArrayExpr(List *elements, int location); +static Node *makeAArrayExpr(List *elements, int location, int expr_end); static Node *makeSQLValueFunction(SQLValueFunctionOp op, int32 typmod, int location); static Node *makeXmlExpr(XmlExprOp op, char *name, List *named_args, @@ -206,6 +206,10 @@ static void preprocess_pubobj_list(List *pubobjspec_list, core_yyscan_t yyscanner); static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); +/* global variables */ +static ParseLoc expr_list_start = 0; +static ParseLoc expr_list_end = 0; + %} %pure-parser @@ -15298,7 +15302,12 @@ a_expr: c_expr { $$ = $1; } else { /* generate scalar IN expression */ - $$ = (Node *) makeSimpleA_Expr(AEXPR_IN, "=", $1, $3, @2); + A_Expr *aexpr = makeSimpleA_Expr(AEXPR_IN, "=", $1, $3, @2); + aexpr->expr_start = expr_list_start; + aexpr->expr_end = expr_list_end; + $$ = (Node *) aexpr; + expr_list_start = 0; + expr_list_end = 0; } } | a_expr NOT_LA IN_P in_expr %prec NOT_LA @@ -15321,7 +15330,12 @@ a_expr: c_expr { $$ = $1; } else { /* generate scalar NOT IN expression */ - $$ = (Node *) makeSimpleA_Expr(AEXPR_IN, "<>", $1, $4, @2); + A_Expr *aexpr = makeSimpleA_Expr(AEXPR_IN, "<>", $1, $4, @2); + aexpr->expr_start = expr_list_start; + aexpr->expr_end = expr_list_end; + $$ = (Node *) aexpr; + expr_list_start = 0; + expr_list_end = 0; } } | a_expr subquery_Op sub_type select_with_parens %prec Op @@ -16757,15 +16771,15 @@ type_list: Typename { $$ = list_make1($1); } array_expr: '[' expr_list ']' { - $$ = makeAArrayExpr($2, @1); + $$ = makeAArrayExpr($2, @1, @3); } | '[' array_expr_list ']' { - $$ = makeAArrayExpr($2, @1); + $$ = makeAArrayExpr($2, @1, @3); } | '[' ']' { - $$ = makeAArrayExpr(NIL, @1); + $$ = makeAArrayExpr(NIL, @1, @2); } ; @@ -16895,7 +16909,12 @@ in_expr: select_with_parens /* other fields will be filled later */ $$ = (Node *) n; } - | '(' expr_list ')' { $$ = (Node *) $2; } + | '(' expr_list ')' + { + $$ = (Node *) $2; + expr_list_start = @1; + expr_list_end = @3; + } ; /* @@ -19293,12 +19312,14 @@ makeNotExpr(Node *expr, int location) } static Node * -makeAArrayExpr(List *elements, int location) +makeAArrayExpr(List *elements, int location, int expr_end) { A_ArrayExpr *n = makeNode(A_ArrayExpr); n->elements = elements; n->location = location; + n->expr_start = location; + n->expr_end = expr_end; return (Node *) n; } diff --git a/src/backend/parser/parse_expr.c b/src/backend/parser/parse_expr.c index 1f8e2d54673..f54bf86b520 100644 --- a/src/backend/parser/parse_expr.c +++ b/src/backend/parser/parse_expr.c @@ -1224,6 +1224,8 @@ transformAExprIn(ParseState *pstate, A_Expr *a) newa->elements = aexprs; newa->multidims = false; newa->location = -1; + newa->expr_start = a->expr_start; + newa->expr_end = a->expr_end; result = (Node *) make_scalar_array_op(pstate, a->name, @@ -2166,6 +2168,8 @@ transformArrayExpr(ParseState *pstate, A_ArrayExpr *a, newa->element_typeid = element_type; newa->elements = newcoercedelems; newa->location = a->location; + newa->expr_start = a->expr_start; + newa->expr_end = a->expr_end; return (Node *) newa; } diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index 4610fc61293..ee9cd1f25b9 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -347,6 +347,8 @@ typedef struct A_Expr Node *lexpr; /* left argument, or NULL if none */ Node *rexpr; /* right argument, or NULL if none */ ParseLoc location; /* token location, or -1 if unknown */ + ParseLoc expr_start; + ParseLoc expr_end; } A_Expr; /* @@ -502,6 +504,8 @@ typedef struct A_ArrayExpr NodeTag type; List *elements; /* array element expressions */ ParseLoc location; /* token location, or -1 if unknown */ + ParseLoc expr_start; + ParseLoc expr_end; } A_ArrayExpr; /* diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h index 7d3b4198f26..0d9cb292464 100644 --- a/src/include/nodes/primnodes.h +++ b/src/include/nodes/primnodes.h @@ -1399,6 +1399,8 @@ typedef struct ArrayExpr bool multidims pg_node_attr(query_jumble_ignore); /* token location, or -1 if unknown */ ParseLoc location; + ParseLoc expr_start; + ParseLoc expr_end; } ArrayExpr; /* -- 2.39.5 (Apple Git-154)