From 60739b6a458115fa571777281bacbfc057da0589 Mon Sep 17 00:00:00 2001 From: Sami Imseih Date: Wed, 30 Apr 2025 15:46:58 -0500 Subject: [PATCH v2] Allow query jumble to squash a list of external parameters 62d712ecf allows query jumbling to squash a list of constants, but not external parameters. The latter is important in practice, as such queries are often generated by ORMs. Allow to squash external parameters as well. --- .../pg_stat_statements/expected/squashing.out | 92 +++++++++++++++++++ .../pg_stat_statements/pg_stat_statements.c | 40 ++++---- contrib/pg_stat_statements/sql/squashing.sql | 39 ++++++++ doc/src/sgml/pgstatstatements.sgml | 4 +- src/backend/nodes/gen_node_support.pl | 2 +- src/backend/nodes/queryjumblefuncs.c | 50 ++++++---- src/include/nodes/queryjumble.h | 7 +- 7 files changed, 190 insertions(+), 44 deletions(-) diff --git a/contrib/pg_stat_statements/expected/squashing.out b/contrib/pg_stat_statements/expected/squashing.out index 7b138af098c..477dbb8bf02 100644 --- a/contrib/pg_stat_statements/expected/squashing.out +++ b/contrib/pg_stat_statements/expected/squashing.out @@ -301,6 +301,98 @@ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 (2 rows) +-- Test bind parameters +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + t +--- + t +(1 row) + +SELECT * FROM test_squash_bigint + WHERE data IN ($1, $2, $3, $4, $5, $6, $7, $8, $9) + \bind 1 2 3 4 5 6 7 8 9 +; + id | data +----+------ +(0 rows) + +SELECT * FROM test_squash_bigint + WHERE data IN ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10) + \bind 1 2 3 4 5 6 7 8 9 10 +; + id | data +----+------ +(0 rows) + +SELECT * FROM test_squash_bigint + WHERE data IN ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11) + \bind 1 2 3 4 5 6 7 8 9 10 11 +; + id | data +----+------ +(0 rows) + +SELECT * FROM test_squash_bigint WHERE data IN + ($1::bigint, $2::bigint, $3::bigint, $4::bigint, $5::bigint, $6::bigint, + $7::bigint, $8::bigint, $9::bigint, $10::bigint) \bind 1 2 3 4 5 6 7 8 9 10 +; + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +----------------------------------------------------+------- + SELECT * FROM test_squash_bigint +| 4 + WHERE data IN ($1 /*, ... */) | + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 +(2 rows) + +-- Test parameters order +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + t +--- + t +(1 row) + +SELECT * FROM test_squash_bigint + WHERE data IN ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10) + AND id IN ($11, $12, $13, $14, $15, $16, $17, $18, $19, $20) + \bind 1 2 3 4 5 6 7 8 9 10 1 2 3 4 5 6 7 8 9 10 +; + id | data +----+------ +(0 rows) + +-- No new pgss entry +SELECT * FROM test_squash_bigint + WHERE data IN ($10, $9, $8, $7, $6, $5, $4, $3, $2, $1) + \bind 1 2 3 4 5 6 7 8 9 10 +; + id | data +----+------ +(0 rows) + +-- Test combination with constants, no new pgss entry +SELECT * FROM test_squash_bigint + WHERE data IN (1, 2, 3, 4, 5, $1, $2, $3, $4, $5) + \bind 1 2 3 4 5 +; + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +----------------------------------------------------+------- + SELECT * FROM test_squash_bigint +| 2 + WHERE data IN ($1 /*, ... */) | + SELECT * FROM test_squash_bigint +| 1 + WHERE data IN ($1 /*, ... */) +| + AND id IN ($2 /*, ... */) | + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 +(3 rows) + -- CoerceViaIO -- Create some dummy type to force CoerceViaIO CREATE TYPE casttesttype; diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c index 9778407cba3..68bce2b0146 100644 --- a/contrib/pg_stat_statements/pg_stat_statements.c +++ b/contrib/pg_stat_statements/pg_stat_statements.c @@ -2825,9 +2825,11 @@ generate_normalized_query(JumbleState *jstate, const char *query, n_quer_loc = 0, /* Normalized query byte location */ last_off = 0, /* Offset from start for previous tok */ last_tok_len = 0; /* Length (in bytes) of that tok */ - bool in_squashed = false; /* in a run of squashed consts? */ - int skipped_constants = 0; /* Position adjustment of later - * constants after squashed ones */ + bool in_squashed = false; /* in a run of squashed constants + * or parameters? */ + int skipped_expressions = 0; /* Position adjustment of later + * constants or parameters after + * squashed ones */ /* @@ -2867,14 +2869,14 @@ generate_normalized_query(JumbleState *jstate, const char *query, continue; /* ignore any duplicates */ /* - * What to do next depends on whether we're squashing constant lists, - * and whether we're already in a run of such constants. + * What to do next depends on whether we're squashing lists, + * and whether we're already in a run of such squashed expressions. */ if (!jstate->clocations[i].squashed) { /* - * This location corresponds to a constant not to be squashed. - * Print what comes before the constant ... + * This location corresponds to an expression not to be squashed. + * Print what comes before the expression ... */ len_to_wrt = off - last_off; len_to_wrt -= last_tok_len; @@ -2884,21 +2886,21 @@ generate_normalized_query(JumbleState *jstate, const char *query, memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt); n_quer_loc += len_to_wrt; - /* ... and then a param symbol replacing the constant itself */ + /* ... and then a param symbol replacing the expression itself */ n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d", - i + 1 + jstate->highest_extern_param_id - skipped_constants); + i + 1 + jstate->highest_extern_param_id - skipped_expressions); - /* In case previous constants were merged away, stop doing that */ + /* In case previous expressions were merged away, stop doing that */ in_squashed = false; } else if (!in_squashed) { /* - * This location is the start position of a run of constants to be - * squashed, so we need to print the representation of starting a - * group of stashed constants. + * This location is the start position of a run of expressions to + * be squashed, so we need to print the representation of starting + * a group of stashed expressions. * - * Print what comes before the constant ... + * Print what comes before the expression ... */ len_to_wrt = off - last_off; len_to_wrt -= last_tok_len; @@ -2908,25 +2910,25 @@ generate_normalized_query(JumbleState *jstate, const char *query, memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt); n_quer_loc += len_to_wrt; - /* ... and then start a run of squashed constants */ + /* ... and then start a run of squashed expressions */ n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d /*, ... */", - i + 1 + jstate->highest_extern_param_id - skipped_constants); + i + 1 + jstate->highest_extern_param_id - skipped_expressions); /* The next location will match the block below, to end the run */ in_squashed = true; - skipped_constants++; + skipped_expressions++; } else { /* - * The second location of a run of squashable elements; this + * The second location of a run of squashable expressions; this * indicates its end. */ in_squashed = false; } - /* Otherwise the constant is squashed away -- move forward */ + /* Otherwise the expression is squashed away -- move forward */ quer_loc = off + tok_len; last_off = off; last_tok_len = tok_len; diff --git a/contrib/pg_stat_statements/sql/squashing.sql b/contrib/pg_stat_statements/sql/squashing.sql index 908be81ff2b..7d6f920f047 100644 --- a/contrib/pg_stat_statements/sql/squashing.sql +++ b/contrib/pg_stat_statements/sql/squashing.sql @@ -97,6 +97,45 @@ SELECT * FROM test_squash_jsonb WHERE data IN (SELECT '"10"')::jsonb); SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; +-- Test bind parameters +SELECT pg_stat_statements_reset() IS NOT NULL AS t; +SELECT * FROM test_squash_bigint + WHERE data IN ($1, $2, $3, $4, $5, $6, $7, $8, $9) + \bind 1 2 3 4 5 6 7 8 9 +; +SELECT * FROM test_squash_bigint + WHERE data IN ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10) + \bind 1 2 3 4 5 6 7 8 9 10 +; +SELECT * FROM test_squash_bigint + WHERE data IN ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11) + \bind 1 2 3 4 5 6 7 8 9 10 11 +; +SELECT * FROM test_squash_bigint WHERE data IN + ($1::bigint, $2::bigint, $3::bigint, $4::bigint, $5::bigint, $6::bigint, + $7::bigint, $8::bigint, $9::bigint, $10::bigint) \bind 1 2 3 4 5 6 7 8 9 10 +; +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +-- Test parameters order +SELECT pg_stat_statements_reset() IS NOT NULL AS t; +SELECT * FROM test_squash_bigint + WHERE data IN ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10) + AND id IN ($11, $12, $13, $14, $15, $16, $17, $18, $19, $20) + \bind 1 2 3 4 5 6 7 8 9 10 1 2 3 4 5 6 7 8 9 10 +; +-- No new pgss entry +SELECT * FROM test_squash_bigint + WHERE data IN ($10, $9, $8, $7, $6, $5, $4, $3, $2, $1) + \bind 1 2 3 4 5 6 7 8 9 10 +; +-- Test combination with constants, no new pgss entry +SELECT * FROM test_squash_bigint + WHERE data IN (1, 2, 3, 4, 5, $1, $2, $3, $4, $5) + \bind 1 2 3 4 5 +; +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + -- CoerceViaIO -- Create some dummy type to force CoerceViaIO diff --git a/doc/src/sgml/pgstatstatements.sgml b/doc/src/sgml/pgstatstatements.sgml index 7baa07dcdbf..b9c8a917280 100644 --- a/doc/src/sgml/pgstatstatements.sgml +++ b/doc/src/sgml/pgstatstatements.sgml @@ -633,8 +633,8 @@ single pg_stat_statements entry; as explained above, this is expected to happen for semantically equivalent queries. In addition, if the only difference between queries is the number of elements - in a list of constants, the list will get squashed down to a single element but shown - with a commented-out list indicator: + in a list of constants or parameters, the list will get squashed down to a + single element but shown with a commented-out list indicator: =# SELECT pg_stat_statements_reset(); diff --git a/src/backend/nodes/gen_node_support.pl b/src/backend/nodes/gen_node_support.pl index 77659b0f760..ddda924a275 100644 --- a/src/backend/nodes/gen_node_support.pl +++ b/src/backend/nodes/gen_node_support.pl @@ -1321,7 +1321,7 @@ _jumble${n}(JumbleState *jstate, Node *node) elsif (($t =~ /^(\w+)\*$/ or $t =~ /^struct\s+(\w+)\*$/) and elem $1, @node_types) { - # Node type. Squash constants if requested. + # Node type. Squash lisf of expressions if requested. if ($query_jumble_squash) { print $jff "\tJUMBLE_ELEMENTS($f);\n" diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c index d1e82a63f09..92485e511c3 100644 --- a/src/backend/nodes/queryjumblefuncs.c +++ b/src/backend/nodes/queryjumblefuncs.c @@ -373,12 +373,12 @@ FlushPendingNulls(JumbleState *jstate) /* - * Record location of constant within query string of query tree that is - * currently being walked. + * Record location of a constant or a parameter within query string of query + * tree that is currently being walked. * - * 'squashed' signals that the constant represents the first or the last - * element in a series of merged constants, and everything but the first/last - * element contributes nothing to the jumble hash. + * 'squashed' signals that the expression represents the first or the last + * element in a series of squashed expressions, and everything but the + * first/last element contributes nothing to the jumble hash. */ static void RecordConstLocation(JumbleState *jstate, int location, bool squashed) @@ -405,15 +405,16 @@ RecordConstLocation(JumbleState *jstate, int location, bool squashed) /* * Subroutine for _jumbleElements: Verify a few simple cases where we can - * deduce that the expression is a constant: + * deduce that the expression is squashable: * * - Ignore a possible wrapping RelabelType and CoerceViaIO. * - If it's a FuncExpr, check that the function is an implicit * cast and its arguments are Const. - * - Otherwise test if the expression is a simple Const. + * - Otherwise test if the expression is a simple Const or an + * external parameter. */ static bool -IsSquashableConst(Node *element) +IsSquashable(Node *element) { if (IsA(element, RelabelType)) element = (Node *) ((RelabelType *) element)->arg; @@ -444,15 +445,26 @@ IsSquashableConst(Node *element) return true; } - if (!IsA(element, Const)) - return false; + switch (nodeTag(element)) + { + case T_Const: + return true; + case T_Param: + { + Param *param = (Param *) element; - return true; + return param->paramkind == PARAM_EXTERN; + } + default: + break; + } + + return false; } /* * Subroutine for _jumbleElements: Verify whether the provided list - * can be squashed, meaning it contains only constant expressions. + * can be squashed, meaning it contains only constant expressions or params. * * Return value indicates if squashing is possible. * @@ -461,7 +473,7 @@ IsSquashableConst(Node *element) * expressions. */ static bool -IsSquashableConstList(List *elements, Node **firstExpr, Node **lastExpr) +IsSquashableList(List *elements, Node **firstExpr, Node **lastExpr) { ListCell *temp; @@ -474,7 +486,7 @@ IsSquashableConstList(List *elements, Node **firstExpr, Node **lastExpr) foreach(temp, elements) { - if (!IsSquashableConst(lfirst(temp))) + if (!IsSquashable(lfirst(temp))) return false; } @@ -517,9 +529,9 @@ do { \ #include "queryjumblefuncs.funcs.c" /* - * We jumble lists of constant elements as one individual item regardless - * of how many elements are in the list. This means different queries - * jumble to the same query_id, if the only difference is the number of + * We jumble lists of constant elements or parameters as one individual item + * regardless of how many elements are in the list. This means different + * queries jumble to the same query_id, if the only difference is the number of * elements in the list. */ static void @@ -528,7 +540,7 @@ _jumbleElements(JumbleState *jstate, List *elements) Node *first, *last; - if (IsSquashableConstList(elements, &first, &last)) + if (IsSquashableList(elements, &first, &last)) { /* * If this list of elements is squashable, keep track of the location @@ -538,7 +550,7 @@ _jumbleElements(JumbleState *jstate, List *elements) * list. * * For the limited set of cases we support now (implicit coerce via - * FuncExpr, Const) it's fine to use exprLocation of the 'last' + * FuncExpr, Const, Param) it's fine to use exprLocation of the 'last' * expression, but if more complex composite expressions are to be * supported (e.g., OpExpr or FuncExpr as an explicit call), more * sophisticated tracking will be needed. diff --git a/src/include/nodes/queryjumble.h b/src/include/nodes/queryjumble.h index da7c7abed2e..3237455150a 100644 --- a/src/include/nodes/queryjumble.h +++ b/src/include/nodes/queryjumble.h @@ -17,7 +17,8 @@ #include "nodes/parsenodes.h" /* - * Struct for tracking locations/lengths of constants during normalization + * Struct for tracking locations/lengths of constants and parameters during + * normalization */ typedef struct LocationLen { @@ -26,7 +27,7 @@ typedef struct LocationLen /* * Indicates that this location represents the beginning or end of a run - * of squashed constants. + * of squashed expressions. */ bool squashed; } LocationLen; @@ -43,7 +44,7 @@ typedef struct JumbleState /* Number of bytes used in jumble[] */ Size jumble_len; - /* Array of locations of constants that should be removed */ + /* Array of locations of constants that should be removed and parameters */ LocationLen *clocations; /* Allocated length of clocations array */ base-commit: b0635bfda0535a7fc36cd11d10eecec4e2a96330 -- 2.45.1